/////////////////////////////////////////////////////////////////////////////// /// \file regex_algorithms.hpp /// Contains the regex_match(), regex_search() and regex_replace() algorithms. // // Copyright 2008 Eric Niebler. Distributed under the Boost // Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef BOOST_XPRESSIVE_ALGORITHMS_HPP_EAN_10_04_2005 #define BOOST_XPRESSIVE_ALGORITHMS_HPP_EAN_10_04_2005 // MS compatible compilers support #pragma once #if defined(_MSC_VER) # pragma once #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include /// INTERNAL ONLY /// #define BOOST_XPR_NONDEDUCED_TYPE_(x) typename mpl::identity::type namespace boost { namespace xpressive { /////////////////////////////////////////////////////////////////////////////// // regex_match /////////////////////////////////////////////////////////////////////////////// namespace detail { /////////////////////////////////////////////////////////////////////////////// // regex_match_impl template inline bool regex_match_impl ( BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) begin , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) end , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef detail::core_access access; BOOST_ASSERT(0 != re.regex_id()); // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); state.flags_.match_all_ = true; state.sub_match(0).begin_ = begin; if(access::match(re, state)) { access::set_prefix_suffix(what, begin, end); return true; } // handle partial matches else if(state.found_partial_match_ && 0 != (flags & regex_constants::match_partial)) { state.set_partial_match(); return true; } access::reset(what); return false; } } // namespace detail /// \brief See if a regex matches a sequence from beginning to end. /// /// Determines whether there is an exact match between the regular expression \c re, /// and all of the sequence [begin, end). /// /// \pre Type \c BidiIter meets the requirements of a Bidirectional Iterator (24.1.4). /// \pre [begin,end) denotes a valid iterator range. /// \param begin The beginning of the sequence. /// \param end The end of the sequence. /// \param what The \c match_results struct into which the sub_matches will be written /// \param re The regular expression object to use /// \param flags Optional match flags, used to control how the expression is matched /// against the sequence. (See \c match_flag_type.) /// \return \c true if a match is found, \c false otherwise /// \throw regex_error on stack exhaustion template inline bool regex_match ( BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) begin , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) end , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef detail::core_access access; if(0 == re.regex_id()) { access::reset(what); return false; } return detail::regex_match_impl(begin, end, what, re, flags); } /// \overload /// template inline bool regex_match ( BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) begin , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) end , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; return detail::regex_match_impl(begin, end, what, re, flags); } /// \overload /// template inline bool regex_match ( BOOST_XPR_NONDEDUCED_TYPE_(Char) *begin , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef detail::core_access access; if(0 == re.regex_id()) { access::reset(what); return false; } // BUGBUG this is inefficient typedef typename remove_const::type char_type; Char *end = begin + std::char_traits::length(begin); return detail::regex_match_impl(begin, end, what, re, flags); } /// \overload /// template inline bool regex_match ( BidiRange &rng , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { typedef detail::core_access access; if(0 == re.regex_id()) { access::reset(what); return false; } // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); return detail::regex_match_impl(begin, end, what, re, flags); } /// \overload /// template inline bool regex_match ( BidiRange const &rng , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { typedef detail::core_access access; if(0 == re.regex_id()) { access::reset(what); return false; } // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); return detail::regex_match_impl(begin, end, what, re, flags); } /// \overload /// template inline bool regex_match ( BOOST_XPR_NONDEDUCED_TYPE_(Char) *begin , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; typedef typename remove_const::type char_type; Char *end = begin + std::char_traits::length(begin); return detail::regex_match_impl(begin, end, what, re, flags); } /// \overload /// template inline bool regex_match ( BidiRange &rng , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); return detail::regex_match_impl(begin, end, what, re, flags); } /// \overload /// template inline bool regex_match ( BidiRange const &rng , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); return detail::regex_match_impl(begin, end, what, re, flags); } /////////////////////////////////////////////////////////////////////////////// // regex_search /////////////////////////////////////////////////////////////////////////////// namespace detail { /////////////////////////////////////////////////////////////////////////////// // regex_search_impl template inline bool regex_search_impl ( match_state &state , basic_regex const &re , bool not_initial_null = false ) { typedef core_access access; match_results &what = *state.context_.results_ptr_; BOOST_ASSERT(0 != re.regex_id()); bool const partial_ok = state.flags_.match_partial_; save_restore not_null(state.flags_.match_not_null_, state.flags_.match_not_null_ || not_initial_null); state.flags_.match_prev_avail_ = state.flags_.match_prev_avail_ || !state.bos(); regex_impl const &impl = *access::get_regex_impl(re); BidiIter const begin = state.cur_, end = state.end_; BidiIter &sub0begin = state.sub_match(0).begin_; sub0begin = state.cur_; // If match_continuous is set, we only need to check for a match at the current position if(state.flags_.match_continuous_) { if(access::match(re, state)) { access::set_prefix_suffix(what, begin, end); return true; } // handle partial matches else if(partial_ok && state.found_partial_match_) { state.set_partial_match(); return true; } } // If we have a finder, use it to find where a potential match can start else if(impl.finder_ && (!partial_ok || impl.finder_->ok_for_partial_matches())) { finder const &find = *impl.finder_; if(find(state)) { if(state.cur_ != begin) { not_null.restore(); } do { sub0begin = state.cur_; if(access::match(re, state)) { access::set_prefix_suffix(what, begin, end); return true; } // handle partial matches else if(partial_ok && state.found_partial_match_) { state.set_partial_match(); return true; } BOOST_ASSERT(state.cur_ == sub0begin); not_null.restore(); } while(state.cur_ != state.end_ && (++state.cur_, find(state))); } } // Otherwise, use brute force search at every position. else { for(;;) { if(access::match(re, state)) { access::set_prefix_suffix(what, begin, end); return true; } // handle partial matches else if(partial_ok && state.found_partial_match_) { state.set_partial_match(); return true; } else if(end == sub0begin) { break; } BOOST_ASSERT(state.cur_ == sub0begin); state.cur_ = ++sub0begin; not_null.restore(); } } access::reset(what); return false; } } // namespace detail /// \brief Determines whether there is some sub-sequence within [begin,end) /// that matches the regular expression \c re. /// /// Determines whether there is some sub-sequence within [begin,end) that matches /// the regular expression \c re. /// /// \pre Type \c BidiIter meets the requirements of a Bidirectional Iterator (24.1.4). /// \pre [begin,end) denotes a valid iterator range. /// \param begin The beginning of the sequence /// \param end The end of the sequence /// \param what The \c match_results struct into which the sub_matches will be written /// \param re The regular expression object to use /// \param flags Optional match flags, used to control how the expression is matched against /// the sequence. (See \c match_flag_type.) /// \return \c true if a match is found, \c false otherwise /// \throw regex_error on stack exhaustion template inline bool regex_search ( BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) begin , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) end , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { access::reset(what); return false; } // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /// \overload /// template inline bool regex_search ( BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) begin , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) end , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /// \overload /// template inline bool regex_search ( BOOST_XPR_NONDEDUCED_TYPE_(Char) *begin , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { access::reset(what); return false; } // BUGBUG this is inefficient typedef typename remove_const::type char_type; Char *end = begin + std::char_traits::length(begin); // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /// \overload /// template inline bool regex_search ( BidiRange &rng , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { access::reset(what); return false; } // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /// \overload /// template inline bool regex_search ( BidiRange const &rng , match_results &what , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { access::reset(what); return false; } // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /// \overload /// template inline bool regex_search ( BOOST_XPR_NONDEDUCED_TYPE_(Char) *begin , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; // BUGBUG this is inefficient typedef typename remove_const::type char_type; Char *end = begin + std::char_traits::length(begin); // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /// \overload /// template inline bool regex_search ( BidiRange &rng , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /// \overload /// template inline bool regex_search ( BidiRange const &rng , basic_regex const &re , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { typedef detail::core_access access; // a default-constructed regex matches nothing if(0 == re.regex_id()) { return false; } // BUGBUG this is inefficient match_results what; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(rng), end = boost::end(rng); // the state object holds matching state and // is passed by reference to all the matchers detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); return detail::regex_search_impl(state, re); } /////////////////////////////////////////////////////////////////////////////// // regex_replace /////////////////////////////////////////////////////////////////////////////// namespace detail { /////////////////////////////////////////////////////////////////////////////// // regex_replace_impl template inline OutIter regex_replace_impl ( OutIter out , BidiIter begin , BidiIter end , basic_regex const &re , Formatter const &format , regex_constants::match_flag_type flags = regex_constants::match_default ) { using namespace regex_constants; typedef detail::core_access access; BOOST_ASSERT(0 != re.regex_id()); BidiIter cur = begin; match_results what; detail::match_state state(begin, end, what, *access::get_regex_impl(re), flags); bool const yes_copy = (0 == (flags & format_no_copy)); if(detail::regex_search_impl(state, re)) { if(yes_copy) { out = std::copy(cur, what[0].first, out); } out = what.format(out, format, flags); cur = state.cur_ = state.next_search_ = what[0].second; if(0 == (flags & format_first_only)) { bool not_null = (0 == what.length()); state.reset(what, *access::get_regex_impl(re)); while(detail::regex_search_impl(state, re, not_null)) { if(yes_copy) { out = std::copy(cur, what[0].first, out); } access::set_prefix_suffix(what, begin, end); out = what.format(out, format, flags); cur = state.cur_ = state.next_search_ = what[0].second; not_null = (0 == what.length()); state.reset(what, *access::get_regex_impl(re)); } } } if(yes_copy) { out = std::copy(cur, end, out); } return out; } } // namespace detail /// \brief Build an output sequence given an input sequence, a regex, and a format string or /// a formatter object, function, or expression. /// /// Constructs a \c regex_iterator object: regex_iterator\< BidiIter \> i(begin, end, re, flags), /// and uses \c i to enumerate through all of the matches m of type match_results\< BidiIter \> that /// occur within the sequence [begin, end). If no such matches are found and !(flags \& format_no_copy) /// then calls std::copy(begin, end, out). Otherwise, for each match found, if !(flags \& format_no_copy) /// calls std::copy(m.prefix().first, m.prefix().second, out), and then calls m.format(out, format, flags). /// Finally if !(flags \& format_no_copy) calls std::copy(last_m.suffix().first, last_m.suffix().second, out) /// where \c last_m is a copy of the last match found. /// /// If flags \& format_first_only is non-zero then only the first match found is replaced. /// /// \pre Type \c BidiIter meets the requirements of a Bidirectional Iterator (24.1.4). /// \pre Type \c OutIter meets the requirements of an Output Iterator (24.1.2). /// \pre Type \c Formatter models \c ForwardRange, Callable\ \>, /// Callable\, OutIter\>, or /// Callable\, OutIter, regex_constants::match_flag_type\>; /// or else it is a null-terminated format string, or an expression template /// representing a formatter lambda expression. /// \pre [begin,end) denotes a valid iterator range. /// \param out An output iterator into which the output sequence is written. /// \param begin The beginning of the input sequence. /// \param end The end of the input sequence. /// \param re The regular expression object to use. /// \param format The format string used to format the replacement sequence, /// or a formatter function, function object, or expression. /// \param flags Optional match flags, used to control how the expression is matched against /// the sequence. (See \c match_flag_type.) /// \return The value of the output iterator after the output sequence has been written to it. /// \throw regex_error on stack exhaustion or invalid format string. template inline OutIter regex_replace ( OutIter out , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) begin , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) end , basic_regex const &re , Formatter const &format , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { out = std::copy(begin, end, out); } return out; } return detail::regex_replace_impl(out, begin, end, re, format, flags); } /// \overload /// template inline OutIter regex_replace ( OutIter out , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) begin , BOOST_XPR_NONDEDUCED_TYPE_(BidiIter) end , basic_regex const &re , typename iterator_value::type const *format , regex_constants::match_flag_type flags = regex_constants::match_default ) { // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { out = std::copy(begin, end, out); } return out; } return detail::regex_replace_impl(out, begin, end, re, format, flags); } /// \overload /// template inline BidiContainer regex_replace ( BidiContainer &str , basic_regex const &re , Formatter const &format , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if, detail::is_char_ptr > >::type * = 0 ) { BidiContainer result; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(str), end = boost::end(str); // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { std::copy(begin, end, std::back_inserter(result)); } return result; } detail::regex_replace_impl(std::back_inserter(result), begin, end, re, format, flags); return result; } /// \overload /// template inline BidiContainer regex_replace ( BidiContainer const &str , basic_regex const &re , Formatter const &format , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if, detail::is_char_ptr > >::type * = 0 ) { BidiContainer result; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(str), end = boost::end(str); // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { std::copy(begin, end, std::back_inserter(result)); } return result; } detail::regex_replace_impl(std::back_inserter(result), begin, end, re, format, flags); return result; } /// \overload /// template inline std::basic_string::type> regex_replace ( BOOST_XPR_NONDEDUCED_TYPE_(Char) *str , basic_regex const &re , Formatter const &format , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { typedef typename remove_const::type char_type; std::basic_string result; // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { result = str; } return result; } Char *end = str + std::char_traits::length(str); detail::regex_replace_impl(std::back_inserter(result), str, end, re, format, flags); return result; } /// \overload /// template inline BidiContainer regex_replace ( BidiContainer &str , basic_regex const &re , typename iterator_value::type const *format , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { BidiContainer result; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(str), end = boost::end(str); // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { std::copy(begin, end, std::back_inserter(result)); } return result; } detail::regex_replace_impl(std::back_inserter(result), begin, end, re, format, flags); return result; } /// \overload /// template inline BidiContainer regex_replace ( BidiContainer const &str , basic_regex const &re , typename iterator_value::type const *format , regex_constants::match_flag_type flags = regex_constants::match_default , typename disable_if >::type * = 0 ) { BidiContainer result; // Note that the result iterator of the range must be convertible // to BidiIter here. BidiIter begin = boost::begin(str), end = boost::end(str); // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { std::copy(begin, end, std::back_inserter(result)); } return result; } detail::regex_replace_impl(std::back_inserter(result), begin, end, re, format, flags); return result; } /// \overload /// template inline std::basic_string::type> regex_replace ( BOOST_XPR_NONDEDUCED_TYPE_(Char) *str , basic_regex const &re , typename add_const::type *format , regex_constants::match_flag_type flags = regex_constants::match_default ) { typedef typename remove_const::type char_type; std::basic_string result; // Default-constructed regexes match nothing if(0 == re.regex_id()) { if((0 == (flags & regex_constants::format_no_copy))) { result = str; } return result; } Char *end = str + std::char_traits::length(str); detail::regex_replace_impl(std::back_inserter(result), str, end, re, format, flags); return result; } }} // namespace boost::xpressive #endif