/* * * Copyright (c) 2002 * John Maddock * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /* * LOCATION: see http://www.boost.org for most recent version. * FILE perl_matcher_common.cpp * VERSION see * DESCRIPTION: Definitions of perl_matcher member functions that are * specific to the recursive implementation. */ #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) #endif #ifdef BOOST_HAS_ABI_HEADERS # include BOOST_ABI_PREFIX #endif #ifdef BOOST_MSVC #pragma warning(pop) #endif #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4800) #endif namespace boost{ namespace re_detail{ template class backup_subex { int index; sub_match sub; public: template backup_subex(const match_results& w, int i) : index(i), sub(w[i], false) {} template void restore(match_results& w) { w.set_first(sub.first, index); w.set_second(sub.second, index, sub.matched); } const sub_match& get() { return sub; } }; template bool perl_matcher::match_all_states() { static matcher_proc_type const s_match_vtable[29] = { (&perl_matcher::match_startmark), &perl_matcher::match_endmark, &perl_matcher::match_literal, &perl_matcher::match_start_line, &perl_matcher::match_end_line, &perl_matcher::match_wild, &perl_matcher::match_match, &perl_matcher::match_word_boundary, &perl_matcher::match_within_word, &perl_matcher::match_word_start, &perl_matcher::match_word_end, &perl_matcher::match_buffer_start, &perl_matcher::match_buffer_end, &perl_matcher::match_backref, &perl_matcher::match_long_set, &perl_matcher::match_set, &perl_matcher::match_jump, &perl_matcher::match_alt, &perl_matcher::match_rep, &perl_matcher::match_combining, &perl_matcher::match_soft_buffer_end, &perl_matcher::match_restart_continue, (::boost::is_random_access_iterator::value ? &perl_matcher::match_dot_repeat_fast : &perl_matcher::match_dot_repeat_slow), &perl_matcher::match_char_repeat, &perl_matcher::match_set_repeat, &perl_matcher::match_long_set_repeat, &perl_matcher::match_backstep, &perl_matcher::match_assert_backref, &perl_matcher::match_toggle_case, }; if(state_count > max_state_count) raise_error(traits_inst, regex_constants::error_space); while(pstate) { matcher_proc_type proc = s_match_vtable[pstate->type]; ++state_count; if(!(this->*proc)()) { if((m_match_flags & match_partial) && (position == last) && (position != search_base)) m_has_partial_match = true; return 0; } } return true; } template bool perl_matcher::match_startmark() { int index = static_cast(pstate)->index; bool r = true; switch(index) { case 0: pstate = pstate->next.p; break; case -1: case -2: { // forward lookahead assert: BidiIterator old_position(position); const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; pstate = pstate->next.p->next.p; r = match_all_states(); pstate = next_pstate; position = old_position; if((r && (index != -1)) || (!r && (index != -2))) r = false; else r = true; break; } case -3: { // independent sub-expression: bool old_independent = m_independent; m_independent = true; const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; pstate = pstate->next.p->next.p; r = match_all_states(); pstate = next_pstate; m_independent = old_independent; #ifdef BOOST_REGEX_MATCH_EXTRA if(r && (m_match_flags & match_extra)) { // // our captures have been stored in *m_presult // we need to unpack them, and insert them // back in the right order when we unwind the stack: // unsigned i; match_results tm(*m_presult); for(i = 0; i < tm.size(); ++i) (*m_presult)[i].get_captures().clear(); // match everything else: r = match_all_states(); // now place the stored captures back: for(i = 0; i < tm.size(); ++i) { typedef typename sub_match::capture_sequence_type seq; seq& s1 = (*m_presult)[i].get_captures(); const seq& s2 = tm[i].captures(); s1.insert( s1.end(), s2.begin(), s2.end()); } } #endif break; } case -4: { // conditional expression: const re_alt* alt = static_cast(pstate->next.p); BOOST_ASSERT(alt->type == syntax_element_alt); pstate = alt->next.p; if(pstate->type == syntax_element_assert_backref) { if(!match_assert_backref()) pstate = alt->alt.p; break; } else { // zero width assertion, have to match this recursively: BOOST_ASSERT(pstate->type == syntax_element_startmark); bool negated = static_cast(pstate)->index == -2; BidiIterator saved_position = position; const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; pstate = pstate->next.p->next.p; bool r = match_all_states(); position = saved_position; if(negated) r = !r; if(r) pstate = next_pstate; else pstate = alt->alt.p; break; } } default: { BOOST_ASSERT(index > 0); if((m_match_flags & match_nosubs) == 0) { backup_subex sub(*m_presult, index); m_presult->set_first(position, index); pstate = pstate->next.p; r = match_all_states(); if(r == false) sub.restore(*m_presult); #ifdef BOOST_REGEX_MATCH_EXTRA // // we have a match, push the capture information onto the stack: // else if(sub.get().matched && (match_extra & m_match_flags)) ((*m_presult)[index]).get_captures().push_back(sub.get()); #endif } else { pstate = pstate->next.p; } break; } } return r; } template bool perl_matcher::match_alt() { bool take_first, take_second; const re_alt* jmp = static_cast(pstate); // find out which of these two alternatives we need to take: if(position == last) { take_first = jmp->can_be_null & mask_take; take_second = jmp->can_be_null & mask_skip; } else { take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); } if(take_first) { // we can take the first alternative, // see if we need to push next alternative: if(take_second) { BidiIterator oldposition(position); const re_syntax_base* old_pstate = jmp->alt.p; pstate = pstate->next.p; if(!match_all_states()) { pstate = old_pstate; position = oldposition; } return true; } pstate = pstate->next.p; return true; } if(take_second) { pstate = jmp->alt.p; return true; } return false; // neither option is possible } template bool perl_matcher::match_rep() { #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4127 4244) #endif const re_repeat* rep = static_cast(pstate); // // Always copy the repeat count, so that the state is restored // when we exit this scope: // repeater_count r(rep->id, &next_count, position); // // If we've had at least one repeat already, and the last one // matched the NULL string then set the repeat count to // maximum: // next_count->check_null_repeat(position, rep->max); // find out which of these two alternatives we need to take: bool take_first, take_second; if(position == last) { take_first = rep->can_be_null & mask_take; take_second = rep->can_be_null & mask_skip; } else { take_first = can_start(*position, rep->_map, (unsigned char)mask_take); take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); } if(next_count->get_count() < rep->min) { // we must take the repeat: if(take_first) { // increase the counter: ++(*next_count); pstate = rep->next.p; return match_all_states(); } return false; } bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); if(greedy) { // try and take the repeat if we can: if((next_count->get_count() < rep->max) && take_first) { // store position in case we fail: BidiIterator pos = position; // increase the counter: ++(*next_count); pstate = rep->next.p; if(match_all_states()) return true; // failed repeat, reset posistion and fall through for alternative: position = pos; } if(take_second) { pstate = rep->alt.p; return true; } return false; // can't take anything, fail... } else // non-greedy { // try and skip the repeat if we can: if(take_second) { // store position in case we fail: BidiIterator pos = position; pstate = rep->alt.p; if(match_all_states()) return true; // failed alternative, reset posistion and fall through for repeat: position = pos; } if((next_count->get_count() < rep->max) && take_first) { // increase the counter: ++(*next_count); pstate = rep->next.p; return match_all_states(); } } return false; #ifdef BOOST_MSVC #pragma warning(pop) #endif } template bool perl_matcher::match_dot_repeat_slow() { #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4127) #endif unsigned count = 0; const re_repeat* rep = static_cast(pstate); re_syntax_base* psingle = rep->next.p; // match compulsary repeats first: while(count < rep->min) { pstate = psingle; if(!match_wild()) return false; ++count; } bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); if(greedy) { // normal repeat: while(count < rep->max) { pstate = psingle; if(!match_wild()) break; ++count; } if((rep->leading) && (count < rep->max)) restart = position; pstate = rep; return backtrack_till_match(count - rep->min); } else { // non-greedy, keep trying till we get a match: BidiIterator save_pos; do { if((rep->leading) && (rep->max == UINT_MAX)) restart = position; pstate = rep->alt.p; save_pos = position; ++state_count; if(match_all_states()) return true; if(count >= rep->max) return false; ++count; pstate = psingle; position = save_pos; if(!match_wild()) return false; }while(true); } #ifdef BOOST_MSVC #pragma warning(pop) #endif } template bool perl_matcher::match_dot_repeat_fast() { #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4127) #endif if(m_match_flags & match_not_dot_null) return match_dot_repeat_slow(); if((static_cast(pstate->next.p)->mask & match_any_mask) == 0) return match_dot_repeat_slow(); // // start by working out how much we can skip: // const re_repeat* rep = static_cast(pstate); #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4267) #endif bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); std::size_t count = (std::min)(static_cast(::boost::re_detail::distance(position, last)), static_cast(greedy ? rep->max : rep->min)); if(rep->min > count) { position = last; return false; // not enough text left to match } std::advance(position, count); #ifdef BOOST_MSVC #pragma warning(pop) #endif if((rep->leading) && (count < rep->max) && greedy) restart = position; if(greedy) return backtrack_till_match(count - rep->min); // non-greedy, keep trying till we get a match: BidiIterator save_pos; do { while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) { ++position; ++count; } if((rep->leading) && (rep->max == UINT_MAX)) restart = position; pstate = rep->alt.p; save_pos = position; ++state_count; if(match_all_states()) return true; if(count >= rep->max) return false; if(save_pos == last) return false; position = ++save_pos; ++count; }while(true); #ifdef BOOST_MSVC #pragma warning(pop) #endif } template bool perl_matcher::match_char_repeat() { #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4127) #pragma warning(disable:4267) #endif #ifdef __BORLANDC__ #pragma option push -w-8008 -w-8066 -w-8004 #endif const re_repeat* rep = static_cast(pstate); BOOST_ASSERT(1 == static_cast(rep->next.p)->length); const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); // // start by working out how much we can skip: // bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); std::size_t count, desired; if(::boost::is_random_access_iterator::value) { desired = (std::min)( (std::size_t)(greedy ? rep->max : rep->min), (std::size_t)::boost::re_detail::distance(position, last)); count = desired; ++desired; if(icase) { while(--desired && (traits_inst.translate_nocase(*position) == what)) { ++position; } } else { while(--desired && (traits_inst.translate(*position) == what)) { ++position; } } count = count - desired; } else { count = 0; desired = greedy ? rep->max : rep->min; while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) { ++position; ++count; } } if((rep->leading) && (count < rep->max) && greedy) restart = position; if(count < rep->min) return false; if(greedy) return backtrack_till_match(count - rep->min); // non-greedy, keep trying till we get a match: BidiIterator save_pos; do { while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) { if((traits_inst.translate(*position, icase) == what)) { ++position; ++count; } else return false; // counldn't repeat even though it was the only option } if((rep->leading) && (rep->max == UINT_MAX)) restart = position; pstate = rep->alt.p; save_pos = position; ++state_count; if(match_all_states()) return true; if(count >= rep->max) return false; position = save_pos; if(position == last) return false; if(traits_inst.translate(*position, icase) == what) { ++position; ++count; } else { return false; } }while(true); #ifdef __BORLANDC__ #pragma option pop #endif #ifdef BOOST_MSVC #pragma warning(pop) #endif } template bool perl_matcher::match_set_repeat() { #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4127) #endif #ifdef __BORLANDC__ #pragma option push -w-8008 -w-8066 -w-8004 #endif const re_repeat* rep = static_cast(pstate); const unsigned char* map = static_cast(rep->next.p)->_map; unsigned count = 0; // // start by working out how much we can skip: // bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); std::size_t desired = greedy ? rep->max : rep->min; if(::boost::is_random_access_iterator::value) { BidiIterator end = position; std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired)); BidiIterator origin(position); while((position != end) && map[static_cast(traits_inst.translate(*position, icase))]) { ++position; } count = (unsigned)::boost::re_detail::distance(origin, position); } else { while((count < desired) && (position != last) && map[static_cast(traits_inst.translate(*position, icase))]) { ++position; ++count; } } if((rep->leading) && (count < rep->max) && greedy) restart = position; if(count < rep->min) return false; if(greedy) return backtrack_till_match(count - rep->min); // non-greedy, keep trying till we get a match: BidiIterator save_pos; do { while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) { if(map[static_cast(traits_inst.translate(*position, icase))]) { ++position; ++count; } else return false; // counldn't repeat even though it was the only option } if((rep->leading) && (rep->max == UINT_MAX)) restart = position; pstate = rep->alt.p; save_pos = position; ++state_count; if(match_all_states()) return true; if(count >= rep->max) return false; position = save_pos; if(position == last) return false; if(map[static_cast(traits_inst.translate(*position, icase))]) { ++position; ++count; } else { return false; } }while(true); #ifdef __BORLANDC__ #pragma option pop #endif #ifdef BOOST_MSVC #pragma warning(pop) #endif } template bool perl_matcher::match_long_set_repeat() { #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4127) #endif #ifdef __BORLANDC__ #pragma option push -w-8008 -w-8066 -w-8004 #endif typedef typename traits::char_class_type char_class_type; const re_repeat* rep = static_cast(pstate); const re_set_long* set = static_cast*>(pstate->next.p); unsigned count = 0; // // start by working out how much we can skip: // bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); std::size_t desired = greedy ? rep->max : rep->min; if(::boost::is_random_access_iterator::value) { BidiIterator end = position; std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired)); BidiIterator origin(position); while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) { ++position; } count = (unsigned)::boost::re_detail::distance(origin, position); } else { while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) { ++position; ++count; } } if((rep->leading) && (count < rep->max) && greedy) restart = position; if(count < rep->min) return false; if(greedy) return backtrack_till_match(count - rep->min); // non-greedy, keep trying till we get a match: BidiIterator save_pos; do { while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) { if(position != re_is_set_member(position, last, set, re.get_data(), icase)) { ++position; ++count; } else return false; // counldn't repeat even though it was the only option } if((rep->leading) && (rep->max == UINT_MAX)) restart = position; pstate = rep->alt.p; save_pos = position; ++state_count; if(match_all_states()) return true; if(count >= rep->max) return false; position = save_pos; if(position == last) return false; if(position != re_is_set_member(position, last, set, re.get_data(), icase)) { ++position; ++count; } else { return false; } }while(true); #ifdef __BORLANDC__ #pragma option pop #endif #ifdef BOOST_MSVC #pragma warning(pop) #endif } template bool perl_matcher::backtrack_till_match(std::size_t count) { #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4127) #endif if((m_match_flags & match_partial) && (position == last)) m_has_partial_match = true; const re_repeat* rep = static_cast(pstate); BidiIterator backtrack = position; if(position == last) { if(rep->can_be_null & mask_skip) { pstate = rep->alt.p; if(match_all_states()) return true; } if(count) { position = --backtrack; --count; } else return false; } do { while(count && !can_start(*position, rep->_map, mask_skip)) { --position; --count; ++state_count; } pstate = rep->alt.p; backtrack = position; if(match_all_states()) return true; if(count == 0) return false; position = --backtrack; ++state_count; --count; }while(true); #ifdef BOOST_MSVC #pragma warning(pop) #endif } } // namespace re_detail } // namespace boost #ifdef BOOST_MSVC #pragma warning(pop) #endif #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) #endif #ifdef BOOST_HAS_ABI_HEADERS # include BOOST_ABI_SUFFIX #endif #ifdef BOOST_MSVC #pragma warning(pop) #endif #endif