/////////////////////////////////////////////////////////////////////////////// /// \file boyer_moore.hpp /// Contains the boyer-moore implementation. Note: this is *not* a general- /// purpose boyer-moore implementation. It truncates the search string at /// 256 characters, but it is sufficient for the needs of xpressive. // // Copyright 2007 Eric Niebler. Distributed under the Boost // Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 // MS compatible compilers support #pragma once #if defined(_MSC_VER) && (_MSC_VER >= 1020) # pragma once # pragma warning(push) # pragma warning(disable : 4100) // unreferenced formal parameter #endif #include // for UCHAR_MAX #include // for std::ptrdiff_t #include // for std::max #include #include #include #include #include #include namespace boost { namespace xpressive { namespace detail { /////////////////////////////////////////////////////////////////////////////// // boyer_moore // template struct boyer_moore : noncopyable { typedef typename iterator_value::type char_type; typedef Traits traits_type; typedef has_fold_case case_fold; typedef typename Traits::string_type string_type; // initialize the Boyer-Moore search data structure, using the // search sub-sequence to prime the pump. boyer_moore(char_type const *begin, char_type const *end, Traits const &traits, bool icase) : begin_(begin) , last_(begin) , fold_() , find_fun_( icase ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_) : &boyer_moore::find_ ) { std::ptrdiff_t const uchar_max = UCHAR_MAX; std::ptrdiff_t diff = std::distance(begin, end); this->length_ = static_cast((std::min)(diff, uchar_max)); std::fill_n(static_cast(this->offsets_), uchar_max + 1, this->length_); --this->length_; icase ? this->init_(traits, case_fold()) : this->init_(traits, mpl::false_()); } BidiIter find(BidiIter begin, BidiIter end, Traits const &traits) const { return (this->*this->find_fun_)(begin, end, traits); } private: void init_(Traits const &traits, mpl::false_) { for(unsigned char offset = this->length_; offset; --offset, ++this->last_) { this->offsets_[traits.hash(*this->last_)] = offset; } } void init_(Traits const &traits, mpl::true_) { this->fold_.reserve(this->length_ + 1); for(unsigned char offset = this->length_; offset; --offset, ++this->last_) { this->fold_.push_back(traits.fold_case(*this->last_)); for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end(); beg != end; ++beg) { this->offsets_[traits.hash(*beg)] = offset; } } this->fold_.push_back(traits.fold_case(*this->last_)); } // case-sensitive Boyer-Moore search BidiIter find_(BidiIter begin, BidiIter end, Traits const &traits) const { typedef typename boost::iterator_difference::type diff_type; diff_type const endpos = std::distance(begin, end); diff_type offset = static_cast(this->length_); for(diff_type curpos = offset; curpos < endpos; curpos += offset) { std::advance(begin, offset); char_type const *pat_tmp = this->last_; BidiIter str_tmp = begin; for(; traits.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) { if(pat_tmp == this->begin_) { return str_tmp; } } offset = this->offsets_[traits.hash(traits.translate(*begin))]; } return end; } // case-insensitive Boyer-Moore search BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &traits) const { typedef typename boost::iterator_difference::type diff_type; diff_type const endpos = std::distance(begin, end); diff_type offset = static_cast(this->length_); for(diff_type curpos = offset; curpos < endpos; curpos += offset) { std::advance(begin, offset); char_type const *pat_tmp = this->last_; BidiIter str_tmp = begin; for(; traits.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) { if(pat_tmp == this->begin_) { return str_tmp; } } offset = this->offsets_[traits.hash(traits.translate_nocase(*begin))]; } return end; } // case-insensitive Boyer-Moore search with case-folding BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &traits) const { typedef typename boost::iterator_difference::type diff_type; diff_type const endpos = std::distance(begin, end); diff_type offset = static_cast(this->length_); for(diff_type curpos = offset; curpos < endpos; curpos += offset) { std::advance(begin, offset); string_type const *pat_tmp = &this->fold_.back(); BidiIter str_tmp = begin; for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp); --pat_tmp, --str_tmp) { if(pat_tmp == &this->fold_.front()) { return str_tmp; } } offset = this->offsets_[traits.hash(*begin)]; } return end; } private: char_type const *begin_; char_type const *last_; std::vector fold_; BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const; unsigned char length_; unsigned char offsets_[UCHAR_MAX + 1]; }; }}} // namespace boost::xpressive::detail #if defined(_MSC_VER) && (_MSC_VER >= 1020) # pragma warning(pop) #endif #endif