Package lepl :: Package regexp :: Module matchers
[hide private]
[frames] | no frames]

Source Code for Module lepl.regexp.matchers

  1   
  2  # The contents of this file are subject to the Mozilla Public License 
  3  # (MPL) Version 1.1 (the "License"); you may not use this file except 
  4  # in compliance with the License. You may obtain a copy of the License 
  5  # at http://www.mozilla.org/MPL/ 
  6  # 
  7  # Software distributed under the License is distributed on an "AS IS" 
  8  # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
  9  # the License for the specific language governing rights and 
 10  # limitations under the License. 
 11  # 
 12  # The Original Code is LEPL (http://www.acooke.org/lepl) 
 13  # The Initial Developer of the Original Code is Andrew Cooke. 
 14  # Portions created by the Initial Developer are Copyright (C) 2009-2010 
 15  # Andrew Cooke (andrew@acooke.org). All Rights Reserved. 
 16  # 
 17  # Alternatively, the contents of this file may be used under the terms 
 18  # of the LGPL license (the GNU Lesser General Public License, 
 19  # http://www.gnu.org/licenses/lgpl.html), in which case the provisions 
 20  # of the LGPL License are applicable instead of those above. 
 21  # 
 22  # If you wish to allow use of your version of this file only under the 
 23  # terms of the LGPL License and not to allow others to use your version 
 24  # of this file under the MPL, indicate your decision by deleting the 
 25  # provisions above and replace them with the notice and other provisions 
 26  # required by the LGPL License.  If you do not delete the provisions 
 27  # above, a recipient may use your version of this file under either the 
 28  # MPL or the LGPL License. 
 29   
 30  ''' 
 31  Matchers that call the regular expression engine. 
 32   
 33  These are used internally for rewriting; users typically use `Regexp` which 
 34  calls the standard Python regular expression library (and so is faster). 
 35  ''' 
 36   
 37  from lepl.matchers.support import Transformable, NoTrampoline 
 38  from lepl.matchers.transform import raise_ 
 39  from lepl.core.parser import tagged 
 40  from lepl.regexp.core import Compiler 
 41  from lepl.regexp.unicode import UnicodeAlphabet 
# pylint: disable-msg=R0904, R0901, E1101
# lepl convention
class BaseRegexp(NoTrampoline, Transformable):
    '''
    Shared behaviour for the regular-expression matchers in this module.

    Subclasses are expected to supply `_untagged_match(stream_in)`, which
    `_match` iterates over.
    '''

    # pylint: disable-msg=E1101
    # (using _arg to set attributes)
    def __init__(self, regexp, alphabet=None):
        '''
        Record the expression and alphabet via `_arg` / `_karg`.

        NOTE(review): `compose` later reads `self.regexp` and
        `self.alphabet`, so `_arg` / `_karg` presumably also set those
        attributes - confirm against the support classes.
        '''
        super(BaseRegexp, self).__init__()
        self._arg(regexp=regexp)
        self._karg(alphabet=alphabet)

    def compose(self, wrapper):
        '''
        Implement the Transformable interface: build a new matcher of the
        same concrete type whose wrapper is this one's composed with
        `wrapper`.  The receiver itself is left unchanged.
        '''
        clone = type(self)(self.regexp, self.alphabet)
        clone.wrapper = self.wrapper.compose(wrapper)
        return clone

    @tagged
    def _match(self, stream_in):
        '''
        Pass every result from `_untagged_match` straight through.
        '''
        results = self._untagged_match(stream_in)
        for result in results:
            yield result
73
class NfaRegexp(BaseRegexp):
    '''
    A matcher for NFA-based regular expressions.  This will yield alternative
    matches.

    This doesn't suffer from the same limitations as `Regexp` (it can "see"
    all the input data, if necessary), but currently has quite basic syntax
    and no grouping (the syntax may improve, but grouping will not be added -
    use LEPL itself for complex problems).
    '''

    def __init__(self, regexp, alphabet=None):
        '''
        `regexp` is the expression to match.  If `alphabet` is None the
        value of `UnicodeAlphabet.instance()` is used.
        '''
        alphabet = UnicodeAlphabet.instance() if alphabet is None else alphabet
        super(NfaRegexp, self).__init__(regexp, alphabet)
        # Filled in on first use by _compile().
        self.__cached_matcher = None

    def _compile(self):
        '''
        Compile the matcher.

        The compiled NFA `match` callable is built on the first call and
        then reused from the instance.
        '''
        if self.__cached_matcher is None:
            self.__cached_matcher = \
                Compiler.single(self.alphabet, self.regexp).nfa().match
        return self.__cached_matcher

    def _untagged_match(self, stream_in):
        '''
        Actually do the work of matching.

        Yields one result per match produced by the compiled NFA.  When
        the wrapper has a transformation function it is applied to each
        match; otherwise the raw `([match], stream_out)` pair is yielded.

        Once the matches are exhausted, a transformation function (if
        any) is repeatedly offered a "matcher" that raises StopIteration;
        whatever it returns is yielded.  The generator ends as soon as
        that StopIteration propagates back out of the function.
        '''
        function = self.wrapper.function
        matches = self._compile()(stream_in)
        for (_terminal, match, stream_out) in matches:
            yield function(stream_in, lambda: ([match], stream_out)) \
                if function else ([match], stream_out)
        # With no transformation there is nothing to offer the failure to
        # (and calling a missing function would raise TypeError).
        if not function:
            return
        while True:
            # Catch the StopIteration here and return explicitly: letting
            # it escape a generator body is a RuntimeError under PEP 479.
            try:
                result = function(stream_in, lambda: raise_(StopIteration))
            except StopIteration:
                return
            yield result
111
class DfaRegexp(BaseRegexp):
    '''
    A matcher for DFA-based regular expressions.  This yields a single greedy
    match.

    Typically used only in specialised situations (see `Regexp`).
    '''

    def __init__(self, regexp, alphabet=None):
        '''
        `regexp` is the expression to match.  If `alphabet` is None the
        value of `UnicodeAlphabet.instance()` is used.
        '''
        alphabet = UnicodeAlphabet.instance() if alphabet is None else alphabet
        super(DfaRegexp, self).__init__(regexp, alphabet)
        # Filled in on first use by _compile().
        self.__cached_matcher = None

    def _compile(self):
        '''
        Compile the matcher.

        The compiled DFA `match` callable is built on the first call and
        then reused from the instance.
        '''
        if self.__cached_matcher is None:
            self.__cached_matcher = \
                Compiler.single(self.alphabet, self.regexp).dfa().match
        return self.__cached_matcher

    def _untagged_match(self, stream_in):
        '''
        Actually do the work of matching.

        The compiled DFA returns either None or a
        `(terminals, match, stream_out)` triple.  For a triple, a single
        result is yielded: the wrapper's transformation function applied
        to the match if there is one, otherwise the raw
        `([match], stream_out)` pair.

        After that, a transformation function (if any) is repeatedly
        offered a "matcher" that raises StopIteration; whatever it
        returns is yielded.  The generator ends as soon as that
        StopIteration propagates back out of the function.
        '''
        function = self.wrapper.function
        match = self._compile()(stream_in)
        if match is not None:
            (_terminals, match, stream_out) = match
            yield function(stream_in, lambda: ([match], stream_out)) \
                if function else ([match], stream_out)
        # With no transformation there is nothing to offer the failure to
        # (and calling a missing function would raise TypeError).
        if not function:
            return
        while True:
            # Catch the StopIteration here and return explicitly: letting
            # it escape a generator body is a RuntimeError under PEP 479.
            try:
                result = function(stream_in, lambda: raise_(StopIteration))
            except StopIteration:
                return
            yield result
147