Package lepl :: Package matchers :: Module complex
[hide private]
[frames | no frames]

Source Code for Module lepl.matchers.complex

  1   
  2  # The contents of this file are subject to the Mozilla Public License 
  3  # (MPL) Version 1.1 (the "License"); you may not use this file except 
  4  # in compliance with the License. You may obtain a copy of the License 
  5  # at http://www.mozilla.org/MPL/ 
  6  # 
  7  # Software distributed under the License is distributed on an "AS IS" 
  8  # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
  9  # the License for the specific language governing rights and 
 10  # limitations under the License. 
 11  # 
 12  # The Original Code is LEPL (http://www.acooke.org/lepl) 
 13  # The Initial Developer of the Original Code is Andrew Cooke. 
 14  # Portions created by the Initial Developer are Copyright (C) 2009-2010 
 15  # Andrew Cooke (andrew@acooke.org). All Rights Reserved. 
 16  # 
 17  # Alternatively, the contents of this file may be used under the terms 
 18  # of the LGPL license (the GNU Lesser General Public License, 
 19  # http://www.gnu.org/licenses/lgpl.html), in which case the provisions 
 20  # of the LGPL License are applicable instead of those above. 
 21  # 
 22  # If you wish to allow use of your version of this file only under the 
 23  # terms of the LGPL License and not to allow others to use your version 
 24  # of this file under the MPL, indicate your decision by deleting the 
 25  # provisions above and replace them with the notice and other provisions 
 26  # required by the LGPL License.  If you do not delete the provisions 
 27  # above, a recipient may use your version of this file under either the 
 28  # MPL or the LGPL License. 
 29   
 30  ''' 
 31  Complex matchers that are rarely used. 
 32  ''' 
 33   
 34  from lepl.matchers.core import Literal 
 35  from lepl.regexp.matchers import DfaRegexp 
 36  from lepl.matchers.support import to, trampoline_matcher_factory 
 37  from lepl.stream.factory import DEFAULT_STREAM_FACTORY 
 38  from lepl.stream.core import s_line, s_stream, s_next, s_fmt, s_id 
 39  from lepl.support.lib import fmt 
 40   
 41   
 42  @trampoline_matcher_factory(matcher=to(Literal), condition=to(DfaRegexp)) 
43 -def PostMatch(matcher, condition, not_=False, equals=True, stream_factory=None):
44 ''' 45 Apply the condition to each result from the matcher. It should return 46 either an exact match (equals=True) or simply not fail (equals=False). 47 If `not_` is set, the test is inverted. 48 49 `matcher` is coerced to `Literal()`, condition to `DfaRegexp()` 50 51 `factory` is used to generate a stream from the result. If not set the 52 default factory is used. 53 ''' 54 def match(support, stream_in, stream_factory=stream_factory): 55 ''' 56 Do the match and test the result. 57 ''' 58 stream_factory = stream_factory if stream_factory else DEFAULT_STREAM_FACTORY 59 generator = matcher._match(stream_in) 60 while True: 61 (results, stream_out) = yield generator 62 success = True 63 for result in results: 64 if not success: break 65 generator2 = condition._match(stream_factory(result)) 66 try: 67 (results2, _ignored) = yield generator2 68 if not_: 69 # if equals is false, we need to fail just because 70 # we matched. otherwise, we need to fail only if 71 # we match. 72 if not equals or (len(results2) == 1 or 73 results2[0] == result): 74 success = False 75 else: 76 # if equals is false, not generating an error is 77 # sufficient, otherwise we must fail if the result 78 # does not match 79 if equals and (len(results2) != 1 or 80 results2[0] != result): 81 success = False 82 except: 83 # fail unless if we were expecting any kind of match 84 if not not_: 85 success = False 86 if success: 87 yield (results, stream_out)
88 89 return match 90
@trampoline_matcher_factory()
def _Columns(indices, *matchers):
    '''
    Implementation behind `Columns()`.

    `indices` is a sequence of (left, right) pairs and `matchers` the
    matchers to apply, pairwise, to those slices of the current line.  The
    results of all matchers are concatenated into a single list, which is
    returned together with the stream that follows the line.
    '''
    def match(support, stream):
        # every column gets its own stream id (base id + 1, + 2, ...) so
        # that different strings (which might overlap or be contiguous)
        # don't affect each other's memoisation - the hash key is based on
        # offset, and the ('one past the') end of one column can have the
        # same offset as the start of the next.
        base_id = s_id(stream)
        # pull out a line and wrap it in its own stream
        (line, next_stream) = s_line(stream, False)
        line_stream = s_stream(stream, line)
        collected = []
        numbered = enumerate(zip(indices, matchers), 1)
        for (offset, ((left, right), matcher)) in numbered:
            # skip to the left edge, then take the text up to the right edge
            (_, shifted_stream) = s_next(line_stream, count=left)
            (word, _) = s_next(shifted_stream, count=right-left)
            support._debug(fmt('Columns {0}-{1} {2!r}', left, right, word))
            word_stream = s_stream(shifted_stream, word, id_=base_id + offset)
            # run the column's matcher against just that text
            support._debug(s_fmt(word_stream, 'matching {rest}'))
            (result, _) = yield matcher._match(word_stream)
            collected.extend(result)
        support._debug(repr(collected))
        yield (collected, next_stream)

    return match
def Columns(*columns, **kargs):
    '''
    Match data in a set of columns.

    This is a fairly complex matcher.  It allows matchers to be associated
    with a range of indices (measured from the current point in the stream)
    and only succeeds if all matchers succeed.  The results are returned in
    a list, in the same order as the matchers are specified.

    Each argument is a (column, matcher) pair.  A range of indices is given
    as a tuple (start, stop) which works like an array index.  So (0, 4)
    selects the first four characters (like [0:4]).  Alternatively, a number
    of characters can be given, in which case they start where the previous
    column finished (or at zero for the first).

    The matcher for each column will see the (selected) input data as a
    separate stream.  If a matcher should consume the entire column then
    it should check for `Eos`.

    Data to "the right" of the columns and data to "the left" are simply
    discarded.  Keyword arguments are accepted but are not currently used
    by this function.

    Note: This does not support backtracking over the columns.
    '''
    # Note - this is the public-facing wrapper that pre-processes the
    # arguments so that matchers are handled correctly during cloning.  The
    # work is done by `_Columns`.
    pairs = []
    right = 0
    for (col, matcher) in columns:
        try:
            # an explicit (start, stop) range
            (left, right) = col
        except TypeError:
            # a plain width: continue from where the previous column ended
            left = right
            right = right + col
        pairs.append(((left, right), matcher))
    (indices, matchers) = zip(*pairs)
    return _Columns(indices, *matchers)
@trampoline_matcher_factory()
def Iterate(matcher):
    '''
    Repeatedly apply a single matcher, returning one result per match as
    the input is consumed.  This is simple to implement, but conceptually
    rather odd.

    Because each match is returned separately, `parse_all()` is needed to
    retrieve the entire result (and there is no backtracking).

    In practice, if the top level of your matcher is a repeating element
    (for example, lines in a file) then the entire parser can be treated as
    a lazy iterator over the input.  The obvious application is with
    `.config.low_memory()` as this allows for large output to be generated
    without consuming a large amount of memory.
    '''
    def match(support, stream):
        remaining = stream
        while True:
            # match once, hand the result on, then carry on from the stream
            # that the match left behind
            (result, remaining) = yield matcher._match(remaining)
            yield (result, remaining)

    return match