1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 '''
31 Complex matchers that are rarely used.
32 '''
33
34 from lepl.matchers.core import Literal
35 from lepl.regexp.matchers import DfaRegexp
36 from lepl.matchers.support import to, trampoline_matcher_factory
37 from lepl.stream.factory import DEFAULT_STREAM_FACTORY
38 from lepl.stream.core import s_line, s_stream, s_next, s_fmt, s_id
39 from lepl.support.lib import fmt
40
41
@trampoline_matcher_factory(matcher=to(Literal), condition=to(DfaRegexp))
def PostMatch(matcher, condition, not_=False, equals=True, stream_factory=None):
    '''
    Apply the condition to each result from the matcher.  The condition
    must either reproduce the result exactly (equals=True) or simply not
    fail (equals=False).  If `not_` is set, the test is inverted.

    A match from `matcher` is passed on only if every one of its results
    passes the (possibly inverted) test; otherwise the next match from
    `matcher` is requested.

    `matcher` is coerced to `Literal()`, `condition` to `DfaRegexp()`.

    `stream_factory` is used to generate a stream from each result.  If not
    set the default factory is used.
    '''
    def match(support, stream_in, stream_factory=stream_factory):
        '''
        Do the match and test the result.
        '''
        stream_factory = stream_factory or DEFAULT_STREAM_FACTORY
        generator = matcher._match(stream_in)
        while True:
            (results, stream_out) = yield generator
            for result in results:
                generator2 = condition._match(stream_factory(result))
                try:
                    (results2, _ignored) = yield generator2
                    # With equals unset, getting this far is already a
                    # pass.  Otherwise the condition must have produced
                    # exactly one result, equal to the one being tested
                    # (note "and": a single but different result is not
                    # an exact match).
                    passed = not equals or (len(results2) == 1 and
                                            results2[0] == result)
                except Exception:
                    # Any error while running the condition counts as
                    # "did not match".  Catching Exception rather than
                    # using a bare except leaves GeneratorExit and
                    # KeyboardInterrupt alone, so this generator can be
                    # closed cleanly while it is waiting on the condition.
                    passed = False
                # `not_` inverts the outcome; stop at the first rejection
                if (passed if not_ else not passed):
                    break
            else:
                # no result was rejected (also true when there are none)
                yield (results, stream_out)

    return match
90
91
@trampoline_matcher_factory()
def _Columns(indices, *matchers):
    '''
    Implementation behind `Columns`: apply each matcher to its own slice
    of a single line.

    `indices` is a sequence of (left, right) pairs, taken in order together
    with `matchers`.  For each pair, `left` is the count passed to `s_next`
    from the start of the line and `right - left` the count then taken as
    the column text.

    The results from all the matchers are gathered into one list, which is
    returned with the stream that `s_line` gives for what follows the line.
    '''
    def match(support, stream):
        '''
        Cut the line into columns and match each one in turn.
        '''
        # Every column stream is given its own id, counting up from the id
        # of the incoming stream (presumably so that streams built from
        # different columns can be told apart -- confirm against s_stream).
        column_id = s_id(stream)
        (text, after_line) = s_line(stream, False)
        text_stream = s_stream(stream, text)
        collected = []
        for (bounds, matcher) in zip(indices, matchers):
            (left, right) = bounds
            column_id += 1
            # move to the left edge, then take the width of the column
            width = right - left
            (_, at_left_edge) = s_next(text_stream, count=left)
            (cell, _) = s_next(at_left_edge, count=width)
            support._debug(fmt('Columns {0}-{1} {2!r}', left, right, cell))
            cell_stream = s_stream(at_left_edge, cell, id_=column_id)
            # run the matcher against just this column
            support._debug(s_fmt(cell_stream, 'matching {rest}'))
            (matched, _) = yield matcher._match(cell_stream)
            collected.extend(matched)
        support._debug(repr(collected))
        yield (collected, after_line)

    return match
120
121
def Columns(*columns, **kargs):
    '''
    Match data in a set of columns.

    This is a fairly complex matcher.  It allows matchers to be associated
    with a range of indices (measured from the current point in the stream)
    and only succeeds if all matchers succeed.  The results are returned in
    a list, in the same order as the matchers are specified.

    Each argument is a (column, matcher) pair.  A range of indices is given
    as a tuple (start, stop) which works like an array index.  So (0, 4)
    selects the first four characters (like [0:4]).  Alternatively, a number
    of characters can be given, in which case they start where the previous
    column finished (or at zero for the first).

    The matcher for each column will see the (selected) input data as a
    separate stream.  If a matcher should consume the entire column then
    it should check for `Eos`.

    Finally, the skip parameter controls how data to "the right" of the
    columns is handled.  If unset, the data are discarded (this functions
    as an additional, final, column that currently drops data).  Data to
    "the left" are simply discarded.

    Note: keyword arguments (including `skip`) are accepted but are not
    read by this function, so they currently have no effect.

    Note: This does not support backtracking over the columns.

    Raises ValueError if no columns are given.
    '''
    if not columns:
        # Without this check zip(*...) below produces nothing and the
        # unpacking fails with an opaque "not enough values" ValueError.
        raise ValueError(
            'Columns requires at least one (column, matcher) pair')

    def clean():
        '''
        Normalise every column to an explicit (left, right) pair.
        '''
        right = 0
        for (col, matcher) in columns:
            try:
                (left, right) = col
            except TypeError:
                # col is a width rather than a pair: continue from where
                # the previous column stopped
                (left, right) = (right, right + col)
            yield ((left, right), matcher)

    (indices, matchers) = zip(*clean())
    return _Columns(indices, *matchers)
161
162
@trampoline_matcher_factory()
def Iterate(matcher):
    '''
    Repeatedly apply a single matcher, handing back one result per match
    as the input is consumed.

    Simple to implement, but conceptually a little odd: each match resumes
    from the stream left by the previous one, and every match is returned
    separately.  So `parse_all()` is needed to retrieve the entire result
    (and there is no backtracking).

    In practice, if the top level of a parser is a repeating element (for
    example, lines in a file) then the whole parser can be treated as a
    lazy iterator over the input.  The obvious application is with
    `.config.low_memory()`, as this allows large output to be generated
    without consuming a large amount of memory.
    '''
    def match(support, stream):
        '''
        Match, return the result, then continue from where the match ended.
        '''
        while True:
            outcome = yield matcher._match(stream)
            # rebind `stream` so the next pass starts after this match
            (found, stream) = outcome
            yield (found, stream)

    return match
184