1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 '''
31 Matchers that are defined in terms of other matchers (ie the majority).
32 '''
33
34 from string import whitespace, digits, ascii_letters, \
35 ascii_uppercase, ascii_lowercase, printable, punctuation
36 from operator import __add__
37
38 from lepl.matchers.combine import And, DepthFirst, BreadthFirst, \
39 OrderByResultCount, Or, Limit
40 from lepl.matchers.core import Lookahead, Any, Eof, Literal, Regexp
41 from lepl.matchers.operators import BREADTH_FIRST, DEPTH_FIRST, GREEDY, \
42 NON_GREEDY
43 from lepl.matchers.support import coerce_
44 from lepl.matchers.transform import TransformationWrapper, Transform, \
45 ApplyArgs, ApplyRaw
46 from lepl.regexp.matchers import NfaRegexp, DfaRegexp
47 from lepl.support.lib import assert_type, lmap, fmt, basestring, reduce
48 from lepl.support.warn import warn_on_use
49
50
51
52
53
54
55
56
57
58
59
60 -def Repeat(matcher, start=0, stop=None, limit=None, algorithm=DEPTH_FIRST,
61 separator=None, add_=False, reduce=None):
62 '''
63 This is called by the [] operator. It repeats the given matcher between
64 `start` and `stop` number of times (inclusive).
65
66 If `limit` is given it is an upper limit on the number of different
67 results returned on backtracking.
68
69 `algorithm` selects the repeat algorithm to use.
70
71 If `separator` is given then each repetition is separated by that matcher.
72
73 If `add_` is true then the results are joined with `Add` (once all
74 results are obtained).
75
76 If `reduce` is given it should be a pair (zero, join) where
77 `join(results, next)` is used to accumulate results and `zero` is the
78 initial value of `results`. This is implemented via `Reduce`.
79
80 `reduce` and `add_` cannot be given together.
81 '''
82 first = coerce_(matcher)
83 if separator is None:
84 rest = first
85 else:
86 rest = And(coerce_(separator, Regexp), first)
87 if start is None:
88 start = 0
89
90
91
92
93
94
95
96
97
98
99
100
101
102 if add_ and reduce:
103 raise ValueError('Repeat cannot apply both add_ and reduce')
104 elif add_:
105 process = Add
106 elif reduce:
107 process = lambda r: Reduce(r, reduce[0], reduce[1])
108 else:
109 process = Identity
110 matcher = {DEPTH_FIRST:
111 process(DepthFirst(first=first, start=start, stop=stop, rest=rest)),
112 BREADTH_FIRST:
113 process(BreadthFirst(first=first, start=start, stop=stop, rest=rest)),
114 GREEDY:
115 process(OrderByResultCount(
116 BreadthFirst(first=first, start=start, stop=stop, rest=rest))),
117 NON_GREEDY:
118 process(OrderByResultCount(
119 BreadthFirst(first=first, start=start, stop=stop, rest=rest),
120 False))
121 }[algorithm]
122 if limit is not None:
123 matcher = Limit(matcher, count=limit)
124 return matcher
125
126
127 -def Apply(matcher, function, raw=False, args=False):
128 '''
129 Apply an arbitrary function to the results of the matcher
130 (**>**, **>=**).
131
132 Apply can be used via the standard operators by placing ``>``
133 (or ``>=`` to set ``raw=True``, or ``*`` to set ``args=True``)
134 to the right of a matcher.
135
136 If the function is a `TransformationWrapper` it is used directly.
137 Otherwise a `TransformationWrapper` is constructed via the `raw` and
138 `args` parameters, as described below.
139
140 **Note:** The creation of named pairs (when a string argument is
141 used) behaves more like a mapping than a single function invocation.
142 If several values are present, several pairs will be created.
143
144 **Note:** There is an asymmetry in the default values of *raw*
145 and *args*. If the identity function is used with the default settings
146 then a list of results is passed as a single argument (``args=False``).
147 That is then returned (by the identity) as a list, which is wrapped
148 in an additional list (``raw=False``), giving an extra level of
149 grouping. This is necessary because Python's ``list()`` is an
150 identity for lists, but we want it to add an extra level of grouping
151 so that nested S-expressions are easy to generate.
152
153 See also `Map`.
154
155 :Parameters:
156
157 matcher
158 The matcher whose results will be modified.
159
160 function
161 The modification to apply.
162
163 If a `Transformation`, this is used directly.
164
165 If a string is given, named pairs will be created (and raw and args
166 ignored).
167
168 Otherwise the function should expect a list of results (unless
169 ``args=True`` in which case the list is supplied as ``*args``)
170 and can return any value (unless ``raw=True`` in which case it should
171 return a list).
172
173 raw
174 If True the results are used directly. Otherwise they are wrapped in
175 a list. The default is False --- a list is added. This is set to
176 true if the target function is an `ApplyRaw` instance.
177
178 args
179 If True, the results are passed to the function as separate
180 arguments (Python's '*args' behaviour). The default is False ---
181 the results are passed inside a list). This is set to true if the
182 target function is an `ApplyArgs` instance.
183 '''
184 raw = raw or (type(function) is type and issubclass(function, ApplyRaw))
185 args = args or (type(function) is type
186 and issubclass(function, ApplyArgs))
187 if isinstance(function, TransformationWrapper):
188 apply = function
189 else:
190 if isinstance(function, basestring):
191 function = lambda results, f=function: \
192 lmap(lambda x: (f, x), results)
193 raw = True
194 args = False
195 if args:
196 if raw:
197 function = lambda results, f=function: f(*results)
198 else:
199 function = lambda results, f=function: [f(*results)]
200 else:
201 if not raw:
202 function = lambda results, f=function: [f(results)]
203 def apply(stream_in, matcher):
204 (results, stream_out) = matcher()
205 return (function(results), stream_out)
206 return Transform(matcher, apply)
207
208
209 -def args(function):
210 '''
211 A decorator that has the same effect as ApplyArgs for functions/methods.
212 '''
213 def wrapper(args_):
214 '''
215 Apply args as *args.
216 '''
217 return function(*args_)
218 return wrapper
219
220
221 -def KApply(matcher, function, raw=False):
222 '''
223 Apply an arbitrary function to named arguments (**\****).
224 The function should typically expect and return a list.
225 It can be used indirectly by placing ``**=`` to the right of the matcher.
226
227 The function will be applied with the following keyword arguments:
228
229 stream_out
230 The stream returned from the matcher.
231
232 results
233 A list of the results returned.
234
235 :Parameters:
236
237 matcher
238 The matcher whose results will be modified.
239
240 function
241 The modification to apply.
242
243 raw
244 If false (the default), the final return value from the function
245 will be placed in a list and returned in a pair together with the
246 new stream returned from the matcher (ie the function returns a
247 single new result).
248
249 If true, the final return value from the function is used directly
250 and so should match the ``([results], stream)`` type expected by
251 other matchers.
252 '''
253 def fun(stream_in, matcher):
254 '''
255 Apply args as **kargs.
256 '''
257 (results, stream_out) = matcher()
258 kargs = {'results': results,
259 'stream_in': stream_in,
260 'stream_out': stream_out}
261 if raw:
262 return function(**kargs)
263 else:
264 return ([function(**kargs)], stream_out)
265 return Transform(matcher, fun)
266
269 '''
270 Match any character except those specified (or, if a matcher is used as
271 the exclude, if the matcher fails).
272
273 The argument should be a list of tokens (or a string of suitable
274 characters) to exclude, or a matcher. If omitted all tokens are accepted.
275 '''
276 return And(~Lookahead(coerce_(exclude, Any)), Any())
277
280 '''
281 Match zero or one instances of a matcher (**[0:1]**).
282 '''
283 return Repeat(coerce_(matcher), stop=1)
284
285
286
287 -def Star(matcher):
288 '''
289 Match zero or more instances of a matcher (**[0:]**)
290 '''
291 return Repeat(coerce_(matcher))
292
293
294 ZeroOrMore = Star
295 '''
296 Match zero or more instances of a matcher (**[0:]**)
297 '''
298
299
300 -def Plus(matcher):
301 '''
302 Match one or more instances of a matcher (**[1:]**)
303 '''
304 return Repeat(coerce_(matcher), start=1)
305
306
307 OneOrMore = Plus
308 '''
309 Match one or more instances of a matcher (**[1:]**)
310 '''
311
312
313 -def Map(matcher, function):
314 '''
315 Apply an arbitrary function to each of the tokens in the result of the
316 matcher (**>>**). If the function is a name, named pairs are created
317 instead. It can be used indirectly by placing ``>>`` to the right of the
318 matcher.
319
320 See also `Apply`.
321 '''
322
323 if isinstance(function, basestring):
324 return Apply(matcher, lambda l: list(map(lambda x: (function, x), l)),
325 raw=True)
326 else:
327 return Apply(matcher, lambda l: list(map(function, l)), raw=True)
328
331 '''
332 Combine the results from the matcher using `reduce(join, results, zero)`.
333 Unlike `Add` this will return a value (`zero`) when there are no matches.
334 '''
335 def reduce_(_stream, matcher):
336 (results, stream_out) = matcher()
337 return ([reduce(join, results, zero)], stream_out)
338 return Apply(matcher, TransformationWrapper(reduce_))
339
340
341 -def add(_stream, matcher):
342 '''
343 The transformation used in `Add` - we carefully use "+" in as generic
344 a manner as possible.
345 '''
346 (results, stream_out) = matcher()
347 if results:
348 result = results[0]
349 for extra in results[1:]:
350 try:
351 result = result + extra
352 except TypeError:
353 raise TypeError(
354 fmt('An attempt was made to add two results '
355 'that do not have consistent types: {0!r} + {1!r}',
356 result, extra))
357 result = [result]
358 else:
359 result = []
360 return (result, stream_out)
361
362
363 -def Add(matcher):
364 '''
365 Join tokens in the result using the "+" operator (**+**).
366 This joins strings and merges lists.
367 Unlike `Reduce` this will have no effect of there are no matches.
368 '''
369 return Apply(matcher, TransformationWrapper(add))
370
371
372 -def Join(*matchers):
373 '''
374 Combine many matchers together with Add(And(...)).
375 It can be used indirectly by placing ``+`` between matchers.
376 '''
377 return Add(And(*matchers))
378
379
380 -def Drop(matcher):
381 '''
382 Do the match, but return nothing (**~**). The ~ prefix is equivalent.
383 '''
384 return Apply(matcher, lambda l: [], raw=True)
385
388 '''
389 Replace each return value with that given.
390 '''
391 return Map(matcher, lambda x: value)
392
393
394 -def Name(matcher, name):
395 '''
396 Name the result of matching (**> name**)
397
398 This replaces each value in the match with a tuple whose first value is
399 the given name and whose second value is the matched token. The Node
400 class recognises this form and associates such values with named attributes.
401 '''
402 return Map(matcher, name)
403
404
405 Eos = Eof
406 '''Match the end of a stream. Returns nothing.'''
410 '''Functions identically to the matcher given as an argument.'''
411 return coerce_(matcher)
412
415 '''Match newline (Unix) or carriage return newline (Windows)'''
416 return Or(Literal('\n'), Literal('\r\n'))
417
418
419 -def Space(space=' \t'):
420 '''Match a single space (by default space or tab).'''
421 return Any(space)
422
425 '''
426 Match a single space (by default from string.whitespace,
427 which includes newlines).
428 '''
429 return Any(space)
430
433 '''Match any single digit.'''
434 return Any(digits)
435
438 '''Match any ASCII letter (A-Z, a-z).'''
439 return Any(ascii_letters)
440
443 '''Match any ASCII uppercase letter (A-Z).'''
444 return Any(ascii_uppercase)
445
448 '''Match any ASCII lowercase letter (A-Z).'''
449 return Any(ascii_lowercase)
450
453 '''Match any printable character (string.printable).'''
454 return Any(printable)
455
458 '''Match any punctuation character (string.punctuation).'''
459 return Any(punctuation)
460
463 '''Match a simple sequence of digits.'''
464 return Repeat(Digit(), start=1, add_=True)
465
470
471
472 Integer = SignedInteger
473 '''
474 The default integer is a signed one.
475 '''
486
489 '''
490 Match a signed sequence of digits that may include a decimal point.
491 This will match both integer and float values.
492 '''
493 return Join(Optional(Any('+-')), UnsignedReal(decimal))
494
497 '''
498 Match an `UnsignedReal` followed by an optional exponent
499 (e+02 etc). This will match both integer and float values.
500 '''
501 return Join(UnsignedReal(decimal),
502 Optional(And(Any(exponent), SignedInteger())))
503
506 '''
507 Match a `SignedReal` followed by an optional exponent
508 (e+02 etc). This will match both integer and float values.
509 '''
510 if decimal == '.' and exponent == 'eE':
511
512 return NfaRegexp(r'[\+\-]?(?:[0-9]*\.[0-9]+|[0-9]+\.|[0-9]+)(?:[eE][\+\-]?[0-9]+)?')
513 else:
514 return Join(SignedReal(decimal),
515 Optional(Join(Any(exponent), SignedInteger())))
516
517
518 Real = SignedEReal
519 '''
520 The default float is signed with exponents.
521 '''
522
523 _FLOAT_WARN = '''WARNING: The definition of the Float matchers changed in Lepl 4.4.0
524 (you may want to use Real instead).'''
535
539 '''
540 Match a signed sequence of digits that must include a decimal point. This
541 will match real values that are not integers.
542 '''
543 return Join(Optional(Any('+-')), UnsignedFloat(decimal))
544
548 '''
549 As `UnsignedEReal`, but must contain a decimal or exponent. This
550 will match real values that are not integers.
551 '''
552 return Or(Join(UnsignedReal(decimal), Any(exponent), SignedInteger()),
553 UnsignedFloat(decimal))
554
558 '''
559 As `SignedEReal`, but must contain a decimal or exponent. This
560 will match real values that are not integers.
561 '''
562 if decimal == '.' and exponent == 'eE':
563
564 return NfaRegexp(r'[\+\-]?(?:[0-9]*\.[0-9]+(?:[eE][\+\-]?[0-9]+)?|[0-9]+\.(?:[eE][\+\-]?[0-9]+)?|[0-9]+[eE][\+\-]?[0-9]+)')
565 else:
566 return Or(Join(SignedReal(decimal), Any(exponent), SignedInteger()),
567 SignedFloat(decimal))
568
569
570 Float = SignedEFloat
571 '''
572 The default float matcher accepts signed values with optional exponents.
573 This will not match integers - see `Real` for that.
574 '''
575
576
577 -def Word(chars=NfaRegexp('[^%s]' % whitespace), body=None):
578 '''
579 Match a sequence of non-space characters, joining them together.
580
581 chars and body, if given as strings, define possible characters to use
582 for the first and rest of the characters in the word, respectively.
583 If body is not given, then chars is used for the entire word.
584 They can also specify matchers, which typically should match only a
585 single character.
586
587 So ``Word(Upper(), Lower())`` would match names that being with an upper
588 case letter, for example, while ``Word(AnyBut(Space()))`` (the default)
589 matches any sequence of non-space characters.
590 '''
591 chars = coerce_(chars, Any)
592 body = chars if body is None else coerce_(body, Any)
593 return Add(And(chars, Star(body)))
594
597 '''
598 Drop results if they are empty (ie if they are ``False`` in Python).
599
600 This will drop empty strings and lists. It will also drop
601 `Node` instances if they are empty (since the length is then
602 zero).
603 '''
604 def drop(results):
605 '''
606 Only drop "False" values.
607 '''
608 return [result for result in results if result]
609 return Apply(matcher, drop, raw=True)
610
613 '''
614 A series of literals, joined with `Or`.
615 '''
616
617
618
619 return Or(*lmap(Literal, matchers))
620
623 '''
624 Match a string with quotes that can be escaped. This will match across
625 newlines (see `SingleLineString` for an alternative).
626
627 More generally, a string is a grouping of results. Setting `empty` and
628 `join` correctly will allow this matcher to work with a variety of types.
629 '''
630 q = Literal(quote)
631 content = AnyBut(q)
632 if escape:
633 content = Or(And(Drop(escape), q), content)
634 content = Repeat(content, reduce=(empty, join))
635 return And(Drop(q), content, Drop(q))
636
648
651 '''
652 Like `String`, matching across multiple lines, but will silently
653 drop newlines.
654 '''
655 q = Literal(quote)
656 content = AnyBut(Or(q, Any(ignore)))
657 if escape:
658 content = Or(content, And(Drop(escape), q))
659 content = Or(content, Drop(Any(ignore)))
660 content = Repeat(content, reduce=(empty, join))
661 return And(Drop(q), content, Drop(q))
662
663
664 -def SkipTo(matcher, include=True):
673