1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 '''
31 Support for operator syntactic sugar (and operator redefinition).
32 '''
33
34 from re import compile as compile_
35
36 from lepl.matchers.matcher import Matcher
37 from lepl.support.context import Namespace, NamespaceMixin, Scope
38 from lepl.support.lib import open_stop, fmt, basestring
39
40
41 DIGITS = compile_('^(-?\d+)(.*)')
42
43 -def RepeatWrapper(matcher, start, stop, step, separator, add, reduce):
44 '''Parse `step` if it is a string.'''
45
46 from lepl.matchers.derived import Repeat
47 if (isinstance(step, int)):
48 limit = step
49 algorithm = DEPTH_FIRST
50 elif (isinstance(step, basestring)):
51 limit = None
52 algorithm = None
53 while step:
54 match = DIGITS.match(step)
55 if match:
56 if limit is None:
57 limit = int(match.group(1))
58 step = match.group(2)
59 else:
60 raise TypeError(fmt('Cannot parse limit/algorithm for []: {}',
61 step))
62 else:
63 if algorithm is None:
64 algorithm = step[0]
65 step = step[1:]
66 else:
67 raise TypeError('The step of [...] must be an integer limit, or a '
68 'string to select the algorithm, or both as a string '
69 'like "d1" for a single value, depth first')
70 return Repeat(matcher, start=start, stop=stop, limit=limit,
71 algorithm=algorithm, separator=separator, add_=add,
72 reduce=reduce)
73
75 '''
76 Define the default operators.
77 '''
78
80
81 from lepl.matchers.error import raise_error
82 from lepl.matchers.derived import Space, Add, Apply, KApply, Drop, Map
83 from lepl.matchers.combine import And, Or, First
84 super(OperatorNamespace, self).__init__({
85 SPACE_OPT: lambda a, b: And(a, Space()[0:,...], b),
86 SPACE_REQ: lambda a, b: And(a, Space()[1:,...], b),
87 ADD: lambda a, b: Add(And(a, b)),
88 AND: And,
89 OR: Or,
90 APPLY: Apply,
91 APPLY_RAW: lambda a, b: Apply(a, b, raw=True),
92 NOT: Drop,
93 KARGS: KApply,
94 RAISE: lambda a, b: KApply(a, raise_error(b)),
95 REPEAT: RepeatWrapper,
96 FIRST: First,
97 MAP: Map,
98 REDUCE: None,
99 })
100
101
102 OPERATORS = 'operators'
103 '''
104 The name used to retrieve operators definitions.
105 '''
106
107 SPACE_OPT = '/'
108 '''Name for / operator.'''
109 SPACE_REQ = '//'
110 '''Name for // operator.'''
111 ADD = '+'
112 '''Name for + operator.'''
113 AND = '&'
114 '''Name for & operator.'''
115 OR = '|'
116 '''Name for | operator.'''
117 APPLY = '>'
118 '''Name for > operator.'''
119 APPLY_RAW = '>='
120 '''Name for >= operator.'''
121 NOT = '~'
122 '''Name for ~ operator.'''
123 KARGS = '**'
124 '''Name for ** operator.'''
125 RAISE = '^'
126 '''Name for ^ operator.'''
127 REPEAT = '[]'
128 '''Name for [] operator.'''
129 FIRST = '%'
130 '''Name for % operator.'''
131 MAP = '>>'
132 '''Name for >> operator.'''
133 REDUCE = '<reduce>'
134 '''Name for accumulator of data during repetition.'''
135
136
138 '''
139 Allow an operator to be redefined within a with context. Uses the
140 OPERATORS namespace.
141 '''
142
143 - def __init__(self, space_opt=None, space_req=None, repeat=None,
144 add=None, and_=None, or_=None, not_=None,
145 apply_=None, apply_raw=None, kargs=None,
146 raise_=None, first=None, map_=None, reduce=None):
147 super(Override, self).__init__(OPERATORS, OperatorNamespace,
148 {SPACE_OPT: space_opt, SPACE_REQ: space_req,
149 REPEAT: repeat, ADD: add, AND: and_, OR: or_,
150 NOT: not_, APPLY: apply_, APPLY_RAW: apply_raw,
151 KARGS: kargs, RAISE: raise_, FIRST: first, MAP: map_,
152 REDUCE: reduce})
153
154
156 '''
157 Support class for `Separator` and similar classes.
158
159 Uses the OPERATORS namespace.
160 '''
161
178
180 '''
181 Sub-classes should return (And, Repeat)
182 '''
183 raise Exception('Unimplemented')
184
186 '''
187 A simple Repeat with separator.
188 '''
189 from lepl.matchers.combine import And
190 def repeat(m, st=0, sp=None, d=0, s=None, a=False, r=None):
191 '''
192 Wrap `Repeat` to adapt the separator.
193 '''
194 if s is None:
195 s = separator
196 elif not a:
197 s = And(separator, s, separator)
198 return RepeatWrapper(m, st, sp, d, s, a, r)
199 return repeat
200
201
203 '''
204 Redefine ``[]`` and ``&`` to include the given matcher as a separator
205 (so it will be used between list items and between matchers separated by the &
206 operator)
207
208 Uses the OPERATORS namespace.
209 '''
210
219
220
222 '''
223 Skip spaces (by default, zero or more Space()). Any argument is dropped.
224 '''
225
232
233
235 '''
236 Similar to `Separator`, but tried to be clever about whether the
237 separator is needed. It replaces `&` with a matcher that only uses
238 the separator if the second sub-matcher consumes some input.
239
240 Uses the OPERATORS namespace.
241
242 See also `SmartSeparator2`, which is less general, but more efficient.
243 '''
244
246 '''
247 Require the separator on each `And`.
248 '''
249
250 from lepl.matchers.combine import And, Or
251 from lepl.matchers.core import Consumer
252 def and_(a, b):
253 '''
254 Add space only in the case when both consume something.
255 '''
256 return Or(And(Consumer(a), separator, Consumer(b)),
257 And(Consumer(a), Consumer(b, False)),
258 And(Consumer(a, False), Consumer(b)),
259 And(Consumer(a, False), Consumer(b, False)))
260 return (and_, self._repeat(separator))
261
262
263 GREEDY = 'g'
264 '''Flag (splice increment) for inefficient, guaranteed greedy matching.'''
265 NON_GREEDY = 'n'
266 '''Flag (splice increment) for inefficient, guaranteed non-greedy matching.'''
267 DEPTH_FIRST = 'd'
268 '''Flag (splice increment) for efficient, quasi-greedy, matching (default).'''
269 BREADTH_FIRST = 'b'
270 '''Flag (splice increment) for efficient, quasi-non-greedy, matching.'''
271
272
274 '''
275 Define the operators used to combine elements in a grammar specification.
276 '''
277
280
282 '''
283 **self + other** - Join strings, merge lists.
284
285 Combine adjacent matchers in sequence, merging the result with "+"
286 (so strings are joined, lists merged).
287
288 :Parameters:
289
290 other
291 Another matcher or a string that will be converted to a literal
292 match.
293 '''
294 self.__check(ADD, other, True)
295 return self._lookup(ADD)(self, other)
296
298 '''
299 **other + self** - Join strings, merge lists.
300
301 Combine adjacent matchers in sequence, merging the result with "+"
302 (so strings are joined, lists merged).
303
304 :Parameters:
305
306 other
307 Another matcher or a string that will be converted to a literal
308 match.
309 '''
310 self.__check(ADD, other, True)
311 return self._lookup(ADD)(other, self)
312
314 '''
315 **self & other** - Append results.
316
317 Combine adjacent matchers in sequence. This is equivalent to
318 `And()`.
319
320 :Parameters:
321
322 other
323 Another matcher or a string that will be converted to a literal
324 match.
325 '''
326 self.__check(AND, other, True)
327 return self._lookup(AND)(self, other)
328
330 '''
331 **other & self** - Append results.
332
333 Combine adjacent matchers in sequence. This is equivalent to
334 `And()`.
335
336 :Parameters:
337
338 other
339 Another matcher or a string that will be converted to a literal
340 match.
341 '''
342 self.__check(AND, other, True)
343 return self._lookup(AND)(other, self)
344
346 '''
347 For 2.6
348 '''
349 return self.__truediv__(other)
350
352 '''
353 For 2.6
354 '''
355 return self.__rtruediv__(other)
356
358 '''
359 **self / other** - Append results, with optional separating space.
360
361 Combine adjacent matchers in sequence, with an optional space between
362 them. The space is included in the results.
363
364 :Parameters:
365
366 other
367 Another matcher or a string that will be converted to a literal
368 match.
369 '''
370 self.__check(SPACE_OPT, other, True)
371 return self._lookup(SPACE_OPT)(self, other)
372
374 '''
375 **other / self** - Append results, with optional separating space.
376
377 Combine adjacent matchers in sequence, with an optional space between
378 them. The space is included in the results.
379
380 :Parameters:
381
382 other
383 Another matcher or a string that will be converted to a literal
384 match.
385 '''
386 self.__check(SPACE_OPT, other, True)
387 return self._lookup(SPACE_OPT)(other, self)
388
390 '''
391 **self // other** - Append results, with required separating space.
392
393 Combine adjacent matchers in sequence, with a space between them.
394 The space is included in the results.
395
396 :Parameters:
397
398 other
399 Another matcher or a string that will be converted to a literal
400 match.
401 '''
402 self.__check(SPACE_REQ, other, True)
403 return self._lookup(SPACE_REQ)(self, other)
404
406 '''
407 **other // self** - Append results, with required separating space.
408
409 Combine adjacent matchers in sequence, with a space between them.
410 The space is included in the results.
411
412 :Parameters:
413
414 other
415 Another matcher or a string that will be converted to a literal
416 match.
417 '''
418 self.__check(SPACE_REQ, other, True)
419 return self._lookup(SPACE_REQ)(other, self)
420
422 '''
423 **self | other** - Try alternative matchers.
424
425 This introduces backtracking. Matches are tried from left to right
426 and successful results returned (one on each "recall"). This is
427 equivalent to `Or()`.
428
429 :Parameters:
430
431 other
432 Another matcher or a string that will be converted to a literal
433 match.
434 '''
435 self.__check(OR, other, True)
436 return self._lookup(OR)(self, other)
437
439 '''
440 **other | self** - Try alternative matchers.
441
442 This introduces backtracking. Matches are tried from left to right
443 and successful results returned (one on each "recall"). This is
444 equivalent to `Or()`.
445
446 :Parameters:
447
448 other
449 Another matcher or a string that will be converted to a literal
450 match.
451 '''
452 self.__check(OR, other, True)
453 return self._lookup(OR)(other, self)
454
456 '''
457 **self % other** - Take first match (committed choice).
458
459 Matches are tried from left to right and the first successful result
460 is returned. This is equivalent to `First()`.
461
462 :Parameters:
463
464 other
465 Another matcher or a string that will be converted to a literal
466 match.
467 '''
468 self.__check(FIRST, other, True)
469 return self._lookup(FIRST)(self, other)
470
472 '''
473 **other % self** - Take first match (committed choice).
474
475 Matches are tried from left to right and the first successful result
476 is returned. This is equivalent to `First()`.
477
478 :Parameters:
479
480 other
481 Another matcher or a string that will be converted to a literal
482 match.
483 '''
484 self.__check(FIRST, other, True)
485 return self._lookup(FIRST)(other, self)
486
488 '''
489 **~self** - Discard the result.
490
491 This generates a matcher that behaves as the original, but returns
492 an empty list. This is equivalent to `Drop()`.
493
494 Note that `Lookahead()` overrides this method to have
495 different semantics (negative lookahead).
496 '''
497 return self._lookup(NOT)(self)
498
500 '''
501 **self[start:stop:algorithm, separator, ...]** - Repetition and lists.
502
503 This is a complex statement that modifies the current matcher so
504 that it matches several times. A separator may be specified
505 (eg for comma-separated lists) and the results may be combined with
506 "+" (so repeated matching of characters would give a word).
507
508 start:stop:algorithm
509 This controls the number of matches made and the order in which
510 different numbers of matches are returned.
511
512 [start]
513 Repeat exactly *start* times
514
515 [start:stop]
516 Repeat *start* to *stop* times (starting with as many matches
517 as possible, and then decreasing as necessary).
518
519 [start:stop:algorithm]
520 Direction selects the algorithm for searching.
521
522 'b' (BREADTH_FIRST)
523 A breadth first search is used, which tends to give shorter
524 matches before longer ones. This tries all possible matches for
525 the sub-matcher first (before repeating calls to consume more
526 of the stream). If the sub-matcher does not backtrack then this
527 guarantees that the number of matches returned will not decrease
528 (ie will monotonically increase) on backtracking.
529
530 'd' (DEPTH_FIRST)
531 A depth first search is used, which tends to give longer
532 matches before shorter ones. This tries to repeats matches
533 with the sub-matcher, consuming as much of the stream as
534 possible, before backtracking to find alternative matchers.
535 If the sub-matcher does not backtrack then this guarantees
536 that the number of matches returned will not increase (ie will
537 monotonically decrease) on backtracking.
538
539 'g' (GREEDY)
540 An exhaustive search is used, which finds all results (by
541 breadth first search) and orders them by length before returning
542 them ordered from longest to shortest. This guarantees that
543 the number of matches returned will not increase (ie will
544 monotonically decrease) on backtracking, but can consume a lot
545 of resources.
546
547 'n' (NON_GREEDY)
548 As for 'g' (GREEDY), but results are ordered shortest to
549 longest. This guarantees that the number of matches returned
550 will not decrease (ie will monotonically increase) on
551 backtracking, but can consume a lot of resources,
552
553 Values may be omitted; the defaults are: *start* = 0, *stop* =
554 infinity, *algorithm* = 'd' (DEPTH_FIRST).
555
556 separator
557 If given, this must appear between repeated values. Matched
558 separators are returned as part of the result (unless, of course,
559 they are implemented with a matcher that returns nothing). If
560 *separator* is a string it is converted to a literal match.
561
562 ...
563 If ... (an ellipsis) is given then the results are joined together
564 with "+".
565
566 Examples
567 --------
568
569 Any()[0:3,...] will match 3 or less characters, joining them
570 together so that the result is a single string.
571
572 Word()[:,','] will match a comma-separated list of words.
573
574 value[:] or value[0:] or value[0::'d'] is a "greedy" match that,
575 if value does not backtrack, is equivalent to the "*" in a regular
576 expression.
577 value[::'n'] is the "non-greedy" equivalent (preferring as short a
578 match as possible) and value[::'g'] is greedy even when value does
579 provide alternative matches on backtracking.
580 '''
581 start = 0
582 stop = None
583 step = DEPTH_FIRST
584 separator = None
585 add = False
586 have_index = False
587 if not isinstance(indices, tuple):
588 indices = [indices]
589 for index in indices:
590 if isinstance(index, int):
591 if have_index:
592 raise TypeError(
593 fmt('Multiple slices not supported: {0!r}', index))
594 start = index
595 stop = index
596 step = DEPTH_FIRST
597 have_index = True
598 elif isinstance(index, slice):
599 if have_index:
600 raise TypeError(
601 fmt('Multiple slices not supported: {0!r}', index))
602 start = index.start if index.start != None else 0
603 stop = index.stop if not open_stop(index) else None
604 step = index.step if index.step != None else DEPTH_FIRST
605 have_index = True
606 elif index == Ellipsis:
607 add = True
608 elif separator is None:
609 separator = index
610 else:
611 raise TypeError(index)
612
613 if stop == 1:
614 add = False
615 return self._lookup(REPEAT)(self, start, stop, step, separator, add,
616 self._lookup(REDUCE))
617
619 '''
620 **self > function** - Process or label the results.
621
622 Create a named pair or apply a function to the results. This is
623 equivalent to `Apply()`.
624
625 :Parameters:
626
627 function
628 This can be a string or a function.
629
630 If a string is given each result is replaced by a
631 (name, value) pair, where name is the string and value is the
632 result.
633
634 If a function is given it is called with the results as an
635 argument. The return value is used *within a list* as the new
636 result. This is equivalent to `Apply()` with raw=False.
637 '''
638 self.__check(APPLY, function, False)
639 return self._lookup(APPLY)(self, function)
640
642 '''
643 **self >= function** - Process or label the results.
644
645 Apply a function to the results.
646 This is equivalent to `Apply(raw=True)`.
647
648 :Parameters:
649
650 function
651 This is called with the results as an argument. The return value
652 is used as the new result. This is equivalent to `Apply()` with
653 raw=True.
654 '''
655 self.__check(APPLY_RAW, function, False)
656 return self._lookup(APPLY_RAW)(self, function)
657
659 '''
660 **self >> function** - Process or label the results (map).
661
662 Create a named pair or apply a function to each result in turn.
663 This is equivalent to `Map()`. It is similar to
664 *self >= function*, except that the function is applied to each
665 result in turn.
666
667 :Parameters:
668
669 function
670 This can be a string or a function.
671
672 If a string is given each result is replaced by a
673 (name, value) pair, where name is the string and value is the
674 result.
675
676 If a function is given it is called with each result in turn.
677 The return values are used as the new result.
678 '''
679 self.__check(MAP, function, False)
680 return self._lookup(MAP)(self, function)
681
683 '''
684 **self \** function** - Process the results (\**kargs).
685
686 Apply a function to keyword arguments
687 This is equivalent to `KApply()`.
688
689 :Parameters:
690
691 function
692 A function that is called with the keyword arguments described below.
693 The return value is used as the new result.
694
695 Keyword arguments:
696
697 stream_in
698 The stream passed to the matcher.
699
700 stream_out
701 The stream returned from the matcher.
702
703 results
704 A list of the results returned.
705 '''
706 self.__check(KARGS, function, False)
707 return self._lookup(KARGS)(self, function)
708
710 '''
711 **self ^ message**
712
713 Raise a SytaxError.
714
715 :Parameters:
716
717 message
718 The message for the SyntaxError.
719 '''
720 return self._lookup(RAISE)(self, message)
721
722 - def __check(self, name, other, is_match):
723 '''
724 Provide some diagnostics if the syntax is completely mixed up.
725 '''
726 if not isinstance(other, basestring):
727 if is_match != isinstance(other, Matcher):
728 if is_match:
729 msg = 'The operator {0} for {1} was applied to something ' \
730 'that is not a matcher ({2}).'
731 else:
732 msg = 'The operator {0} for {1} was applied to a matcher ' \
733 '({2}).'
734 msg += ' Check syntax and parentheses.'
735 raise SyntaxError(fmt(msg, name, self, other))
736