1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 '''
31 Support for operator syntactic sugar (and operator redefinition).
32 '''
33
34 from re import compile as compile_
35
36 from lepl.matchers.matcher import Matcher
37 from lepl.support.context import Namespace, NamespaceMixin, Scope
38 from lepl.support.lib import open_stop, fmt, basestring
39
40
41 DIGITS = compile_('^(-?\d+)(.*)')
42
43 -def RepeatWrapper(matcher, start, stop, step, separator, add, reduce):
44 '''Parse `step` if it is a string.'''
45
46 from lepl.matchers.derived import Repeat
47 try:
48 int(step)
49 limit = step
50 algorithm = DEPTH_FIRST
51 except ValueError:
52 if (isinstance(step, basestring)):
53 limit = None
54 algorithm = None
55 while step:
56 match = DIGITS.match(step)
57 if match:
58 if limit is None:
59 limit = int(match.group(1))
60 step = match.group(2)
61 else:
62 raise TypeError(fmt('Cannot parse limit/algorithm for []: {}',
63 step))
64 else:
65 if algorithm is None:
66 algorithm = step[0]
67 step = step[1:]
68 else:
69 raise TypeError('The step of [...] must be an integer limit, or a '
70 'string to select the algorithm, or both as a string '
71 'like "d1" for a single value, depth first')
72 return Repeat(matcher, start=start, stop=stop, limit=limit,
73 algorithm=algorithm, separator=separator, add_=add,
74 reduce=reduce)
75
77 '''
78 Define the default operators.
79 '''
80
82
83 from lepl.matchers.error import raise_error
84 from lepl.matchers.derived import Space, Add, Apply, KApply, Drop, Map
85 from lepl.matchers.combine import And, Or, First
86 super(OperatorNamespace, self).__init__({
87 SPACE_OPT: lambda a, b: And(a, Space()[0:,...], b),
88 SPACE_REQ: lambda a, b: And(a, Space()[1:,...], b),
89 ADD: lambda a, b: Add(And(a, b)),
90 AND: And,
91 OR: Or,
92 APPLY: Apply,
93 APPLY_RAW: lambda a, b: Apply(a, b, raw=True),
94 NOT: Drop,
95 KARGS: KApply,
96 RAISE: lambda a, b: KApply(a, raise_error(b)),
97 REPEAT: RepeatWrapper,
98 FIRST: First,
99 MAP: Map,
100 REDUCE: None,
101 })
102
103
104 OPERATORS = 'operators'
105 '''
106 The name used to retrieve operators definitions.
107 '''
108
109 SPACE_OPT = '/'
110 '''Name for / operator.'''
111 SPACE_REQ = '//'
112 '''Name for // operator.'''
113 ADD = '+'
114 '''Name for + operator.'''
115 AND = '&'
116 '''Name for & operator.'''
117 OR = '|'
118 '''Name for | operator.'''
119 APPLY = '>'
120 '''Name for > operator.'''
121 APPLY_RAW = '>='
122 '''Name for >= operator.'''
123 NOT = '~'
124 '''Name for ~ operator.'''
125 KARGS = '**'
126 '''Name for ** operator.'''
127 RAISE = '^'
128 '''Name for ^ operator.'''
129 REPEAT = '[]'
130 '''Name for [] operator.'''
131 FIRST = '%'
132 '''Name for % operator.'''
133 MAP = '>>'
134 '''Name for >> operator.'''
135 REDUCE = '<reduce>'
136 '''Name for accumulator of data during repetition.'''
137
138
140 '''
141 Allow an operator to be redefined within a with context. Uses the
142 OPERATORS namespace.
143 '''
144
145 - def __init__(self, space_opt=None, space_req=None, repeat=None,
146 add=None, and_=None, or_=None, not_=None,
147 apply_=None, apply_raw=None, kargs=None,
148 raise_=None, first=None, map_=None, reduce=None):
149 super(Override, self).__init__(OPERATORS, OperatorNamespace,
150 {SPACE_OPT: space_opt, SPACE_REQ: space_req,
151 REPEAT: repeat, ADD: add, AND: and_, OR: or_,
152 NOT: not_, APPLY: apply_, APPLY_RAW: apply_raw,
153 KARGS: kargs, RAISE: raise_, FIRST: first, MAP: map_,
154 REDUCE: reduce})
155
156
158 '''
159 Support class for `Separator` and similar classes.
160
161 Uses the OPERATORS namespace.
162 '''
163
180
182 '''
183 Sub-classes should return (And, Repeat)
184 '''
185 raise Exception('Unimplemented')
186
188 '''
189 A simple Repeat with separator.
190 '''
191 from lepl.matchers.combine import And
192 def repeat(m, st=0, sp=None, d=0, s=None, a=False, r=None):
193 '''
194 Wrap `Repeat` to adapt the separator.
195 '''
196 if s is None:
197 s = separator
198 elif not a:
199 s = And(separator, s, separator)
200 return RepeatWrapper(m, st, sp, d, s, a, r)
201 return repeat
202
203
205 '''
206 Redefine ``[]`` and ``&`` to include the given matcher as a separator
207 (so it will be used between list items and between matchers separated by the &
208 operator)
209
210 Uses the OPERATORS namespace.
211 '''
212
221
222
224 '''
225 Skip spaces (by default, zero or more Space()). Any argument is dropped.
226 '''
227
234
235
237 '''
238 Similar to `Separator`, but tried to be clever about whether the
239 separator is needed. It replaces `&` with a matcher that only uses
240 the separator if the second sub-matcher consumes some input.
241
242 Uses the OPERATORS namespace.
243
244 See also `SmartSeparator2`, which is less general, but more efficient.
245 '''
246
248 '''
249 Require the separator on each `And`.
250 '''
251
252 from lepl.matchers.combine import And, Or
253 from lepl.matchers.core import Consumer
254 def and_(a, b):
255 '''
256 Add space only in the case when both consume something.
257 '''
258 return Or(And(Consumer(a), separator, Consumer(b)),
259 And(Consumer(a), Consumer(b, False)),
260 And(Consumer(a, False), Consumer(b)),
261 And(Consumer(a, False), Consumer(b, False)))
262 return (and_, self._repeat(separator))
263
264
265 GREEDY = 'g'
266 '''Flag (splice increment) for inefficient, guaranteed greedy matching.'''
267 NON_GREEDY = 'n'
268 '''Flag (splice increment) for inefficient, guaranteed non-greedy matching.'''
269 DEPTH_FIRST = 'd'
270 '''Flag (splice increment) for efficient, quasi-greedy, matching (default).'''
271 BREADTH_FIRST = 'b'
272 '''Flag (splice increment) for efficient, quasi-non-greedy, matching.'''
273
274
276 '''
277 Define the operators used to combine elements in a grammar specification.
278 '''
279
282
284 '''
285 **self + other** - Join strings, merge lists.
286
287 Combine adjacent matchers in sequence, merging the result with "+"
288 (so strings are joined, lists merged).
289
290 :Parameters:
291
292 other
293 Another matcher or a string that will be converted to a literal
294 match.
295 '''
296 self.__check(ADD, other, True)
297 return self._lookup(ADD)(self, other)
298
300 '''
301 **other + self** - Join strings, merge lists.
302
303 Combine adjacent matchers in sequence, merging the result with "+"
304 (so strings are joined, lists merged).
305
306 :Parameters:
307
308 other
309 Another matcher or a string that will be converted to a literal
310 match.
311 '''
312 self.__check(ADD, other, True)
313 return self._lookup(ADD)(other, self)
314
316 '''
317 **self & other** - Append results.
318
319 Combine adjacent matchers in sequence. This is equivalent to
320 `And()`.
321
322 :Parameters:
323
324 other
325 Another matcher or a string that will be converted to a literal
326 match.
327 '''
328 self.__check(AND, other, True)
329 return self._lookup(AND)(self, other)
330
332 '''
333 **other & self** - Append results.
334
335 Combine adjacent matchers in sequence. This is equivalent to
336 `And()`.
337
338 :Parameters:
339
340 other
341 Another matcher or a string that will be converted to a literal
342 match.
343 '''
344 self.__check(AND, other, True)
345 return self._lookup(AND)(other, self)
346
348 '''
349 For 2.6
350 '''
351 return self.__truediv__(other)
352
354 '''
355 For 2.6
356 '''
357 return self.__rtruediv__(other)
358
360 '''
361 **self / other** - Append results, with optional separating space.
362
363 Combine adjacent matchers in sequence, with an optional space between
364 them. The space is included in the results.
365
366 :Parameters:
367
368 other
369 Another matcher or a string that will be converted to a literal
370 match.
371 '''
372 self.__check(SPACE_OPT, other, True)
373 return self._lookup(SPACE_OPT)(self, other)
374
376 '''
377 **other / self** - Append results, with optional separating space.
378
379 Combine adjacent matchers in sequence, with an optional space between
380 them. The space is included in the results.
381
382 :Parameters:
383
384 other
385 Another matcher or a string that will be converted to a literal
386 match.
387 '''
388 self.__check(SPACE_OPT, other, True)
389 return self._lookup(SPACE_OPT)(other, self)
390
392 '''
393 **self // other** - Append results, with required separating space.
394
395 Combine adjacent matchers in sequence, with a space between them.
396 The space is included in the results.
397
398 :Parameters:
399
400 other
401 Another matcher or a string that will be converted to a literal
402 match.
403 '''
404 self.__check(SPACE_REQ, other, True)
405 return self._lookup(SPACE_REQ)(self, other)
406
408 '''
409 **other // self** - Append results, with required separating space.
410
411 Combine adjacent matchers in sequence, with a space between them.
412 The space is included in the results.
413
414 :Parameters:
415
416 other
417 Another matcher or a string that will be converted to a literal
418 match.
419 '''
420 self.__check(SPACE_REQ, other, True)
421 return self._lookup(SPACE_REQ)(other, self)
422
424 '''
425 **self | other** - Try alternative matchers.
426
427 This introduces backtracking. Matches are tried from left to right
428 and successful results returned (one on each "recall"). This is
429 equivalent to `Or()`.
430
431 :Parameters:
432
433 other
434 Another matcher or a string that will be converted to a literal
435 match.
436 '''
437 self.__check(OR, other, True)
438 return self._lookup(OR)(self, other)
439
441 '''
442 **other | self** - Try alternative matchers.
443
444 This introduces backtracking. Matches are tried from left to right
445 and successful results returned (one on each "recall"). This is
446 equivalent to `Or()`.
447
448 :Parameters:
449
450 other
451 Another matcher or a string that will be converted to a literal
452 match.
453 '''
454 self.__check(OR, other, True)
455 return self._lookup(OR)(other, self)
456
458 '''
459 **self % other** - Take first match (committed choice).
460
461 Matches are tried from left to right and the first successful result
462 is returned. This is equivalent to `First()`.
463
464 :Parameters:
465
466 other
467 Another matcher or a string that will be converted to a literal
468 match.
469 '''
470 self.__check(FIRST, other, True)
471 return self._lookup(FIRST)(self, other)
472
474 '''
475 **other % self** - Take first match (committed choice).
476
477 Matches are tried from left to right and the first successful result
478 is returned. This is equivalent to `First()`.
479
480 :Parameters:
481
482 other
483 Another matcher or a string that will be converted to a literal
484 match.
485 '''
486 self.__check(FIRST, other, True)
487 return self._lookup(FIRST)(other, self)
488
490 '''
491 **~self** - Discard the result.
492
493 This generates a matcher that behaves as the original, but returns
494 an empty list. This is equivalent to `Drop()`.
495
496 Note that `Lookahead()` overrides this method to have
497 different semantics (negative lookahead).
498 '''
499 return self._lookup(NOT)(self)
500
502 '''
503 **self[start:stop:algorithm, separator, ...]** - Repetition and lists.
504
505 This is a complex statement that modifies the current matcher so
506 that it matches several times. A separator may be specified
507 (eg for comma-separated lists) and the results may be combined with
508 "+" (so repeated matching of characters would give a word).
509
510 start:stop:algorithm
511 This controls the number of matches made and the order in which
512 different numbers of matches are returned.
513
514 [start]
515 Repeat exactly *start* times
516
517 [start:stop]
518 Repeat *start* to *stop* times (starting with as many matches
519 as possible, and then decreasing as necessary).
520
521 [start:stop:algorithm]
522 Direction selects the algorithm for searching.
523
524 'b' (BREADTH_FIRST)
525 A breadth first search is used, which tends to give shorter
526 matches before longer ones. This tries all possible matches for
527 the sub-matcher first (before repeating calls to consume more
528 of the stream). If the sub-matcher does not backtrack then this
529 guarantees that the number of matches returned will not decrease
530 (ie will monotonically increase) on backtracking.
531
532 'd' (DEPTH_FIRST)
533 A depth first search is used, which tends to give longer
534 matches before shorter ones. This tries to repeats matches
535 with the sub-matcher, consuming as much of the stream as
536 possible, before backtracking to find alternative matchers.
537 If the sub-matcher does not backtrack then this guarantees
538 that the number of matches returned will not increase (ie will
539 monotonically decrease) on backtracking.
540
541 'g' (GREEDY)
542 An exhaustive search is used, which finds all results (by
543 breadth first search) and orders them by length before returning
544 them ordered from longest to shortest. This guarantees that
545 the number of matches returned will not increase (ie will
546 monotonically decrease) on backtracking, but can consume a lot
547 of resources.
548
549 'n' (NON_GREEDY)
550 As for 'g' (GREEDY), but results are ordered shortest to
551 longest. This guarantees that the number of matches returned
552 will not decrease (ie will monotonically increase) on
553 backtracking, but can consume a lot of resources,
554
555 Values may be omitted; the defaults are: *start* = 0, *stop* =
556 infinity, *algorithm* = 'd' (DEPTH_FIRST).
557
558 separator
559 If given, this must appear between repeated values. Matched
560 separators are returned as part of the result (unless, of course,
561 they are implemented with a matcher that returns nothing). If
562 *separator* is a string it is converted to a literal match.
563
564 ...
565 If ... (an ellipsis) is given then the results are joined together
566 with "+".
567
568 Examples
569 --------
570
571 Any()[0:3,...] will match 3 or less characters, joining them
572 together so that the result is a single string.
573
574 Word()[:,','] will match a comma-separated list of words.
575
576 value[:] or value[0:] or value[0::'d'] is a "greedy" match that,
577 if value does not backtrack, is equivalent to the "*" in a regular
578 expression.
579 value[::'n'] is the "non-greedy" equivalent (preferring as short a
580 match as possible) and value[::'g'] is greedy even when value does
581 provide alternative matches on backtracking.
582 '''
583 start = 0
584 stop = None
585 step = DEPTH_FIRST
586 separator = None
587 add = False
588 have_index = False
589 if not isinstance(indices, tuple):
590 indices = [indices]
591 for index in indices:
592 if isinstance(index, int):
593 if have_index:
594 raise TypeError(
595 fmt('Multiple slices not supported: {0!r}', index))
596 start = index
597 stop = index
598 step = DEPTH_FIRST
599 have_index = True
600 elif isinstance(index, slice):
601 if have_index:
602 raise TypeError(
603 fmt('Multiple slices not supported: {0!r}', index))
604 start = index.start if index.start != None else 0
605 stop = index.stop if not open_stop(index) else None
606 step = index.step if index.step != None else DEPTH_FIRST
607 have_index = True
608 elif index == Ellipsis:
609 add = True
610 elif separator is None:
611 separator = index
612 else:
613 raise TypeError(index)
614
615 if stop == 1:
616 add = False
617 return self._lookup(REPEAT)(self, start, stop, step, separator, add,
618 self._lookup(REDUCE))
619
621 '''
622 **self > function** - Process or label the results.
623
624 Create a named pair or apply a function to the results. This is
625 equivalent to `Apply()`.
626
627 :Parameters:
628
629 function
630 This can be a string or a function.
631
632 If a string is given each result is replaced by a
633 (name, value) pair, where name is the string and value is the
634 result.
635
636 If a function is given it is called with the results as an
637 argument. The return value is used *within a list* as the new
638 result. This is equivalent to `Apply()` with raw=False.
639 '''
640 self.__check(APPLY, function, False)
641 return self._lookup(APPLY)(self, function)
642
644 '''
645 **self >= function** - Process or label the results.
646
647 Apply a function to the results.
648 This is equivalent to `Apply(raw=True)`.
649
650 :Parameters:
651
652 function
653 This is called with the results as an argument. The return value
654 is used as the new result. This is equivalent to `Apply()` with
655 raw=True.
656 '''
657 self.__check(APPLY_RAW, function, False)
658 return self._lookup(APPLY_RAW)(self, function)
659
661 '''
662 **self >> function** - Process or label the results (map).
663
664 Create a named pair or apply a function to each result in turn.
665 This is equivalent to `Map()`. It is similar to
666 *self >= function*, except that the function is applied to each
667 result in turn.
668
669 :Parameters:
670
671 function
672 This can be a string or a function.
673
674 If a string is given each result is replaced by a
675 (name, value) pair, where name is the string and value is the
676 result.
677
678 If a function is given it is called with each result in turn.
679 The return values are used as the new result.
680 '''
681 self.__check(MAP, function, False)
682 return self._lookup(MAP)(self, function)
683
685 '''
686 **self \** function** - Process the results (\**kargs).
687
688 Apply a function to keyword arguments
689 This is equivalent to `KApply()`.
690
691 :Parameters:
692
693 function
694 A function that is called with the keyword arguments described below.
695 The return value is used as the new result.
696
697 Keyword arguments:
698
699 stream_in
700 The stream passed to the matcher.
701
702 stream_out
703 The stream returned from the matcher.
704
705 results
706 A list of the results returned.
707 '''
708 self.__check(KARGS, function, False)
709 return self._lookup(KARGS)(self, function)
710
712 '''
713 **self ^ message**
714
715 Raise a SytaxError.
716
717 :Parameters:
718
719 message
720 The message for the SyntaxError.
721 '''
722 return self._lookup(RAISE)(self, message)
723
724 - def __check(self, name, other, is_match):
725 '''
726 Provide some diagnostics if the syntax is completely mixed up.
727 '''
728 if not isinstance(other, basestring):
729 if is_match != isinstance(other, Matcher):
730 if is_match:
731 msg = 'The operator {0} for {1} was applied to something ' \
732 'that is not a matcher ({2}).'
733 else:
734 msg = 'The operator {0} for {1} was applied to a matcher ' \
735 '({2}).'
736 msg += ' Check syntax and parentheses.'
737 raise SyntaxError(fmt(msg, name, self, other))
738