1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 '''
31 A lexer that adds line start and end tokens. The start may also contain
32 leading spaces, depending on the configuration.
33 '''
34
35 from lepl.lexer.lexer import Lexer
36 from lepl.stream.core import s_empty, s_line, s_stream, s_fmt, s_next, s_id
37 from lepl.lexer.support import RuntimeLexerError
38
39
# Terminal name carried by the token the lexer yields at the start of a line.
START = 'SOL'
'''
Name for start of line token.
'''

# Terminal name carried by the token the lexer yields at the end of a line.
END = 'EOL'
'''
Name for end of line token.
'''
49
50
# NOTE(review): the `def` line was absent from the reviewed text.  The name
# is the one cited by the `_OffsideLexer` docstring and the parameters are
# the two names `wrapper` closes over -- confirm the parameter order.
def make_offside_lexer(tabsize, blocks):
    '''
    Provide the standard `Lexer` interface while including `tabsize`
    and `blocks`.

    The returned callable accepts the usual lexer constructor arguments
    and forwards them, together with the `tabsize` and `blocks` given
    here, to `_OffsideLexer`.
    '''
    def wrapper(matcher, tokens, alphabet, discard,
                t_regexp=None, s_regexp=None):
        '''
        Return the lexer with tabsize and blocks as specified earlier.
        '''
        return _OffsideLexer(matcher, tokens, alphabet, discard,
                             t_regexp=t_regexp, s_regexp=s_regexp,
                             tabsize=tabsize, blocks=blocks)
    return wrapper
64
65
# NOTE(review): the `class` line and the `def` line of the token generator
# were absent from the reviewed text.  The class name is taken from the
# `super(_OffsideLexer, self)` call, the base class from the imported
# `Lexer`, and the generator's parameters from the names its body reads.
# The method name `_tokens` is an assumption -- confirm against `Lexer`.
class _OffsideLexer(Lexer):
    '''
    An alternative lexer that adds `LineStart` and `LineEnd` tokens.

    Note that because of the extended argument list this must be used in
    the config via `make_offside_lexer()` (although in normal use it is
    supplied by simply calling `config.lines()` so you don't need to refer
    to this class at all)
    '''

    def __init__(self, matcher, tokens, alphabet, discard,
                 t_regexp=None, s_regexp=None, tabsize=8, blocks=False):
        '''
        Forward the standard lexer arguments to the parent class and record
        the two line-aware settings.

        `tabsize` is the number of spaces substituted for each tab in the
        text of the start-of-line token; `None` leaves tabs unchanged.

        `blocks`, when true, makes the start-of-line token include the
        leading text matched by `s_regexp`; otherwise that token is empty.
        '''
        super(_OffsideLexer, self).__init__(matcher, tokens, alphabet, discard,
                                            t_regexp=t_regexp, s_regexp=s_regexp)
        # presumably `_karg` exposes each value as an attribute of the same
        # name (the generator below reads `self.blocks`) -- TODO confirm
        self._karg(tabsize=tabsize)
        self._karg(blocks=blocks)
        if tabsize is not None:
            self._tab = ' ' * tabsize
        else:
            # replacing a tab with a tab leaves the indent untouched
            self._tab = '\t'

    def _tokens(self, stream, max):
        '''
        Generate tokens, on demand.

        The input is consumed one line at a time.  For each line this
        yields a `(START,)` token, then one `(terminals, stream)` pair for
        every match of `t_regexp` (text matched only by `s_regexp` is
        skipped without yielding), then an `(END,)` token with empty text.

        Raises `RuntimeLexerError` when a `TypeError` escapes the loop,
        which is how a position that neither regexp matches shows up here.
        '''
        id_ = s_id(stream)
        try:
            while not s_empty(stream):

                # every emitted stream gets its own, increasing id
                id_ += 1
                (line, next_stream) = s_line(stream, False)
                line_stream = s_stream(stream, line)
                size = 0

                if self.blocks:
                    try:
                        (_, size, _) = self.s_regexp.size_match(line_stream)
                    except TypeError:
                        # presumably "no leading space matched": keep
                        # size = 0 so the start token is empty
                        pass

                (indent, next_line_stream) = s_next(line_stream, count=size)
                indent = indent.replace('\t', self._tab)
                yield ((START,),
                       s_stream(line_stream, indent, id_=id_, max=max))
                line_stream = next_line_stream

                while not s_empty(line_stream):
                    id_ += 1
                    try:
                        (terminals, match, next_line_stream) = \
                                        self.t_regexp.match(line_stream)
                        yield (terminals, s_stream(line_stream, match,
                                                   max=max, id_=id_))
                    except TypeError:
                        # no token here; try to skip space instead.  If
                        # this also fails the TypeError reaches the outer
                        # handler and becomes a RuntimeLexerError.
                        (terminals, _size, next_line_stream) = \
                                    self.s_regexp.size_match(line_stream)
                    line_stream = next_line_stream

                id_ += 1
                yield ((END,),
                       s_stream(line_stream, '', max=max, id_=id_))
                stream = next_stream

        except TypeError:
            # NOTE(review): `stream` only advances once per line, so the
            # reported position is presumably the start of the failing
            # line rather than the exact offset -- confirm this is intended.
            raise RuntimeLexerError(
                s_fmt(stream,
                      'No token for {rest} at {location} of {text}.'))
134