
Source Code for Module lepl.lexer.lines.lexer

# The contents of this file are subject to the Mozilla Public License
# (MPL) Version 1.1 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License
# at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
# the License for the specific language governing rights and
# limitations under the License.
#
# The Original Code is LEPL (http://www.acooke.org/lepl)
# The Initial Developer of the Original Code is Andrew Cooke.
# Portions created by the Initial Developer are Copyright (C) 2009-2010
# Andrew Cooke (andrew@acooke.org). All Rights Reserved.
#
# Alternatively, the contents of this file may be used under the terms
# of the LGPL license (the GNU Lesser General Public License,
# http://www.gnu.org/licenses/lgpl.html), in which case the provisions
# of the LGPL License are applicable instead of those above.
#
# If you wish to allow use of your version of this file only under the
# terms of the LGPL License and not to allow others to use your version
# of this file under the MPL, indicate your decision by deleting the
# provisions above and replace them with the notice and other provisions
# required by the LGPL License.  If you do not delete the provisions
# above, a recipient may use your version of this file under either the
# MPL or the LGPL License.

'''
A lexer that adds line start and end tokens.  The start may also contain
leading spaces, depending on the configuration.
'''
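# Editorial illustration (these comments are not part of the original
# module): for the input "  a\nb\n", with blocks enabled, the generator
# in _tokens() below yields roughly
#
#     (SOL, '  ')  (word, 'a')  (EOL, '')
#     (SOL, '')    (word, 'b')  (EOL, '')
#
# where SOL and EOL are the START and END token names defined below:
# every line is bracketed by start and end tokens, and any leading
# space is carried by the start token.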

from lepl.lexer.lexer import Lexer
from lepl.stream.core import s_empty, s_line, s_stream, s_fmt, s_next, s_id
from lepl.lexer.support import RuntimeLexerError


START = 'SOL'
'''
Name for start of line token.
'''

END = 'EOL'
'''
Name for end of line token.
'''

def make_offside_lexer(tabsize, blocks):
    '''
    Provide the standard `Lexer` interface while fixing `tabsize` and
    `blocks` in advance.
    '''
    def wrapper(matcher, tokens, alphabet, discard,
                t_regexp=None, s_regexp=None):
        '''
        Return the lexer with the tabsize and blocks specified earlier.
        '''
        return _OffsideLexer(matcher, tokens, alphabet, discard,
                             t_regexp=t_regexp, s_regexp=s_regexp,
                             tabsize=tabsize, blocks=blocks)
    return wrapper
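
# Editorial sketch (not in the original source) of how the factory above
# is used; the real call site lives in LEPL's configuration code, so the
# names below other than make_offside_lexer() are assumptions:
#
#     lexer_factory = make_offside_lexer(tabsize=4, blocks=True)
#     lexer = lexer_factory(matcher, tokens, alphabet, discard)
#
# The closure fixes tabsize and blocks while exposing exactly the
# argument list that the standard Lexer interface expects.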

class _OffsideLexer(Lexer):
    '''
    An alternative lexer that adds `LineStart` and `LineEnd` tokens.

    Note that because of the extended argument list this must be used in
    the config via `make_offside_lexer()` (although in normal use it is
    supplied simply by calling `config.lines()`, so you don't need to
    refer to this class at all).
    '''
    def __init__(self, matcher, tokens, alphabet, discard,
                 t_regexp=None, s_regexp=None, tabsize=8, blocks=False):
        super(_OffsideLexer, self).__init__(matcher, tokens, alphabet, discard,
                                            t_regexp=t_regexp, s_regexp=s_regexp)
        self._karg(tabsize=tabsize)
        self._karg(blocks=blocks)
        if tabsize is not None:
            self._tab = ' ' * tabsize
        else:
            self._tab = '\t'

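    # Editorial note (not in the original source): with the default
    # tabsize=8, each '\t' in a line's leading whitespace is expanded to
    # eight spaces when _tokens() builds the start-of-line token, so an
    # indent of '\t  ' counts as 10 characters; with tabsize=None, tabs
    # are left unexpanded (self._tab is '\t').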
    def _tokens(self, stream, max):
        '''
        Generate tokens, on demand.
        '''
        id_ = s_id(stream)
        try:
            while not s_empty(stream):

                # caches for different tokens with same contents differ
                id_ += 1
                (line, next_stream) = s_line(stream, False)
                line_stream = s_stream(stream, line)
                size = 0
                # if we use blocks, match leading space
                if self.blocks:
                    try:
                        (_, size, _) = self.s_regexp.size_match(line_stream)
                    except TypeError:
                        pass
                # this will be empty (size=0) if blocks unused
                (indent, next_line_stream) = s_next(line_stream, count=size)
                indent = indent.replace('\t', self._tab)
                yield ((START,),
                       s_stream(line_stream, indent, id_=id_, max=max))
                line_stream = next_line_stream

                while not s_empty(line_stream):
                    id_ += 1
                    try:
                        (terminals, match, next_line_stream) = \
                            self.t_regexp.match(line_stream)
                        yield (terminals, s_stream(line_stream, match,
                                                   max=max, id_=id_))
                    except TypeError:
                        (terminals, _size, next_line_stream) = \
                            self.s_regexp.size_match(line_stream)
                    line_stream = next_line_stream

                id_ += 1
                yield ((END,),
                       s_stream(line_stream, '', max=max, id_=id_))
                stream = next_stream

        except TypeError:
            raise RuntimeLexerError(
                s_fmt(stream,
                      'No token for {rest} at {location} of {text}.'))
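
As the class docstring above notes, this lexer is not normally constructed
directly: calling config.lines() on a matcher installs it. A minimal usage
sketch (Token and Line are LEPL's public matchers; the grammar itself is
illustrative, and only the config.lines() call is taken from the docstring):

    from lepl import Token, Line

    word = Token('[a-z]+')
    sentence = Line(word[1:])
    parser = sentence[1:]
    parser.config.lines()
    parser.parse('abc def\n')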