blob: d50886ed2f106622fd2eb1fd28709bc7e942fd2a [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001# -----------------------------------------------------------------------------
2# ply: yacc.py
3#
4# Copyright (C) 2001-2009,
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are
10# met:
11#
12# * Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14# * Redistributions in binary form must reproduce the above copyright notice,
15# this list of conditions and the following disclaimer in the documentation
16# and/or other materials provided with the distribution.
17# * Neither the name of the David Beazley or Dabeaz LLC may be used to
18# endorse or promote products derived from this software without
19# specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32# -----------------------------------------------------------------------------
33#
34# This implements an LR parser that is constructed from grammar rules defined
35# as Python functions. The grammer is specified by supplying the BNF inside
36# Python documentation strings. The inspiration for this technique was borrowed
37# from John Aycock's Spark parsing system. PLY might be viewed as cross between
38# Spark and the GNU bison utility.
39#
40# The current implementation is only somewhat object-oriented. The
41# LR parser itself is defined in terms of an object (which allows multiple
42# parsers to co-exist). However, most of the variables used during table
43# construction are defined in terms of global variables. Users shouldn't
44# notice unless they are trying to define multiple parsers at the same
45# time using threads (in which case they should have their head examined).
46#
47# This implementation supports both SLR and LALR(1) parsing. LALR(1)
48# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
51# by the more efficient DeRemer and Pennello algorithm.
52#
53# :::::::: WARNING :::::::
54#
55# Construction of LR parsing tables is fairly complicated and expensive.
56# To make this module run fast, a *LOT* of work has been put into
57# optimization---often at the expensive of readability and what might
58# consider to be good Python "coding style." Modify the code at your
59# own risk!
60# ----------------------------------------------------------------------------
61
62__version__ = "3.3"
63__tabversion__ = "3.2" # Table version
64
65#-----------------------------------------------------------------------------
66# === User configurable parameters ===
67#
68# Change these to modify the default behavior of yacc (if you wish)
69#-----------------------------------------------------------------------------
70
71yaccdebug = 0 # Debugging mode. If set, yacc generates a
72 # a 'parser.out' file in the current directory
73
74debug_file = 'parser.out' # Default name of the debugging file
75tab_module = 'parsetab' # Default name of the table module
76default_lr = 'LALR' # Default LR table generation method
77
78error_count = 3 # Number of symbols that must be shifted to leave recovery mode
79
80yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
81 # implementations of certain functions.
82
83resultlimit = 40 # Size limit of results when running in debug mode.
84
85pickle_protocol = 0 # Protocol to use when writing pickle files
86
87import re, types, sys, os.path
88
89# Compatibility function for python 2.6/3.0
90if sys.version_info[0] < 3:
91 def func_code(f):
92 return f.func_code
93else:
94 def func_code(f):
95 return f.__code__
96
97# Compatibility
98try:
99 MAXINT = sys.maxint
100except AttributeError:
101 MAXINT = sys.maxsize
102
103# Python 2.x/3.0 compatibility.
104def load_ply_lex():
105 if sys.version_info[0] < 3:
106 import lex
107 else:
108 import ply.lex as lex
109 return lex
110
111# This object is a stand-in for a logging object created by the
112# logging module. PLY will use this by default to create things
113# such as the parser.out file. If a user wants more detailed
114# information, they can create their own logging object and pass
115# it into PLY.
116
117class PlyLogger(object):
118 def __init__(self,f):
119 self.f = f
120 def debug(self,msg,*args,**kwargs):
121 self.f.write((msg % args) + "\n")
122 info = debug
123
124 def warning(self,msg,*args,**kwargs):
125 self.f.write("WARNING: "+ (msg % args) + "\n")
126
127 def error(self,msg,*args,**kwargs):
128 self.f.write("ERROR: " + (msg % args) + "\n")
129
130 critical = debug
131
132# Null logger is used when no output is generated. Does nothing.
133class NullLogger(object):
134 def __getattribute__(self,name):
135 return self
136 def __call__(self,*args,**kwargs):
137 return self
138
139# Exception raised for yacc-related errors
140class YaccError(Exception): pass
141
142# Format the result message that the parser produces when running in debug mode.
143def format_result(r):
144 repr_str = repr(r)
145 if '\n' in repr_str: repr_str = repr(repr_str)
146 if len(repr_str) > resultlimit:
147 repr_str = repr_str[:resultlimit]+" ..."
148 result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
149 return result
150
151
152# Format stack entries when the parser is running in debug mode
153def format_stack_entry(r):
154 repr_str = repr(r)
155 if '\n' in repr_str: repr_str = repr(repr_str)
156 if len(repr_str) < 16:
157 return repr_str
158 else:
159 return "<%s @ 0x%x>" % (type(r).__name__,id(r))
160
161#-----------------------------------------------------------------------------
162# === LR Parsing Engine ===
163#
164# The following classes are used for the LR parser itself. These are not
165# used during table construction and are independent of the actual LR
166# table generation algorithm
167#-----------------------------------------------------------------------------
168
169# This class is used to hold non-terminal grammar symbols during parsing.
170# It normally has the following attributes set:
171# .type = Grammar symbol type
172# .value = Symbol value
173# .lineno = Starting line number
174# .endlineno = Ending line number (optional, set automatically)
175# .lexpos = Starting lex position
176# .endlexpos = Ending lex position (optional, set automatically)
177
178class YaccSymbol:
179 def __str__(self): return self.type
180 def __repr__(self): return str(self)
181
182# This class is a wrapper around the objects actually passed to each
183# grammar rule. Index lookup and assignment actually assign the
184# .value attribute of the underlying YaccSymbol object.
185# The lineno() method returns the line number of a given
186# item (or 0 if not defined). The linespan() method returns
187# a tuple of (startline,endline) representing the range of lines
188# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
189# representing the range of positional information for a symbol.
190
191class YaccProduction:
192 def __init__(self,s,stack=None):
193 self.slice = s
194 self.stack = stack
195 self.lexer = None
196 self.parser= None
197 def __getitem__(self,n):
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600198 if isinstance(n,slice):
199 return [self[i] for i in range(*(n.indices(len(self.slice))))]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500200 if n >= 0: return self.slice[n].value
201 else: return self.stack[n].value
202
203 def __setitem__(self,n,v):
204 self.slice[n].value = v
205
206 def __getslice__(self,i,j):
207 return [s.value for s in self.slice[i:j]]
208
209 def __len__(self):
210 return len(self.slice)
211
212 def lineno(self,n):
213 return getattr(self.slice[n],"lineno",0)
214
215 def set_lineno(self,n,lineno):
216 self.slice[n].lineno = lineno
217
218 def linespan(self,n):
219 startline = getattr(self.slice[n],"lineno",0)
220 endline = getattr(self.slice[n],"endlineno",startline)
221 return startline,endline
222
223 def lexpos(self,n):
224 return getattr(self.slice[n],"lexpos",0)
225
226 def lexspan(self,n):
227 startpos = getattr(self.slice[n],"lexpos",0)
228 endpos = getattr(self.slice[n],"endlexpos",startpos)
229 return startpos,endpos
230
231 def error(self):
232 raise SyntaxError
233
234
235# -----------------------------------------------------------------------------
236# == LRParser ==
237#
238# The LR Parsing engine.
239# -----------------------------------------------------------------------------
240
241class LRParser:
242 def __init__(self,lrtab,errorf):
243 self.productions = lrtab.lr_productions
244 self.action = lrtab.lr_action
245 self.goto = lrtab.lr_goto
246 self.errorfunc = errorf
247
248 def errok(self):
249 self.errorok = 1
250
251 def restart(self):
252 del self.statestack[:]
253 del self.symstack[:]
254 sym = YaccSymbol()
255 sym.type = '$end'
256 self.symstack.append(sym)
257 self.statestack.append(0)
258
259 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
260 if debug or yaccdevel:
261 if isinstance(debug,int):
262 debug = PlyLogger(sys.stderr)
263 return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
264 elif tracking:
265 return self.parseopt(input,lexer,debug,tracking,tokenfunc)
266 else:
267 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
268
269
270 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
271 # parsedebug().
272 #
273 # This is the debugging enabled version of parse(). All changes made to the
274 # parsing engine should be made here. For the non-debugging version,
275 # copy this code to a method parseopt() and delete all of the sections
276 # enclosed in:
277 #
278 # #--! DEBUG
279 # statements
280 # #--! DEBUG
281 #
282 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
283
284 def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
285 lookahead = None # Current lookahead symbol
286 lookaheadstack = [ ] # Stack of lookahead symbols
287 actions = self.action # Local reference to action table (to avoid lookup on self.)
288 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
289 prod = self.productions # Local reference to production list (to avoid lookup on self.)
290 pslice = YaccProduction(None) # Production object passed to grammar rules
291 errorcount = 0 # Used during error recovery
292
293 # --! DEBUG
294 debug.info("PLY: PARSE DEBUG START")
295 # --! DEBUG
296
297 # If no lexer was given, we will try to use the lex module
298 if not lexer:
299 lex = load_ply_lex()
300 lexer = lex.lexer
301
302 # Set up the lexer and parser objects on pslice
303 pslice.lexer = lexer
304 pslice.parser = self
305
306 # If input was supplied, pass to lexer
307 if input is not None:
308 lexer.input(input)
309
310 if tokenfunc is None:
311 # Tokenize function
312 get_token = lexer.token
313 else:
314 get_token = tokenfunc
315
316 # Set up the state and symbol stacks
317
318 statestack = [ ] # Stack of parsing states
319 self.statestack = statestack
320 symstack = [ ] # Stack of grammar symbols
321 self.symstack = symstack
322
323 pslice.stack = symstack # Put in the production
324 errtoken = None # Err token
325
326 # The start state is assumed to be (0,$end)
327
328 statestack.append(0)
329 sym = YaccSymbol()
330 sym.type = "$end"
331 symstack.append(sym)
332 state = 0
333 while 1:
334 # Get the next symbol on the input. If a lookahead symbol
335 # is already set, we just use that. Otherwise, we'll pull
336 # the next token off of the lookaheadstack or from the lexer
337
338 # --! DEBUG
339 debug.debug('')
340 debug.debug('State : %s', state)
341 # --! DEBUG
342
343 if not lookahead:
344 if not lookaheadstack:
345 lookahead = get_token() # Get the next token
346 else:
347 lookahead = lookaheadstack.pop()
348 if not lookahead:
349 lookahead = YaccSymbol()
350 lookahead.type = "$end"
351
352 # --! DEBUG
353 debug.debug('Stack : %s',
354 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
355 # --! DEBUG
356
357 # Check the action table
358 ltype = lookahead.type
359 t = actions[state].get(ltype)
360
361 if t is not None:
362 if t > 0:
363 # shift a symbol on the stack
364 statestack.append(t)
365 state = t
366
367 # --! DEBUG
368 debug.debug("Action : Shift and goto state %s", t)
369 # --! DEBUG
370
371 symstack.append(lookahead)
372 lookahead = None
373
374 # Decrease error count on successful shift
375 if errorcount: errorcount -=1
376 continue
377
378 if t < 0:
379 # reduce a symbol on the stack, emit a production
380 p = prod[-t]
381 pname = p.name
382 plen = p.len
383
384 # Get production function
385 sym = YaccSymbol()
386 sym.type = pname # Production name
387 sym.value = None
388
389 # --! DEBUG
390 if plen:
391 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
392 else:
393 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
394
395 # --! DEBUG
396
397 if plen:
398 targ = symstack[-plen-1:]
399 targ[0] = sym
400
401 # --! TRACKING
402 if tracking:
403 t1 = targ[1]
404 sym.lineno = t1.lineno
405 sym.lexpos = t1.lexpos
406 t1 = targ[-1]
407 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
408 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
409
410 # --! TRACKING
411
412 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
413 # The code enclosed in this section is duplicated
414 # below as a performance optimization. Make sure
415 # changes get made in both locations.
416
417 pslice.slice = targ
418
419 try:
420 # Call the grammar rule with our special slice object
421 del symstack[-plen:]
422 del statestack[-plen:]
423 p.callable(pslice)
424 # --! DEBUG
425 debug.info("Result : %s", format_result(pslice[0]))
426 # --! DEBUG
427 symstack.append(sym)
428 state = goto[statestack[-1]][pname]
429 statestack.append(state)
430 except SyntaxError:
431 # If an error was set. Enter error recovery state
432 lookaheadstack.append(lookahead)
433 symstack.pop()
434 statestack.pop()
435 state = statestack[-1]
436 sym.type = 'error'
437 lookahead = sym
438 errorcount = error_count
439 self.errorok = 0
440 continue
441 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
442
443 else:
444
445 # --! TRACKING
446 if tracking:
447 sym.lineno = lexer.lineno
448 sym.lexpos = lexer.lexpos
449 # --! TRACKING
450
451 targ = [ sym ]
452
453 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
454 # The code enclosed in this section is duplicated
455 # above as a performance optimization. Make sure
456 # changes get made in both locations.
457
458 pslice.slice = targ
459
460 try:
461 # Call the grammar rule with our special slice object
462 p.callable(pslice)
463 # --! DEBUG
464 debug.info("Result : %s", format_result(pslice[0]))
465 # --! DEBUG
466 symstack.append(sym)
467 state = goto[statestack[-1]][pname]
468 statestack.append(state)
469 except SyntaxError:
470 # If an error was set. Enter error recovery state
471 lookaheadstack.append(lookahead)
472 symstack.pop()
473 statestack.pop()
474 state = statestack[-1]
475 sym.type = 'error'
476 lookahead = sym
477 errorcount = error_count
478 self.errorok = 0
479 continue
480 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
481
482 if t == 0:
483 n = symstack[-1]
484 result = getattr(n,"value",None)
485 # --! DEBUG
486 debug.info("Done : Returning %s", format_result(result))
487 debug.info("PLY: PARSE DEBUG END")
488 # --! DEBUG
489 return result
490
491 if t == None:
492
493 # --! DEBUG
494 debug.error('Error : %s',
495 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
496 # --! DEBUG
497
498 # We have some kind of parsing error here. To handle
499 # this, we are going to push the current token onto
500 # the tokenstack and replace it with an 'error' token.
501 # If there are any synchronization rules, they may
502 # catch it.
503 #
504 # In addition to pushing the error token, we call call
505 # the user defined p_error() function if this is the
506 # first syntax error. This function is only called if
507 # errorcount == 0.
508 if errorcount == 0 or self.errorok:
509 errorcount = error_count
510 self.errorok = 0
511 errtoken = lookahead
512 if errtoken.type == "$end":
513 errtoken = None # End of file!
514 if self.errorfunc:
515 global errok,token,restart
516 errok = self.errok # Set some special functions available in error recovery
517 token = get_token
518 restart = self.restart
519 if errtoken and not hasattr(errtoken,'lexer'):
520 errtoken.lexer = lexer
521 tok = self.errorfunc(errtoken)
522 del errok, token, restart # Delete special functions
523
524 if self.errorok:
525 # User must have done some kind of panic
526 # mode recovery on their own. The
527 # returned token is the next lookahead
528 lookahead = tok
529 errtoken = None
530 continue
531 else:
532 if errtoken:
533 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
534 else: lineno = 0
535 if lineno:
536 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
537 else:
538 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
539 else:
540 sys.stderr.write("yacc: Parse error in input. EOF\n")
541 return
542
543 else:
544 errorcount = error_count
545
546 # case 1: the statestack only has 1 entry on it. If we're in this state, the
547 # entire parse has been rolled back and we're completely hosed. The token is
548 # discarded and we just keep going.
549
550 if len(statestack) <= 1 and lookahead.type != "$end":
551 lookahead = None
552 errtoken = None
553 state = 0
554 # Nuke the pushback stack
555 del lookaheadstack[:]
556 continue
557
558 # case 2: the statestack has a couple of entries on it, but we're
559 # at the end of the file. nuke the top entry and generate an error token
560
561 # Start nuking entries on the stack
562 if lookahead.type == "$end":
563 # Whoa. We're really hosed here. Bail out
564 return
565
566 if lookahead.type != 'error':
567 sym = symstack[-1]
568 if sym.type == 'error':
569 # Hmmm. Error is on top of stack, we'll just nuke input
570 # symbol and continue
571 lookahead = None
572 continue
573 t = YaccSymbol()
574 t.type = 'error'
575 if hasattr(lookahead,"lineno"):
576 t.lineno = lookahead.lineno
577 t.value = lookahead
578 lookaheadstack.append(lookahead)
579 lookahead = t
580 else:
581 symstack.pop()
582 statestack.pop()
583 state = statestack[-1] # Potential bug fix
584
585 continue
586
587 # Call an error function here
588 raise RuntimeError("yacc: internal parser error!!!\n")
589
590 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
591 # parseopt().
592 #
593 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
594 # Edit the debug version above, then copy any modifications to the method
595 # below while removing #--! DEBUG sections.
596 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
597
598
599 def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
600 lookahead = None # Current lookahead symbol
601 lookaheadstack = [ ] # Stack of lookahead symbols
602 actions = self.action # Local reference to action table (to avoid lookup on self.)
603 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
604 prod = self.productions # Local reference to production list (to avoid lookup on self.)
605 pslice = YaccProduction(None) # Production object passed to grammar rules
606 errorcount = 0 # Used during error recovery
607
608 # If no lexer was given, we will try to use the lex module
609 if not lexer:
610 lex = load_ply_lex()
611 lexer = lex.lexer
612
613 # Set up the lexer and parser objects on pslice
614 pslice.lexer = lexer
615 pslice.parser = self
616
617 # If input was supplied, pass to lexer
618 if input is not None:
619 lexer.input(input)
620
621 if tokenfunc is None:
622 # Tokenize function
623 get_token = lexer.token
624 else:
625 get_token = tokenfunc
626
627 # Set up the state and symbol stacks
628
629 statestack = [ ] # Stack of parsing states
630 self.statestack = statestack
631 symstack = [ ] # Stack of grammar symbols
632 self.symstack = symstack
633
634 pslice.stack = symstack # Put in the production
635 errtoken = None # Err token
636
637 # The start state is assumed to be (0,$end)
638
639 statestack.append(0)
640 sym = YaccSymbol()
641 sym.type = '$end'
642 symstack.append(sym)
643 state = 0
644 while 1:
645 # Get the next symbol on the input. If a lookahead symbol
646 # is already set, we just use that. Otherwise, we'll pull
647 # the next token off of the lookaheadstack or from the lexer
648
649 if not lookahead:
650 if not lookaheadstack:
651 lookahead = get_token() # Get the next token
652 else:
653 lookahead = lookaheadstack.pop()
654 if not lookahead:
655 lookahead = YaccSymbol()
656 lookahead.type = '$end'
657
658 # Check the action table
659 ltype = lookahead.type
660 t = actions[state].get(ltype)
661
662 if t is not None:
663 if t > 0:
664 # shift a symbol on the stack
665 statestack.append(t)
666 state = t
667
668 symstack.append(lookahead)
669 lookahead = None
670
671 # Decrease error count on successful shift
672 if errorcount: errorcount -=1
673 continue
674
675 if t < 0:
676 # reduce a symbol on the stack, emit a production
677 p = prod[-t]
678 pname = p.name
679 plen = p.len
680
681 # Get production function
682 sym = YaccSymbol()
683 sym.type = pname # Production name
684 sym.value = None
685
686 if plen:
687 targ = symstack[-plen-1:]
688 targ[0] = sym
689
690 # --! TRACKING
691 if tracking:
692 t1 = targ[1]
693 sym.lineno = t1.lineno
694 sym.lexpos = t1.lexpos
695 t1 = targ[-1]
696 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
697 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
698
699 # --! TRACKING
700
701 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
702 # The code enclosed in this section is duplicated
703 # below as a performance optimization. Make sure
704 # changes get made in both locations.
705
706 pslice.slice = targ
707
708 try:
709 # Call the grammar rule with our special slice object
710 del symstack[-plen:]
711 del statestack[-plen:]
712 p.callable(pslice)
713 symstack.append(sym)
714 state = goto[statestack[-1]][pname]
715 statestack.append(state)
716 except SyntaxError:
717 # If an error was set. Enter error recovery state
718 lookaheadstack.append(lookahead)
719 symstack.pop()
720 statestack.pop()
721 state = statestack[-1]
722 sym.type = 'error'
723 lookahead = sym
724 errorcount = error_count
725 self.errorok = 0
726 continue
727 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
728
729 else:
730
731 # --! TRACKING
732 if tracking:
733 sym.lineno = lexer.lineno
734 sym.lexpos = lexer.lexpos
735 # --! TRACKING
736
737 targ = [ sym ]
738
739 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
740 # The code enclosed in this section is duplicated
741 # above as a performance optimization. Make sure
742 # changes get made in both locations.
743
744 pslice.slice = targ
745
746 try:
747 # Call the grammar rule with our special slice object
748 p.callable(pslice)
749 symstack.append(sym)
750 state = goto[statestack[-1]][pname]
751 statestack.append(state)
752 except SyntaxError:
753 # If an error was set. Enter error recovery state
754 lookaheadstack.append(lookahead)
755 symstack.pop()
756 statestack.pop()
757 state = statestack[-1]
758 sym.type = 'error'
759 lookahead = sym
760 errorcount = error_count
761 self.errorok = 0
762 continue
763 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
764
765 if t == 0:
766 n = symstack[-1]
767 return getattr(n,"value",None)
768
769 if t == None:
770
771 # We have some kind of parsing error here. To handle
772 # this, we are going to push the current token onto
773 # the tokenstack and replace it with an 'error' token.
774 # If there are any synchronization rules, they may
775 # catch it.
776 #
777 # In addition to pushing the error token, we call call
778 # the user defined p_error() function if this is the
779 # first syntax error. This function is only called if
780 # errorcount == 0.
781 if errorcount == 0 or self.errorok:
782 errorcount = error_count
783 self.errorok = 0
784 errtoken = lookahead
785 if errtoken.type == '$end':
786 errtoken = None # End of file!
787 if self.errorfunc:
788 global errok,token,restart
789 errok = self.errok # Set some special functions available in error recovery
790 token = get_token
791 restart = self.restart
792 if errtoken and not hasattr(errtoken,'lexer'):
793 errtoken.lexer = lexer
794 tok = self.errorfunc(errtoken)
795 del errok, token, restart # Delete special functions
796
797 if self.errorok:
798 # User must have done some kind of panic
799 # mode recovery on their own. The
800 # returned token is the next lookahead
801 lookahead = tok
802 errtoken = None
803 continue
804 else:
805 if errtoken:
806 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
807 else: lineno = 0
808 if lineno:
809 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
810 else:
811 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
812 else:
813 sys.stderr.write("yacc: Parse error in input. EOF\n")
814 return
815
816 else:
817 errorcount = error_count
818
819 # case 1: the statestack only has 1 entry on it. If we're in this state, the
820 # entire parse has been rolled back and we're completely hosed. The token is
821 # discarded and we just keep going.
822
823 if len(statestack) <= 1 and lookahead.type != '$end':
824 lookahead = None
825 errtoken = None
826 state = 0
827 # Nuke the pushback stack
828 del lookaheadstack[:]
829 continue
830
831 # case 2: the statestack has a couple of entries on it, but we're
832 # at the end of the file. nuke the top entry and generate an error token
833
834 # Start nuking entries on the stack
835 if lookahead.type == '$end':
836 # Whoa. We're really hosed here. Bail out
837 return
838
839 if lookahead.type != 'error':
840 sym = symstack[-1]
841 if sym.type == 'error':
842 # Hmmm. Error is on top of stack, we'll just nuke input
843 # symbol and continue
844 lookahead = None
845 continue
846 t = YaccSymbol()
847 t.type = 'error'
848 if hasattr(lookahead,"lineno"):
849 t.lineno = lookahead.lineno
850 t.value = lookahead
851 lookaheadstack.append(lookahead)
852 lookahead = t
853 else:
854 symstack.pop()
855 statestack.pop()
856 state = statestack[-1] # Potential bug fix
857
858 continue
859
860 # Call an error function here
861 raise RuntimeError("yacc: internal parser error!!!\n")
862
863 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
864 # parseopt_notrack().
865 #
866 # Optimized version of parseopt() with line number tracking removed.
867 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
868 # code in the #--! TRACKING sections
869 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
870
871 def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
872 lookahead = None # Current lookahead symbol
873 lookaheadstack = [ ] # Stack of lookahead symbols
874 actions = self.action # Local reference to action table (to avoid lookup on self.)
875 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
876 prod = self.productions # Local reference to production list (to avoid lookup on self.)
877 pslice = YaccProduction(None) # Production object passed to grammar rules
878 errorcount = 0 # Used during error recovery
879
880 # If no lexer was given, we will try to use the lex module
881 if not lexer:
882 lex = load_ply_lex()
883 lexer = lex.lexer
884
885 # Set up the lexer and parser objects on pslice
886 pslice.lexer = lexer
887 pslice.parser = self
888
889 # If input was supplied, pass to lexer
890 if input is not None:
891 lexer.input(input)
892
893 if tokenfunc is None:
894 # Tokenize function
895 get_token = lexer.token
896 else:
897 get_token = tokenfunc
898
899 # Set up the state and symbol stacks
900
901 statestack = [ ] # Stack of parsing states
902 self.statestack = statestack
903 symstack = [ ] # Stack of grammar symbols
904 self.symstack = symstack
905
906 pslice.stack = symstack # Put in the production
907 errtoken = None # Err token
908
909 # The start state is assumed to be (0,$end)
910
911 statestack.append(0)
912 sym = YaccSymbol()
913 sym.type = '$end'
914 symstack.append(sym)
915 state = 0
916 while 1:
917 # Get the next symbol on the input. If a lookahead symbol
918 # is already set, we just use that. Otherwise, we'll pull
919 # the next token off of the lookaheadstack or from the lexer
920
921 if not lookahead:
922 if not lookaheadstack:
923 lookahead = get_token() # Get the next token
924 else:
925 lookahead = lookaheadstack.pop()
926 if not lookahead:
927 lookahead = YaccSymbol()
928 lookahead.type = '$end'
929
930 # Check the action table
931 ltype = lookahead.type
932 t = actions[state].get(ltype)
933
934 if t is not None:
935 if t > 0:
936 # shift a symbol on the stack
937 statestack.append(t)
938 state = t
939
940 symstack.append(lookahead)
941 lookahead = None
942
943 # Decrease error count on successful shift
944 if errorcount: errorcount -=1
945 continue
946
947 if t < 0:
948 # reduce a symbol on the stack, emit a production
949 p = prod[-t]
950 pname = p.name
951 plen = p.len
952
953 # Get production function
954 sym = YaccSymbol()
955 sym.type = pname # Production name
956 sym.value = None
957
958 if plen:
959 targ = symstack[-plen-1:]
960 targ[0] = sym
961
962 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
963 # The code enclosed in this section is duplicated
964 # below as a performance optimization. Make sure
965 # changes get made in both locations.
966
967 pslice.slice = targ
968
969 try:
970 # Call the grammar rule with our special slice object
971 del symstack[-plen:]
972 del statestack[-plen:]
973 p.callable(pslice)
974 symstack.append(sym)
975 state = goto[statestack[-1]][pname]
976 statestack.append(state)
977 except SyntaxError:
978 # If an error was set. Enter error recovery state
979 lookaheadstack.append(lookahead)
980 symstack.pop()
981 statestack.pop()
982 state = statestack[-1]
983 sym.type = 'error'
984 lookahead = sym
985 errorcount = error_count
986 self.errorok = 0
987 continue
988 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
989
990 else:
991
992 targ = [ sym ]
993
994 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
995 # The code enclosed in this section is duplicated
996 # above as a performance optimization. Make sure
997 # changes get made in both locations.
998
999 pslice.slice = targ
1000
1001 try:
1002 # Call the grammar rule with our special slice object
1003 p.callable(pslice)
1004 symstack.append(sym)
1005 state = goto[statestack[-1]][pname]
1006 statestack.append(state)
1007 except SyntaxError:
1008 # If an error was set. Enter error recovery state
1009 lookaheadstack.append(lookahead)
1010 symstack.pop()
1011 statestack.pop()
1012 state = statestack[-1]
1013 sym.type = 'error'
1014 lookahead = sym
1015 errorcount = error_count
1016 self.errorok = 0
1017 continue
1018 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1019
1020 if t == 0:
1021 n = symstack[-1]
1022 return getattr(n,"value",None)
1023
1024 if t == None:
1025
1026 # We have some kind of parsing error here. To handle
1027 # this, we are going to push the current token onto
1028 # the tokenstack and replace it with an 'error' token.
1029 # If there are any synchronization rules, they may
1030 # catch it.
1031 #
1032 # In addition to pushing the error token, we call call
1033 # the user defined p_error() function if this is the
1034 # first syntax error. This function is only called if
1035 # errorcount == 0.
1036 if errorcount == 0 or self.errorok:
1037 errorcount = error_count
1038 self.errorok = 0
1039 errtoken = lookahead
1040 if errtoken.type == '$end':
1041 errtoken = None # End of file!
1042 if self.errorfunc:
1043 global errok,token,restart
1044 errok = self.errok # Set some special functions available in error recovery
1045 token = get_token
1046 restart = self.restart
1047 if errtoken and not hasattr(errtoken,'lexer'):
1048 errtoken.lexer = lexer
1049 tok = self.errorfunc(errtoken)
1050 del errok, token, restart # Delete special functions
1051
1052 if self.errorok:
1053 # User must have done some kind of panic
1054 # mode recovery on their own. The
1055 # returned token is the next lookahead
1056 lookahead = tok
1057 errtoken = None
1058 continue
1059 else:
1060 if errtoken:
1061 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
1062 else: lineno = 0
1063 if lineno:
1064 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
1065 else:
1066 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
1067 else:
1068 sys.stderr.write("yacc: Parse error in input. EOF\n")
1069 return
1070
1071 else:
1072 errorcount = error_count
1073
1074 # case 1: the statestack only has 1 entry on it. If we're in this state, the
1075 # entire parse has been rolled back and we're completely hosed. The token is
1076 # discarded and we just keep going.
1077
1078 if len(statestack) <= 1 and lookahead.type != '$end':
1079 lookahead = None
1080 errtoken = None
1081 state = 0
1082 # Nuke the pushback stack
1083 del lookaheadstack[:]
1084 continue
1085
1086 # case 2: the statestack has a couple of entries on it, but we're
1087 # at the end of the file. nuke the top entry and generate an error token
1088
1089 # Start nuking entries on the stack
1090 if lookahead.type == '$end':
1091 # Whoa. We're really hosed here. Bail out
1092 return
1093
1094 if lookahead.type != 'error':
1095 sym = symstack[-1]
1096 if sym.type == 'error':
1097 # Hmmm. Error is on top of stack, we'll just nuke input
1098 # symbol and continue
1099 lookahead = None
1100 continue
1101 t = YaccSymbol()
1102 t.type = 'error'
1103 if hasattr(lookahead,"lineno"):
1104 t.lineno = lookahead.lineno
1105 t.value = lookahead
1106 lookaheadstack.append(lookahead)
1107 lookahead = t
1108 else:
1109 symstack.pop()
1110 statestack.pop()
1111 state = statestack[-1] # Potential bug fix
1112
1113 continue
1114
1115 # Call an error function here
1116 raise RuntimeError("yacc: internal parser error!!!\n")
1117
1118# -----------------------------------------------------------------------------
1119# === Grammar Representation ===
1120#
1121# The following functions, classes, and variables are used to represent and
1122# manipulate the rules that make up a grammar.
1123# -----------------------------------------------------------------------------
1124
1125import re
1126
1127# regex matching identifiers
1128_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1129
1130# -----------------------------------------------------------------------------
1131# class Production:
1132#
1133# This class stores the raw information about a single production or grammar rule.
1134# A grammar rule refers to a specification such as this:
1135#
1136# expr : expr PLUS term
1137#
1138# Here are the basic attributes defined on all productions
1139#
1140# name - Name of the production. For example 'expr'
1141# prod - A list of symbols on the right side ['expr','PLUS','term']
1142# prec - Production precedence level
1143# number - Production number.
1144# func - Function that executes on reduce
1145# file - File where production function is defined
1146# lineno - Line number where production function is defined
1147#
1148# The following attributes are defined or optional.
1149#
1150# len - Length of the production (number of symbols on right hand side)
1151# usyms - Set of unique symbols found in the production
1152# -----------------------------------------------------------------------------
1153
1154class Production(object):
1155 reduced = 0
1156 def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
1157 self.name = name
1158 self.prod = tuple(prod)
1159 self.number = number
1160 self.func = func
1161 self.callable = None
1162 self.file = file
1163 self.line = line
1164 self.prec = precedence
1165
1166 # Internal settings used during table construction
1167
1168 self.len = len(self.prod) # Length of the production
1169
1170 # Create a list of unique production symbols used in the production
1171 self.usyms = [ ]
1172 for s in self.prod:
1173 if s not in self.usyms:
1174 self.usyms.append(s)
1175
1176 # List of all LR items for the production
1177 self.lr_items = []
1178 self.lr_next = None
1179
1180 # Create a string representation
1181 if self.prod:
1182 self.str = "%s -> %s" % (self.name," ".join(self.prod))
1183 else:
1184 self.str = "%s -> <empty>" % self.name
1185
1186 def __str__(self):
1187 return self.str
1188
1189 def __repr__(self):
1190 return "Production("+str(self)+")"
1191
1192 def __len__(self):
1193 return len(self.prod)
1194
1195 def __nonzero__(self):
1196 return 1
1197
1198 def __getitem__(self,index):
1199 return self.prod[index]
1200
1201 # Return the nth lr_item from the production (or None if at the end)
1202 def lr_item(self,n):
1203 if n > len(self.prod): return None
1204 p = LRItem(self,n)
1205
1206 # Precompute the list of productions immediately following. Hack. Remove later
1207 try:
1208 p.lr_after = Prodnames[p.prod[n+1]]
1209 except (IndexError,KeyError):
1210 p.lr_after = []
1211 try:
1212 p.lr_before = p.prod[n-1]
1213 except IndexError:
1214 p.lr_before = None
1215
1216 return p
1217
1218 # Bind the production function name to a callable
1219 def bind(self,pdict):
1220 if self.func:
1221 self.callable = pdict[self.func]
1222
1223# This class serves as a minimal standin for Production objects when
1224# reading table data from files. It only contains information
1225# actually used by the LR parsing engine, plus some additional
1226# debugging information.
1227class MiniProduction(object):
1228 def __init__(self,str,name,len,func,file,line):
1229 self.name = name
1230 self.len = len
1231 self.func = func
1232 self.callable = None
1233 self.file = file
1234 self.line = line
1235 self.str = str
1236 def __str__(self):
1237 return self.str
1238 def __repr__(self):
1239 return "MiniProduction(%s)" % self.str
1240
1241 # Bind the production function name to a callable
1242 def bind(self,pdict):
1243 if self.func:
1244 self.callable = pdict[self.func]
1245
1246
1247# -----------------------------------------------------------------------------
1248# class LRItem
1249#
1250# This class represents a specific stage of parsing a production rule. For
1251# example:
1252#
1253# expr : expr . PLUS term
1254#
1255# In the above, the "." represents the current location of the parse. Here
1256# basic attributes:
1257#
1258# name - Name of the production. For example 'expr'
1259# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1260# number - Production number.
1261#
1262# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
1263# then lr_next refers to 'expr -> expr PLUS . term'
1264# lr_index - LR item index (location of the ".") in the prod list.
1265# lookaheads - LALR lookahead symbols for this item
1266# len - Length of the production (number of symbols on right hand side)
1267# lr_after - List of all productions that immediately follow
1268# lr_before - Grammar symbol immediately before
1269# -----------------------------------------------------------------------------
1270
1271class LRItem(object):
1272 def __init__(self,p,n):
1273 self.name = p.name
1274 self.prod = list(p.prod)
1275 self.number = p.number
1276 self.lr_index = n
1277 self.lookaheads = { }
1278 self.prod.insert(n,".")
1279 self.prod = tuple(self.prod)
1280 self.len = len(self.prod)
1281 self.usyms = p.usyms
1282
1283 def __str__(self):
1284 if self.prod:
1285 s = "%s -> %s" % (self.name," ".join(self.prod))
1286 else:
1287 s = "%s -> <empty>" % self.name
1288 return s
1289
1290 def __repr__(self):
1291 return "LRItem("+str(self)+")"
1292
1293# -----------------------------------------------------------------------------
1294# rightmost_terminal()
1295#
1296# Return the rightmost terminal from a list of symbols. Used in add_production()
1297# -----------------------------------------------------------------------------
1298def rightmost_terminal(symbols, terminals):
1299 i = len(symbols) - 1
1300 while i >= 0:
1301 if symbols[i] in terminals:
1302 return symbols[i]
1303 i -= 1
1304 return None
1305
1306# -----------------------------------------------------------------------------
1307# === GRAMMAR CLASS ===
1308#
1309# The following class represents the contents of the specified grammar along
1310# with various computed properties such as first sets, follow sets, LR items, etc.
1311# This data is used for critical parts of the table generation process later.
1312# -----------------------------------------------------------------------------
1313
1314class GrammarError(YaccError): pass
1315
1316class Grammar(object):
1317 def __init__(self,terminals):
1318 self.Productions = [None] # A list of all of the productions. The first
1319 # entry is always reserved for the purpose of
1320 # building an augmented grammar
1321
1322 self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
1323 # productions of that nonterminal.
1324
1325 self.Prodmap = { } # A dictionary that is only used to detect duplicate
1326 # productions.
1327
1328 self.Terminals = { } # A dictionary mapping the names of terminal symbols to a
1329 # list of the rules where they are used.
1330
1331 for term in terminals:
1332 self.Terminals[term] = []
1333
1334 self.Terminals['error'] = []
1335
1336 self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list
1337 # of rule numbers where they are used.
1338
1339 self.First = { } # A dictionary of precomputed FIRST(x) symbols
1340
1341 self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
1342
1343 self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the
1344 # form ('right',level) or ('nonassoc', level) or ('left',level)
1345
1346 self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer.
1347 # This is only used to provide error checking and to generate
1348 # a warning about unused precedence rules.
1349
1350 self.Start = None # Starting symbol for the grammar
1351
1352
1353 def __len__(self):
1354 return len(self.Productions)
1355
1356 def __getitem__(self,index):
1357 return self.Productions[index]
1358
1359 # -----------------------------------------------------------------------------
1360 # set_precedence()
1361 #
1362 # Sets the precedence for a given terminal. assoc is the associativity such as
1363 # 'left','right', or 'nonassoc'. level is a numeric level.
1364 #
1365 # -----------------------------------------------------------------------------
1366
1367 def set_precedence(self,term,assoc,level):
1368 assert self.Productions == [None],"Must call set_precedence() before add_production()"
1369 if term in self.Precedence:
1370 raise GrammarError("Precedence already specified for terminal '%s'" % term)
1371 if assoc not in ['left','right','nonassoc']:
1372 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1373 self.Precedence[term] = (assoc,level)
1374
1375 # -----------------------------------------------------------------------------
1376 # add_production()
1377 #
1378 # Given an action function, this function assembles a production rule and
1379 # computes its precedence level.
1380 #
1381 # The production rule is supplied as a list of symbols. For example,
1382 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1383 # symbols ['expr','PLUS','term'].
1384 #
1385 # Precedence is determined by the precedence of the right-most non-terminal
1386 # or the precedence of a terminal specified by %prec.
1387 #
1388 # A variety of error checks are performed to make sure production symbols
1389 # are valid and that %prec is used correctly.
1390 # -----------------------------------------------------------------------------
1391
1392 def add_production(self,prodname,syms,func=None,file='',line=0):
1393
1394 if prodname in self.Terminals:
1395 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
1396 if prodname == 'error':
1397 raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
1398 if not _is_identifier.match(prodname):
1399 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
1400
1401 # Look for literal tokens
1402 for n,s in enumerate(syms):
1403 if s[0] in "'\"":
1404 try:
1405 c = eval(s)
1406 if (len(c) > 1):
1407 raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
1408 if not c in self.Terminals:
1409 self.Terminals[c] = []
1410 syms[n] = c
1411 continue
1412 except SyntaxError:
1413 pass
1414 if not _is_identifier.match(s) and s != '%prec':
1415 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
1416
1417 # Determine the precedence level
1418 if '%prec' in syms:
1419 if syms[-1] == '%prec':
1420 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1421 if syms[-2] != '%prec':
1422 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1423 precname = syms[-1]
1424 prodprec = self.Precedence.get(precname,None)
1425 if not prodprec:
1426 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
1427 else:
1428 self.UsedPrecedence[precname] = 1
1429 del syms[-2:] # Drop %prec from the rule
1430 else:
1431 # If no %prec, precedence is determined by the rightmost terminal symbol
1432 precname = rightmost_terminal(syms,self.Terminals)
1433 prodprec = self.Precedence.get(precname,('right',0))
1434
1435 # See if the rule is already in the rulemap
1436 map = "%s -> %s" % (prodname,syms)
1437 if map in self.Prodmap:
1438 m = self.Prodmap[map]
1439 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1440 "Previous definition at %s:%d" % (m.file, m.line))
1441
1442 # From this point on, everything is valid. Create a new Production instance
1443 pnumber = len(self.Productions)
1444 if not prodname in self.Nonterminals:
1445 self.Nonterminals[prodname] = [ ]
1446
1447 # Add the production number to Terminals and Nonterminals
1448 for t in syms:
1449 if t in self.Terminals:
1450 self.Terminals[t].append(pnumber)
1451 else:
1452 if not t in self.Nonterminals:
1453 self.Nonterminals[t] = [ ]
1454 self.Nonterminals[t].append(pnumber)
1455
1456 # Create a production and add it to the list of productions
1457 p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1458 self.Productions.append(p)
1459 self.Prodmap[map] = p
1460
1461 # Add to the global productions list
1462 try:
1463 self.Prodnames[prodname].append(p)
1464 except KeyError:
1465 self.Prodnames[prodname] = [ p ]
1466 return 0
1467
1468 # -----------------------------------------------------------------------------
1469 # set_start()
1470 #
1471 # Sets the starting symbol and creates the augmented grammar. Production
1472 # rule 0 is S' -> start where start is the start symbol.
1473 # -----------------------------------------------------------------------------
1474
1475 def set_start(self,start=None):
1476 if not start:
1477 start = self.Productions[1].name
1478 if start not in self.Nonterminals:
1479 raise GrammarError("start symbol %s undefined" % start)
1480 self.Productions[0] = Production(0,"S'",[start])
1481 self.Nonterminals[start].append(0)
1482 self.Start = start
1483
1484 # -----------------------------------------------------------------------------
1485 # find_unreachable()
1486 #
1487 # Find all of the nonterminal symbols that can't be reached from the starting
1488 # symbol. Returns a list of nonterminals that can't be reached.
1489 # -----------------------------------------------------------------------------
1490
1491 def find_unreachable(self):
1492
1493 # Mark all symbols that are reachable from a symbol s
1494 def mark_reachable_from(s):
1495 if reachable[s]:
1496 # We've already reached symbol s.
1497 return
1498 reachable[s] = 1
1499 for p in self.Prodnames.get(s,[]):
1500 for r in p.prod:
1501 mark_reachable_from(r)
1502
1503 reachable = { }
1504 for s in list(self.Terminals) + list(self.Nonterminals):
1505 reachable[s] = 0
1506
1507 mark_reachable_from( self.Productions[0].prod[0] )
1508
1509 return [s for s in list(self.Nonterminals)
1510 if not reachable[s]]
1511
1512 # -----------------------------------------------------------------------------
1513 # infinite_cycles()
1514 #
1515 # This function looks at the various parsing rules and tries to detect
1516 # infinite recursion cycles (grammar rules where there is no possible way
1517 # to derive a string of only terminals).
1518 # -----------------------------------------------------------------------------
1519
1520 def infinite_cycles(self):
1521 terminates = {}
1522
1523 # Terminals:
1524 for t in self.Terminals:
1525 terminates[t] = 1
1526
1527 terminates['$end'] = 1
1528
1529 # Nonterminals:
1530
1531 # Initialize to false:
1532 for n in self.Nonterminals:
1533 terminates[n] = 0
1534
1535 # Then propagate termination until no change:
1536 while 1:
1537 some_change = 0
1538 for (n,pl) in self.Prodnames.items():
1539 # Nonterminal n terminates iff any of its productions terminates.
1540 for p in pl:
1541 # Production p terminates iff all of its rhs symbols terminate.
1542 for s in p.prod:
1543 if not terminates[s]:
1544 # The symbol s does not terminate,
1545 # so production p does not terminate.
1546 p_terminates = 0
1547 break
1548 else:
1549 # didn't break from the loop,
1550 # so every symbol s terminates
1551 # so production p terminates.
1552 p_terminates = 1
1553
1554 if p_terminates:
1555 # symbol n terminates!
1556 if not terminates[n]:
1557 terminates[n] = 1
1558 some_change = 1
1559 # Don't need to consider any more productions for this n.
1560 break
1561
1562 if not some_change:
1563 break
1564
1565 infinite = []
1566 for (s,term) in terminates.items():
1567 if not term:
1568 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1569 # s is used-but-not-defined, and we've already warned of that,
1570 # so it would be overkill to say that it's also non-terminating.
1571 pass
1572 else:
1573 infinite.append(s)
1574
1575 return infinite
1576
1577
1578 # -----------------------------------------------------------------------------
1579 # undefined_symbols()
1580 #
1581 # Find all symbols that were used the grammar, but not defined as tokens or
1582 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
1583 # and prod is the production where the symbol was used.
1584 # -----------------------------------------------------------------------------
1585 def undefined_symbols(self):
1586 result = []
1587 for p in self.Productions:
1588 if not p: continue
1589
1590 for s in p.prod:
1591 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1592 result.append((s,p))
1593 return result
1594
1595 # -----------------------------------------------------------------------------
1596 # unused_terminals()
1597 #
1598 # Find all terminals that were defined, but not used by the grammar. Returns
1599 # a list of all symbols.
1600 # -----------------------------------------------------------------------------
1601 def unused_terminals(self):
1602 unused_tok = []
1603 for s,v in self.Terminals.items():
1604 if s != 'error' and not v:
1605 unused_tok.append(s)
1606
1607 return unused_tok
1608
1609 # ------------------------------------------------------------------------------
1610 # unused_rules()
1611 #
1612 # Find all grammar rules that were defined, but not used (maybe not reachable)
1613 # Returns a list of productions.
1614 # ------------------------------------------------------------------------------
1615
1616 def unused_rules(self):
1617 unused_prod = []
1618 for s,v in self.Nonterminals.items():
1619 if not v:
1620 p = self.Prodnames[s][0]
1621 unused_prod.append(p)
1622 return unused_prod
1623
1624 # -----------------------------------------------------------------------------
1625 # unused_precedence()
1626 #
1627 # Returns a list of tuples (term,precedence) corresponding to precedence
1628 # rules that were never used by the grammar. term is the name of the terminal
1629 # on which precedence was applied and precedence is a string such as 'left' or
1630 # 'right' corresponding to the type of precedence.
1631 # -----------------------------------------------------------------------------
1632
1633 def unused_precedence(self):
1634 unused = []
1635 for termname in self.Precedence:
1636 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1637 unused.append((termname,self.Precedence[termname][0]))
1638
1639 return unused
1640
1641 # -------------------------------------------------------------------------
1642 # _first()
1643 #
1644 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1645 #
1646 # During execution of compute_first1, the result may be incomplete.
1647 # Afterward (e.g., when called from compute_follow()), it will be complete.
1648 # -------------------------------------------------------------------------
1649 def _first(self,beta):
1650
1651 # We are computing First(x1,x2,x3,...,xn)
1652 result = [ ]
1653 for x in beta:
1654 x_produces_empty = 0
1655
1656 # Add all the non-<empty> symbols of First[x] to the result.
1657 for f in self.First[x]:
1658 if f == '<empty>':
1659 x_produces_empty = 1
1660 else:
1661 if f not in result: result.append(f)
1662
1663 if x_produces_empty:
1664 # We have to consider the next x in beta,
1665 # i.e. stay in the loop.
1666 pass
1667 else:
1668 # We don't have to consider any further symbols in beta.
1669 break
1670 else:
1671 # There was no 'break' from the loop,
1672 # so x_produces_empty was true for all x in beta,
1673 # so beta produces empty as well.
1674 result.append('<empty>')
1675
1676 return result
1677
1678 # -------------------------------------------------------------------------
1679 # compute_first()
1680 #
1681 # Compute the value of FIRST1(X) for all symbols
1682 # -------------------------------------------------------------------------
1683 def compute_first(self):
1684 if self.First:
1685 return self.First
1686
1687 # Terminals:
1688 for t in self.Terminals:
1689 self.First[t] = [t]
1690
1691 self.First['$end'] = ['$end']
1692
1693 # Nonterminals:
1694
1695 # Initialize to the empty set:
1696 for n in self.Nonterminals:
1697 self.First[n] = []
1698
1699 # Then propagate symbols until no change:
1700 while 1:
1701 some_change = 0
1702 for n in self.Nonterminals:
1703 for p in self.Prodnames[n]:
1704 for f in self._first(p.prod):
1705 if f not in self.First[n]:
1706 self.First[n].append( f )
1707 some_change = 1
1708 if not some_change:
1709 break
1710
1711 return self.First
1712
1713 # ---------------------------------------------------------------------
1714 # compute_follow()
1715 #
1716 # Computes all of the follow sets for every non-terminal symbol. The
1717 # follow set is the set of all symbols that might follow a given
1718 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1719 # ---------------------------------------------------------------------
1720 def compute_follow(self,start=None):
1721 # If already computed, return the result
1722 if self.Follow:
1723 return self.Follow
1724
1725 # If first sets not computed yet, do that first.
1726 if not self.First:
1727 self.compute_first()
1728
1729 # Add '$end' to the follow list of the start symbol
1730 for k in self.Nonterminals:
1731 self.Follow[k] = [ ]
1732
1733 if not start:
1734 start = self.Productions[1].name
1735
1736 self.Follow[start] = [ '$end' ]
1737
1738 while 1:
1739 didadd = 0
1740 for p in self.Productions[1:]:
1741 # Here is the production set
1742 for i in range(len(p.prod)):
1743 B = p.prod[i]
1744 if B in self.Nonterminals:
1745 # Okay. We got a non-terminal in a production
1746 fst = self._first(p.prod[i+1:])
1747 hasempty = 0
1748 for f in fst:
1749 if f != '<empty>' and f not in self.Follow[B]:
1750 self.Follow[B].append(f)
1751 didadd = 1
1752 if f == '<empty>':
1753 hasempty = 1
1754 if hasempty or i == (len(p.prod)-1):
1755 # Add elements of follow(a) to follow(b)
1756 for f in self.Follow[p.name]:
1757 if f not in self.Follow[B]:
1758 self.Follow[B].append(f)
1759 didadd = 1
1760 if not didadd: break
1761 return self.Follow
1762
1763
1764 # -----------------------------------------------------------------------------
1765 # build_lritems()
1766 #
1767 # This function walks the list of productions and builds a complete set of the
1768 # LR items. The LR items are stored in two ways: First, they are uniquely
1769 # numbered and placed in the list _lritems. Second, a linked list of LR items
1770 # is built for each production. For example:
1771 #
1772 # E -> E PLUS E
1773 #
1774 # Creates the list
1775 #
1776 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1777 # -----------------------------------------------------------------------------
1778
1779 def build_lritems(self):
1780 for p in self.Productions:
1781 lastlri = p
1782 i = 0
1783 lr_items = []
1784 while 1:
1785 if i > len(p):
1786 lri = None
1787 else:
1788 lri = LRItem(p,i)
1789 # Precompute the list of productions immediately following
1790 try:
1791 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1792 except (IndexError,KeyError):
1793 lri.lr_after = []
1794 try:
1795 lri.lr_before = lri.prod[i-1]
1796 except IndexError:
1797 lri.lr_before = None
1798
1799 lastlri.lr_next = lri
1800 if not lri: break
1801 lr_items.append(lri)
1802 lastlri = lri
1803 i += 1
1804 p.lr_items = lr_items
1805
1806# -----------------------------------------------------------------------------
1807# == Class LRTable ==
1808#
1809# This basic class represents a basic table of LR parsing information.
1810# Methods for generating the tables are not defined here. They are defined
1811# in the derived class LRGeneratedTable.
1812# -----------------------------------------------------------------------------
1813
1814class VersionError(YaccError): pass
1815
1816class LRTable(object):
1817 def __init__(self):
1818 self.lr_action = None
1819 self.lr_goto = None
1820 self.lr_productions = None
1821 self.lr_method = None
1822
1823 def read_table(self,module):
1824 if isinstance(module,types.ModuleType):
1825 parsetab = module
1826 else:
1827 if sys.version_info[0] < 3:
1828 exec("import %s as parsetab" % module)
1829 else:
1830 env = { }
1831 exec("import %s as parsetab" % module, env, env)
1832 parsetab = env['parsetab']
1833
1834 if parsetab._tabversion != __tabversion__:
1835 raise VersionError("yacc table file version is out of date")
1836
1837 self.lr_action = parsetab._lr_action
1838 self.lr_goto = parsetab._lr_goto
1839
1840 self.lr_productions = []
1841 for p in parsetab._lr_productions:
1842 self.lr_productions.append(MiniProduction(*p))
1843
1844 self.lr_method = parsetab._lr_method
1845 return parsetab._lr_signature
1846
1847 def read_pickle(self,filename):
1848 try:
1849 import cPickle as pickle
1850 except ImportError:
1851 import pickle
1852
1853 in_f = open(filename,"rb")
1854
1855 tabversion = pickle.load(in_f)
1856 if tabversion != __tabversion__:
1857 raise VersionError("yacc table file version is out of date")
1858 self.lr_method = pickle.load(in_f)
1859 signature = pickle.load(in_f)
1860 self.lr_action = pickle.load(in_f)
1861 self.lr_goto = pickle.load(in_f)
1862 productions = pickle.load(in_f)
1863
1864 self.lr_productions = []
1865 for p in productions:
1866 self.lr_productions.append(MiniProduction(*p))
1867
1868 in_f.close()
1869 return signature
1870
1871 # Bind all production function names to callable objects in pdict
1872 def bind_callables(self,pdict):
1873 for p in self.lr_productions:
1874 p.bind(pdict)
1875
1876# -----------------------------------------------------------------------------
1877# === LR Generator ===
1878#
1879# The following classes and functions are used to generate LR parsing tables on
1880# a grammar.
1881# -----------------------------------------------------------------------------
1882
1883# -----------------------------------------------------------------------------
1884# digraph()
1885# traverse()
1886#
1887# The following two functions are used to compute set valued functions
1888# of the form:
1889#
1890# F(x) = F'(x) U U{F(y) | x R y}
1891#
1892# This is used to compute the values of Read() sets as well as FOLLOW sets
1893# in LALR(1) generation.
1894#
1895# Inputs: X - An input set
1896# R - A relation
1897# FP - Set-valued function
1898# ------------------------------------------------------------------------------
1899
1900def digraph(X,R,FP):
1901 N = { }
1902 for x in X:
1903 N[x] = 0
1904 stack = []
1905 F = { }
1906 for x in X:
1907 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1908 return F
1909
1910def traverse(x,N,stack,F,X,R,FP):
1911 stack.append(x)
1912 d = len(stack)
1913 N[x] = d
1914 F[x] = FP(x) # F(X) <- F'(x)
1915
1916 rel = R(x) # Get y's related to x
1917 for y in rel:
1918 if N[y] == 0:
1919 traverse(y,N,stack,F,X,R,FP)
1920 N[x] = min(N[x],N[y])
1921 for a in F.get(y,[]):
1922 if a not in F[x]: F[x].append(a)
1923 if N[x] == d:
1924 N[stack[-1]] = MAXINT
1925 F[stack[-1]] = F[x]
1926 element = stack.pop()
1927 while element != x:
1928 N[stack[-1]] = MAXINT
1929 F[stack[-1]] = F[x]
1930 element = stack.pop()
1931
1932class LALRError(YaccError): pass
1933
1934# -----------------------------------------------------------------------------
1935# == LRGeneratedTable ==
1936#
1937# This class implements the LR table generation algorithm. There are no
1938# public methods except for write()
1939# -----------------------------------------------------------------------------
1940
1941class LRGeneratedTable(LRTable):
1942 def __init__(self,grammar,method='LALR',log=None):
1943 if method not in ['SLR','LALR']:
1944 raise LALRError("Unsupported method %s" % method)
1945
1946 self.grammar = grammar
1947 self.lr_method = method
1948
1949 # Set up the logger
1950 if not log:
1951 log = NullLogger()
1952 self.log = log
1953
1954 # Internal attributes
1955 self.lr_action = {} # Action table
1956 self.lr_goto = {} # Goto table
1957 self.lr_productions = grammar.Productions # Copy of grammar Production array
1958 self.lr_goto_cache = {} # Cache of computed gotos
1959 self.lr0_cidhash = {} # Cache of closures
1960
1961 self._add_count = 0 # Internal counter used to detect cycles
1962
1963 # Diagonistic information filled in by the table generator
1964 self.sr_conflict = 0
1965 self.rr_conflict = 0
1966 self.conflicts = [] # List of conflicts
1967
1968 self.sr_conflicts = []
1969 self.rr_conflicts = []
1970
1971 # Build the tables
1972 self.grammar.build_lritems()
1973 self.grammar.compute_first()
1974 self.grammar.compute_follow()
1975 self.lr_parse_table()
1976
1977 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1978
1979 def lr0_closure(self,I):
1980 self._add_count += 1
1981
1982 # Add everything in I to J
1983 J = I[:]
1984 didadd = 1
1985 while didadd:
1986 didadd = 0
1987 for j in J:
1988 for x in j.lr_after:
1989 if getattr(x,"lr0_added",0) == self._add_count: continue
1990 # Add B --> .G to J
1991 J.append(x.lr_next)
1992 x.lr0_added = self._add_count
1993 didadd = 1
1994
1995 return J
1996
1997 # Compute the LR(0) goto function goto(I,X) where I is a set
1998 # of LR(0) items and X is a grammar symbol. This function is written
1999 # in a way that guarantees uniqueness of the generated goto sets
2000 # (i.e. the same goto set will never be returned as two different Python
2001 # objects). With uniqueness, we can later do fast set comparisons using
2002 # id(obj) instead of element-wise comparison.
2003
2004 def lr0_goto(self,I,x):
2005 # First we look for a previously cached entry
2006 g = self.lr_goto_cache.get((id(I),x),None)
2007 if g: return g
2008
2009 # Now we generate the goto set in a way that guarantees uniqueness
2010 # of the result
2011
2012 s = self.lr_goto_cache.get(x,None)
2013 if not s:
2014 s = { }
2015 self.lr_goto_cache[x] = s
2016
2017 gs = [ ]
2018 for p in I:
2019 n = p.lr_next
2020 if n and n.lr_before == x:
2021 s1 = s.get(id(n),None)
2022 if not s1:
2023 s1 = { }
2024 s[id(n)] = s1
2025 gs.append(n)
2026 s = s1
2027 g = s.get('$end',None)
2028 if not g:
2029 if gs:
2030 g = self.lr0_closure(gs)
2031 s['$end'] = g
2032 else:
2033 s['$end'] = gs
2034 self.lr_goto_cache[(id(I),x)] = g
2035 return g
2036
2037 # Compute the LR(0) sets of item function
2038 def lr0_items(self):
2039
2040 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2041 i = 0
2042 for I in C:
2043 self.lr0_cidhash[id(I)] = i
2044 i += 1
2045
2046 # Loop over the items in C and each grammar symbols
2047 i = 0
2048 while i < len(C):
2049 I = C[i]
2050 i += 1
2051
2052 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2053 asyms = { }
2054 for ii in I:
2055 for s in ii.usyms:
2056 asyms[s] = None
2057
2058 for x in asyms:
2059 g = self.lr0_goto(I,x)
2060 if not g: continue
2061 if id(g) in self.lr0_cidhash: continue
2062 self.lr0_cidhash[id(g)] = len(C)
2063 C.append(g)
2064
2065 return C
2066
2067 # -----------------------------------------------------------------------------
2068 # ==== LALR(1) Parsing ====
2069 #
2070 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2071 # relying upon Follow() sets when performing reductions, a more selective
2072 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2073 # Thus, we mainly just have to focus on calculating the lookahead sets.
2074 #
2075 # The method used here is due to DeRemer and Pennelo (1982).
2076 #
2077 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
2078 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2079 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2080 #
2081 # Further details can also be found in:
2082 #
2083 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2084 # McGraw-Hill Book Company, (1985).
2085 #
2086 # -----------------------------------------------------------------------------
2087
2088 # -----------------------------------------------------------------------------
2089 # compute_nullable_nonterminals()
2090 #
2091 # Creates a dictionary containing all of the non-terminals that might produce
2092 # an empty production.
2093 # -----------------------------------------------------------------------------
2094
2095 def compute_nullable_nonterminals(self):
2096 nullable = {}
2097 num_nullable = 0
2098 while 1:
2099 for p in self.grammar.Productions[1:]:
2100 if p.len == 0:
2101 nullable[p.name] = 1
2102 continue
2103 for t in p.prod:
2104 if not t in nullable: break
2105 else:
2106 nullable[p.name] = 1
2107 if len(nullable) == num_nullable: break
2108 num_nullable = len(nullable)
2109 return nullable
2110
2111 # -----------------------------------------------------------------------------
2112 # find_nonterminal_trans(C)
2113 #
2114 # Given a set of LR(0) items, this functions finds all of the non-terminal
2115 # transitions. These are transitions in which a dot appears immediately before
2116 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2117 # is the state number and N is the nonterminal symbol.
2118 #
2119 # The input C is the set of LR(0) items.
2120 # -----------------------------------------------------------------------------
2121
2122 def find_nonterminal_transitions(self,C):
2123 trans = []
2124 for state in range(len(C)):
2125 for p in C[state]:
2126 if p.lr_index < p.len - 1:
2127 t = (state,p.prod[p.lr_index+1])
2128 if t[1] in self.grammar.Nonterminals:
2129 if t not in trans: trans.append(t)
2130 state = state + 1
2131 return trans
2132
2133 # -----------------------------------------------------------------------------
2134 # dr_relation()
2135 #
2136 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2137 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2138 #
2139 # Returns a list of terminals.
2140 # -----------------------------------------------------------------------------
2141
2142 def dr_relation(self,C,trans,nullable):
2143 dr_set = { }
2144 state,N = trans
2145 terms = []
2146
2147 g = self.lr0_goto(C[state],N)
2148 for p in g:
2149 if p.lr_index < p.len - 1:
2150 a = p.prod[p.lr_index+1]
2151 if a in self.grammar.Terminals:
2152 if a not in terms: terms.append(a)
2153
2154 # This extra bit is to handle the start state
2155 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2156 terms.append('$end')
2157
2158 return terms
2159
2160 # -----------------------------------------------------------------------------
2161 # reads_relation()
2162 #
2163 # Computes the READS() relation (p,A) READS (t,C).
2164 # -----------------------------------------------------------------------------
2165
2166 def reads_relation(self,C, trans, empty):
2167 # Look for empty transitions
2168 rel = []
2169 state, N = trans
2170
2171 g = self.lr0_goto(C[state],N)
2172 j = self.lr0_cidhash.get(id(g),-1)
2173 for p in g:
2174 if p.lr_index < p.len - 1:
2175 a = p.prod[p.lr_index + 1]
2176 if a in empty:
2177 rel.append((j,a))
2178
2179 return rel
2180
2181 # -----------------------------------------------------------------------------
2182 # compute_lookback_includes()
2183 #
2184 # Determines the lookback and includes relations
2185 #
2186 # LOOKBACK:
2187 #
2188 # This relation is determined by running the LR(0) state machine forward.
2189 # For example, starting with a production "N : . A B C", we run it forward
2190 # to obtain "N : A B C ." We then build a relationship between this final
2191 # state and the starting state. These relationships are stored in a dictionary
2192 # lookdict.
2193 #
2194 # INCLUDES:
2195 #
2196 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2197 #
2198 # This relation is used to determine non-terminal transitions that occur
2199 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2200 # if the following holds:
2201 #
2202 # B -> LAT, where T -> epsilon and p' -L-> p
2203 #
2204 # L is essentially a prefix (which may be empty), T is a suffix that must be
2205 # able to derive an empty string. State p' must lead to state p with the string L.
2206 #
2207 # -----------------------------------------------------------------------------
2208
2209 def compute_lookback_includes(self,C,trans,nullable):
2210
2211 lookdict = {} # Dictionary of lookback relations
2212 includedict = {} # Dictionary of include relations
2213
2214 # Make a dictionary of non-terminal transitions
2215 dtrans = {}
2216 for t in trans:
2217 dtrans[t] = 1
2218
2219 # Loop over all transitions and compute lookbacks and includes
2220 for state,N in trans:
2221 lookb = []
2222 includes = []
2223 for p in C[state]:
2224 if p.name != N: continue
2225
2226 # Okay, we have a name match. We now follow the production all the way
2227 # through the state machine until we get the . on the right hand side
2228
2229 lr_index = p.lr_index
2230 j = state
2231 while lr_index < p.len - 1:
2232 lr_index = lr_index + 1
2233 t = p.prod[lr_index]
2234
2235 # Check to see if this symbol and state are a non-terminal transition
2236 if (j,t) in dtrans:
2237 # Yes. Okay, there is some chance that this is an includes relation
2238 # the only way to know for certain is whether the rest of the
2239 # production derives empty
2240
2241 li = lr_index + 1
2242 while li < p.len:
2243 if p.prod[li] in self.grammar.Terminals: break # No forget it
2244 if not p.prod[li] in nullable: break
2245 li = li + 1
2246 else:
2247 # Appears to be a relation between (j,t) and (state,N)
2248 includes.append((j,t))
2249
2250 g = self.lr0_goto(C[j],t) # Go to next set
2251 j = self.lr0_cidhash.get(id(g),-1) # Go to next state
2252
2253 # When we get here, j is the final state, now we have to locate the production
2254 for r in C[j]:
2255 if r.name != p.name: continue
2256 if r.len != p.len: continue
2257 i = 0
2258 # This look is comparing a production ". A B C" with "A B C ."
2259 while i < r.lr_index:
2260 if r.prod[i] != p.prod[i+1]: break
2261 i = i + 1
2262 else:
2263 lookb.append((j,r))
2264 for i in includes:
2265 if not i in includedict: includedict[i] = []
2266 includedict[i].append((state,N))
2267 lookdict[(state,N)] = lookb
2268
2269 return lookdict,includedict
2270
2271 # -----------------------------------------------------------------------------
2272 # compute_read_sets()
2273 #
2274 # Given a set of LR(0) items, this function computes the read sets.
2275 #
2276 # Inputs: C = Set of LR(0) items
2277 # ntrans = Set of nonterminal transitions
2278 # nullable = Set of empty transitions
2279 #
2280 # Returns a set containing the read sets
2281 # -----------------------------------------------------------------------------
2282
2283 def compute_read_sets(self,C, ntrans, nullable):
2284 FP = lambda x: self.dr_relation(C,x,nullable)
2285 R = lambda x: self.reads_relation(C,x,nullable)
2286 F = digraph(ntrans,R,FP)
2287 return F
2288
2289 # -----------------------------------------------------------------------------
2290 # compute_follow_sets()
2291 #
2292 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2293 # and an include set, this function computes the follow sets
2294 #
2295 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2296 #
2297 # Inputs:
2298 # ntrans = Set of nonterminal transitions
2299 # readsets = Readset (previously computed)
2300 # inclsets = Include sets (previously computed)
2301 #
2302 # Returns a set containing the follow sets
2303 # -----------------------------------------------------------------------------
2304
2305 def compute_follow_sets(self,ntrans,readsets,inclsets):
2306 FP = lambda x: readsets[x]
2307 R = lambda x: inclsets.get(x,[])
2308 F = digraph(ntrans,R,FP)
2309 return F
2310
2311 # -----------------------------------------------------------------------------
2312 # add_lookaheads()
2313 #
2314 # Attaches the lookahead symbols to grammar rules.
2315 #
2316 # Inputs: lookbacks - Set of lookback relations
2317 # followset - Computed follow set
2318 #
2319 # This function directly attaches the lookaheads to productions contained
2320 # in the lookbacks set
2321 # -----------------------------------------------------------------------------
2322
2323 def add_lookaheads(self,lookbacks,followset):
2324 for trans,lb in lookbacks.items():
2325 # Loop over productions in lookback
2326 for state,p in lb:
2327 if not state in p.lookaheads:
2328 p.lookaheads[state] = []
2329 f = followset.get(trans,[])
2330 for a in f:
2331 if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2332
2333 # -----------------------------------------------------------------------------
2334 # add_lalr_lookaheads()
2335 #
2336 # This function does all of the work of adding lookahead information for use
2337 # with LALR parsing
2338 # -----------------------------------------------------------------------------
2339
2340 def add_lalr_lookaheads(self,C):
2341 # Determine all of the nullable nonterminals
2342 nullable = self.compute_nullable_nonterminals()
2343
2344 # Find all non-terminal transitions
2345 trans = self.find_nonterminal_transitions(C)
2346
2347 # Compute read sets
2348 readsets = self.compute_read_sets(C,trans,nullable)
2349
2350 # Compute lookback/includes relations
2351 lookd, included = self.compute_lookback_includes(C,trans,nullable)
2352
2353 # Compute LALR FOLLOW sets
2354 followsets = self.compute_follow_sets(trans,readsets,included)
2355
2356 # Add all of the lookaheads
2357 self.add_lookaheads(lookd,followsets)
2358
2359 # -----------------------------------------------------------------------------
2360 # lr_parse_table()
2361 #
2362 # This function constructs the parse tables for SLR or LALR
2363 # -----------------------------------------------------------------------------
2364 def lr_parse_table(self):
2365 Productions = self.grammar.Productions
2366 Precedence = self.grammar.Precedence
2367 goto = self.lr_goto # Goto array
2368 action = self.lr_action # Action array
2369 log = self.log # Logger for output
2370
2371 actionp = { } # Action production array (temporary)
2372
2373 log.info("Parsing method: %s", self.lr_method)
2374
2375 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2376 # This determines the number of states
2377
2378 C = self.lr0_items()
2379
2380 if self.lr_method == 'LALR':
2381 self.add_lalr_lookaheads(C)
2382
2383 # Build the parser table, state by state
2384 st = 0
2385 for I in C:
2386 # Loop over each production in I
2387 actlist = [ ] # List of actions
2388 st_action = { }
2389 st_actionp = { }
2390 st_goto = { }
2391 log.info("")
2392 log.info("state %d", st)
2393 log.info("")
2394 for p in I:
2395 log.info(" (%d) %s", p.number, str(p))
2396 log.info("")
2397
2398 for p in I:
2399 if p.len == p.lr_index + 1:
2400 if p.name == "S'":
2401 # Start symbol. Accept!
2402 st_action["$end"] = 0
2403 st_actionp["$end"] = p
2404 else:
2405 # We are at the end of a production. Reduce!
2406 if self.lr_method == 'LALR':
2407 laheads = p.lookaheads[st]
2408 else:
2409 laheads = self.grammar.Follow[p.name]
2410 for a in laheads:
2411 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
2412 r = st_action.get(a,None)
2413 if r is not None:
2414 # Whoa. Have a shift/reduce or reduce/reduce conflict
2415 if r > 0:
2416 # Need to decide on shift or reduce here
2417 # By default we favor shifting. Need to add
2418 # some precedence rules here.
2419 sprec,slevel = Productions[st_actionp[a].number].prec
2420 rprec,rlevel = Precedence.get(a,('right',0))
2421 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2422 # We really need to reduce here.
2423 st_action[a] = -p.number
2424 st_actionp[a] = p
2425 if not slevel and not rlevel:
2426 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2427 self.sr_conflicts.append((st,a,'reduce'))
2428 Productions[p.number].reduced += 1
2429 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2430 st_action[a] = None
2431 else:
2432 # Hmmm. Guess we'll keep the shift
2433 if not rlevel:
2434 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2435 self.sr_conflicts.append((st,a,'shift'))
2436 elif r < 0:
2437 # Reduce/reduce conflict. In this case, we favor the rule
2438 # that was defined first in the grammar file
2439 oldp = Productions[-r]
2440 pp = Productions[p.number]
2441 if oldp.line > pp.line:
2442 st_action[a] = -p.number
2443 st_actionp[a] = p
2444 chosenp,rejectp = pp,oldp
2445 Productions[p.number].reduced += 1
2446 Productions[oldp.number].reduced -= 1
2447 else:
2448 chosenp,rejectp = oldp,pp
2449 self.rr_conflicts.append((st,chosenp,rejectp))
2450 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
2451 else:
2452 raise LALRError("Unknown conflict in state %d" % st)
2453 else:
2454 st_action[a] = -p.number
2455 st_actionp[a] = p
2456 Productions[p.number].reduced += 1
2457 else:
2458 i = p.lr_index
2459 a = p.prod[i+1] # Get symbol right after the "."
2460 if a in self.grammar.Terminals:
2461 g = self.lr0_goto(I,a)
2462 j = self.lr0_cidhash.get(id(g),-1)
2463 if j >= 0:
2464 # We are in a shift state
2465 actlist.append((a,p,"shift and go to state %d" % j))
2466 r = st_action.get(a,None)
2467 if r is not None:
2468 # Whoa have a shift/reduce or shift/shift conflict
2469 if r > 0:
2470 if r != j:
2471 raise LALRError("Shift/shift conflict in state %d" % st)
2472 elif r < 0:
2473 # Do a precedence check.
2474 # - if precedence of reduce rule is higher, we reduce.
2475 # - if precedence of reduce is same and left assoc, we reduce.
2476 # - otherwise we shift
2477 rprec,rlevel = Productions[st_actionp[a].number].prec
2478 sprec,slevel = Precedence.get(a,('right',0))
2479 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2480 # We decide to shift here... highest precedence to shift
2481 Productions[st_actionp[a].number].reduced -= 1
2482 st_action[a] = j
2483 st_actionp[a] = p
2484 if not rlevel:
2485 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2486 self.sr_conflicts.append((st,a,'shift'))
2487 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2488 st_action[a] = None
2489 else:
2490 # Hmmm. Guess we'll keep the reduce
2491 if not slevel and not rlevel:
2492 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2493 self.sr_conflicts.append((st,a,'reduce'))
2494
2495 else:
2496 raise LALRError("Unknown conflict in state %d" % st)
2497 else:
2498 st_action[a] = j
2499 st_actionp[a] = p
2500
2501 # Print the actions associated with each terminal
2502 _actprint = { }
2503 for a,p,m in actlist:
2504 if a in st_action:
2505 if p is st_actionp[a]:
2506 log.info(" %-15s %s",a,m)
2507 _actprint[(a,m)] = 1
2508 log.info("")
2509 # Print the actions that were not used. (debugging)
2510 not_used = 0
2511 for a,p,m in actlist:
2512 if a in st_action:
2513 if p is not st_actionp[a]:
2514 if not (a,m) in _actprint:
2515 log.debug(" ! %-15s [ %s ]",a,m)
2516 not_used = 1
2517 _actprint[(a,m)] = 1
2518 if not_used:
2519 log.debug("")
2520
2521 # Construct the goto table for this state
2522
2523 nkeys = { }
2524 for ii in I:
2525 for s in ii.usyms:
2526 if s in self.grammar.Nonterminals:
2527 nkeys[s] = None
2528 for n in nkeys:
2529 g = self.lr0_goto(I,n)
2530 j = self.lr0_cidhash.get(id(g),-1)
2531 if j >= 0:
2532 st_goto[n] = j
2533 log.info(" %-30s shift and go to state %d",n,j)
2534
2535 action[st] = st_action
2536 actionp[st] = st_actionp
2537 goto[st] = st_goto
2538 st += 1
2539
2540
2541 # -----------------------------------------------------------------------------
2542 # write()
2543 #
2544 # This function writes the LR parsing tables to a file
2545 # -----------------------------------------------------------------------------
2546
2547 def write_table(self,modulename,outputdir='',signature=""):
2548 basemodulename = modulename.split(".")[-1]
2549 filename = os.path.join(outputdir,basemodulename) + ".py"
2550 try:
2551 f = open(filename,"w")
2552
2553 f.write("""
2554# %s
2555# This file is automatically generated. Do not edit.
2556_tabversion = %r
2557
2558_lr_method = %r
2559
2560_lr_signature = %r
2561 """ % (filename, __tabversion__, self.lr_method, signature))
2562
2563 # Change smaller to 0 to go back to original tables
2564 smaller = 1
2565
2566 # Factor out names to try and make smaller
2567 if smaller:
2568 items = { }
2569
2570 for s,nd in self.lr_action.items():
2571 for name,v in nd.items():
2572 i = items.get(name)
2573 if not i:
2574 i = ([],[])
2575 items[name] = i
2576 i[0].append(s)
2577 i[1].append(v)
2578
2579 f.write("\n_lr_action_items = {")
2580 for k,v in items.items():
2581 f.write("%r:([" % k)
2582 for i in v[0]:
2583 f.write("%r," % i)
2584 f.write("],[")
2585 for i in v[1]:
2586 f.write("%r," % i)
2587
2588 f.write("]),")
2589 f.write("}\n")
2590
2591 f.write("""
2592_lr_action = { }
2593for _k, _v in _lr_action_items.items():
2594 for _x,_y in zip(_v[0],_v[1]):
2595 if not _x in _lr_action: _lr_action[_x] = { }
2596 _lr_action[_x][_k] = _y
2597del _lr_action_items
2598""")
2599
2600 else:
2601 f.write("\n_lr_action = { ");
2602 for k,v in self.lr_action.items():
2603 f.write("(%r,%r):%r," % (k[0],k[1],v))
2604 f.write("}\n");
2605
2606 if smaller:
2607 # Factor out names to try and make smaller
2608 items = { }
2609
2610 for s,nd in self.lr_goto.items():
2611 for name,v in nd.items():
2612 i = items.get(name)
2613 if not i:
2614 i = ([],[])
2615 items[name] = i
2616 i[0].append(s)
2617 i[1].append(v)
2618
2619 f.write("\n_lr_goto_items = {")
2620 for k,v in items.items():
2621 f.write("%r:([" % k)
2622 for i in v[0]:
2623 f.write("%r," % i)
2624 f.write("],[")
2625 for i in v[1]:
2626 f.write("%r," % i)
2627
2628 f.write("]),")
2629 f.write("}\n")
2630
2631 f.write("""
2632_lr_goto = { }
2633for _k, _v in _lr_goto_items.items():
2634 for _x,_y in zip(_v[0],_v[1]):
2635 if not _x in _lr_goto: _lr_goto[_x] = { }
2636 _lr_goto[_x][_k] = _y
2637del _lr_goto_items
2638""")
2639 else:
2640 f.write("\n_lr_goto = { ");
2641 for k,v in self.lr_goto.items():
2642 f.write("(%r,%r):%r," % (k[0],k[1],v))
2643 f.write("}\n");
2644
2645 # Write production table
2646 f.write("_lr_productions = [\n")
2647 for p in self.lr_productions:
2648 if p.func:
2649 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2650 else:
2651 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2652 f.write("]\n")
2653 f.close()
2654
2655 except IOError:
2656 e = sys.exc_info()[1]
2657 sys.stderr.write("Unable to create '%s'\n" % filename)
2658 sys.stderr.write(str(e)+"\n")
2659 return
2660
2661
2662 # -----------------------------------------------------------------------------
2663 # pickle_table()
2664 #
2665 # This function pickles the LR parsing tables to a supplied file object
2666 # -----------------------------------------------------------------------------
2667
2668 def pickle_table(self,filename,signature=""):
2669 try:
2670 import cPickle as pickle
2671 except ImportError:
2672 import pickle
2673 outf = open(filename,"wb")
2674 pickle.dump(__tabversion__,outf,pickle_protocol)
2675 pickle.dump(self.lr_method,outf,pickle_protocol)
2676 pickle.dump(signature,outf,pickle_protocol)
2677 pickle.dump(self.lr_action,outf,pickle_protocol)
2678 pickle.dump(self.lr_goto,outf,pickle_protocol)
2679
2680 outp = []
2681 for p in self.lr_productions:
2682 if p.func:
2683 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2684 else:
2685 outp.append((str(p),p.name,p.len,None,None,None))
2686 pickle.dump(outp,outf,pickle_protocol)
2687 outf.close()
2688
2689# -----------------------------------------------------------------------------
2690# === INTROSPECTION ===
2691#
2692# The following functions and classes are used to implement the PLY
2693# introspection features followed by the yacc() function itself.
2694# -----------------------------------------------------------------------------
2695
2696# -----------------------------------------------------------------------------
2697# get_caller_module_dict()
2698#
2699# This function returns a dictionary containing all of the symbols defined within
2700# a caller further down the call stack. This is used to get the environment
2701# associated with the yacc() call if none was provided.
2702# -----------------------------------------------------------------------------
2703
2704def get_caller_module_dict(levels):
2705 try:
2706 raise RuntimeError
2707 except RuntimeError:
2708 e,b,t = sys.exc_info()
2709 f = t.tb_frame
2710 while levels > 0:
2711 f = f.f_back
2712 levels -= 1
2713 ldict = f.f_globals.copy()
2714 if f.f_globals != f.f_locals:
2715 ldict.update(f.f_locals)
2716
2717 return ldict
2718
2719# -----------------------------------------------------------------------------
2720# parse_grammar()
2721#
2722# This takes a raw grammar rule string and parses it into production data
2723# -----------------------------------------------------------------------------
2724def parse_grammar(doc,file,line):
2725 grammar = []
2726 # Split the doc string into lines
2727 pstrings = doc.splitlines()
2728 lastp = None
2729 dline = line
2730 for ps in pstrings:
2731 dline += 1
2732 p = ps.split()
2733 if not p: continue
2734 try:
2735 if p[0] == '|':
2736 # This is a continuation of a previous rule
2737 if not lastp:
2738 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
2739 prodname = lastp
2740 syms = p[1:]
2741 else:
2742 prodname = p[0]
2743 lastp = prodname
2744 syms = p[2:]
2745 assign = p[1]
2746 if assign != ':' and assign != '::=':
2747 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
2748
2749 grammar.append((file,dline,prodname,syms))
2750 except SyntaxError:
2751 raise
2752 except Exception:
2753 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
2754
2755 return grammar
2756
2757# -----------------------------------------------------------------------------
2758# ParserReflect()
2759#
2760# This class represents information extracted for building a parser including
2761# start symbol, error function, tokens, precedence list, action functions,
2762# etc.
2763# -----------------------------------------------------------------------------
2764class ParserReflect(object):
2765 def __init__(self,pdict,log=None):
2766 self.pdict = pdict
2767 self.start = None
2768 self.error_func = None
2769 self.tokens = None
2770 self.files = {}
2771 self.grammar = []
2772 self.error = 0
2773
2774 if log is None:
2775 self.log = PlyLogger(sys.stderr)
2776 else:
2777 self.log = log
2778
2779 # Get all of the basic information
2780 def get_all(self):
2781 self.get_start()
2782 self.get_error_func()
2783 self.get_tokens()
2784 self.get_precedence()
2785 self.get_pfunctions()
2786
2787 # Validate all of the information
2788 def validate_all(self):
2789 self.validate_start()
2790 self.validate_error_func()
2791 self.validate_tokens()
2792 self.validate_precedence()
2793 self.validate_pfunctions()
2794 self.validate_files()
2795 return self.error
2796
2797 # Compute a signature over the grammar
2798 def signature(self):
2799 try:
2800 from hashlib import md5
2801 except ImportError:
2802 from md5 import md5
2803 try:
2804 sig = md5()
2805 if self.start:
2806 sig.update(self.start.encode('latin-1'))
2807 if self.prec:
2808 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2809 if self.tokens:
2810 sig.update(" ".join(self.tokens).encode('latin-1'))
2811 for f in self.pfuncs:
2812 if f[3]:
2813 sig.update(f[3].encode('latin-1'))
2814 except (TypeError,ValueError):
2815 pass
2816 return sig.digest()
2817
2818 # -----------------------------------------------------------------------------
2819 # validate_file()
2820 #
2821 # This method checks to see if there are duplicated p_rulename() functions
2822 # in the parser module file. Without this function, it is really easy for
2823 # users to make mistakes by cutting and pasting code fragments (and it's a real
2824 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2825 # we just do a little regular expression pattern matching of def statements
2826 # to try and detect duplicates.
2827 # -----------------------------------------------------------------------------
2828
2829 def validate_files(self):
2830 # Match def p_funcname(
2831 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2832
2833 for filename in self.files.keys():
2834 base,ext = os.path.splitext(filename)
2835 if ext != '.py': return 1 # No idea. Assume it's okay.
2836
2837 try:
2838 f = open(filename)
2839 lines = f.readlines()
2840 f.close()
2841 except IOError:
2842 continue
2843
2844 counthash = { }
2845 for linen,l in enumerate(lines):
2846 linen += 1
2847 m = fre.match(l)
2848 if m:
2849 name = m.group(1)
2850 prev = counthash.get(name)
2851 if not prev:
2852 counthash[name] = linen
2853 else:
2854 self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2855
2856 # Get the start symbol
2857 def get_start(self):
2858 self.start = self.pdict.get('start')
2859
2860 # Validate the start symbol
2861 def validate_start(self):
2862 if self.start is not None:
2863 if not isinstance(self.start,str):
2864 self.log.error("'start' must be a string")
2865
2866 # Look for error handler
2867 def get_error_func(self):
2868 self.error_func = self.pdict.get('p_error')
2869
2870 # Validate the error function
2871 def validate_error_func(self):
2872 if self.error_func:
2873 if isinstance(self.error_func,types.FunctionType):
2874 ismethod = 0
2875 elif isinstance(self.error_func, types.MethodType):
2876 ismethod = 1
2877 else:
2878 self.log.error("'p_error' defined, but is not a function or method")
2879 self.error = 1
2880 return
2881
2882 eline = func_code(self.error_func).co_firstlineno
2883 efile = func_code(self.error_func).co_filename
2884 self.files[efile] = 1
2885
2886 if (func_code(self.error_func).co_argcount != 1+ismethod):
2887 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2888 self.error = 1
2889
2890 # Get the tokens map
2891 def get_tokens(self):
2892 tokens = self.pdict.get("tokens",None)
2893 if not tokens:
2894 self.log.error("No token list is defined")
2895 self.error = 1
2896 return
2897
2898 if not isinstance(tokens,(list, tuple)):
2899 self.log.error("tokens must be a list or tuple")
2900 self.error = 1
2901 return
2902
2903 if not tokens:
2904 self.log.error("tokens is empty")
2905 self.error = 1
2906 return
2907
2908 self.tokens = tokens
2909
2910 # Validate the tokens
2911 def validate_tokens(self):
2912 # Validate the tokens.
2913 if 'error' in self.tokens:
2914 self.log.error("Illegal token name 'error'. Is a reserved word")
2915 self.error = 1
2916 return
2917
2918 terminals = {}
2919 for n in self.tokens:
2920 if n in terminals:
2921 self.log.warning("Token '%s' multiply defined", n)
2922 terminals[n] = 1
2923
2924 # Get the precedence map (if any)
2925 def get_precedence(self):
2926 self.prec = self.pdict.get("precedence",None)
2927
2928 # Validate and parse the precedence map
2929 def validate_precedence(self):
2930 preclist = []
2931 if self.prec:
2932 if not isinstance(self.prec,(list,tuple)):
2933 self.log.error("precedence must be a list or tuple")
2934 self.error = 1
2935 return
2936 for level,p in enumerate(self.prec):
2937 if not isinstance(p,(list,tuple)):
2938 self.log.error("Bad precedence table")
2939 self.error = 1
2940 return
2941
2942 if len(p) < 2:
2943 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2944 self.error = 1
2945 return
2946 assoc = p[0]
2947 if not isinstance(assoc,str):
2948 self.log.error("precedence associativity must be a string")
2949 self.error = 1
2950 return
2951 for term in p[1:]:
2952 if not isinstance(term,str):
2953 self.log.error("precedence items must be strings")
2954 self.error = 1
2955 return
2956 preclist.append((term,assoc,level+1))
2957 self.preclist = preclist
2958
2959 # Get all p_functions from the grammar
2960 def get_pfunctions(self):
2961 p_functions = []
2962 for name, item in self.pdict.items():
2963 if name[:2] != 'p_': continue
2964 if name == 'p_error': continue
2965 if isinstance(item,(types.FunctionType,types.MethodType)):
2966 line = func_code(item).co_firstlineno
2967 file = func_code(item).co_filename
2968 p_functions.append((line,file,name,item.__doc__))
2969
2970 # Sort all of the actions by line number
2971 p_functions.sort()
2972 self.pfuncs = p_functions
2973
2974
2975 # Validate all of the p_functions
2976 def validate_pfunctions(self):
2977 grammar = []
2978 # Check for non-empty symbols
2979 if len(self.pfuncs) == 0:
2980 self.log.error("no rules of the form p_rulename are defined")
2981 self.error = 1
2982 return
2983
2984 for line, file, name, doc in self.pfuncs:
2985 func = self.pdict[name]
2986 if isinstance(func, types.MethodType):
2987 reqargs = 2
2988 else:
2989 reqargs = 1
2990 if func_code(func).co_argcount > reqargs:
2991 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
2992 self.error = 1
2993 elif func_code(func).co_argcount < reqargs:
2994 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
2995 self.error = 1
2996 elif not func.__doc__:
2997 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
2998 else:
2999 try:
3000 parsed_g = parse_grammar(doc,file,line)
3001 for g in parsed_g:
3002 grammar.append((name, g))
3003 except SyntaxError:
3004 e = sys.exc_info()[1]
3005 self.log.error(str(e))
3006 self.error = 1
3007
3008 # Looks like a valid grammar rule
3009 # Mark the file in which defined.
3010 self.files[file] = 1
3011
3012 # Secondary validation step that looks for p_ definitions that are not functions
3013 # or functions that look like they might be grammar rules.
3014
3015 for n,v in self.pdict.items():
3016 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
3017 if n[0:2] == 't_': continue
3018 if n[0:2] == 'p_' and n != 'p_error':
3019 self.log.warning("'%s' not defined as a function", n)
3020 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
3021 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
3022 try:
3023 doc = v.__doc__.split(" ")
3024 if doc[1] == ':':
3025 self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
3026 func_code(v).co_filename, func_code(v).co_firstlineno,n)
3027 except Exception:
3028 pass
3029
3030 self.grammar = grammar
3031
3032# -----------------------------------------------------------------------------
3033# yacc(module)
3034#
3035# Build a parser
3036# -----------------------------------------------------------------------------
3037
3038def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3039 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
3040 debuglog=None, errorlog = None, picklefile=None):
3041
3042 global parse # Reference to the parsing method of the last built parser
3043
3044 # If pickling is enabled, table files are not created
3045
3046 if picklefile:
3047 write_tables = 0
3048
3049 if errorlog is None:
3050 errorlog = PlyLogger(sys.stderr)
3051
3052 # Get the module dictionary used for the parser
3053 if module:
3054 _items = [(k,getattr(module,k)) for k in dir(module)]
3055 pdict = dict(_items)
3056 else:
3057 pdict = get_caller_module_dict(2)
3058
3059 # Collect parser information from the dictionary
3060 pinfo = ParserReflect(pdict,log=errorlog)
3061 pinfo.get_all()
3062
3063 if pinfo.error:
3064 raise YaccError("Unable to build parser")
3065
3066 # Check signature against table files (if any)
3067 signature = pinfo.signature()
3068
3069 # Read the tables
3070 try:
3071 lr = LRTable()
3072 if picklefile:
3073 read_signature = lr.read_pickle(picklefile)
3074 else:
3075 read_signature = lr.read_table(tabmodule)
3076 if optimize or (read_signature == signature):
3077 try:
3078 lr.bind_callables(pinfo.pdict)
3079 parser = LRParser(lr,pinfo.error_func)
3080 parse = parser.parse
3081 return parser
3082 except Exception:
3083 e = sys.exc_info()[1]
3084 errorlog.warning("There was a problem loading the table file: %s", repr(e))
3085 except VersionError:
3086 e = sys.exc_info()
3087 errorlog.warning(str(e))
3088 except Exception:
3089 pass
3090
3091 if debuglog is None:
3092 if debug:
3093 debuglog = PlyLogger(open(debugfile,"w"))
3094 else:
3095 debuglog = NullLogger()
3096
3097 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3098
3099
3100 errors = 0
3101
3102 # Validate the parser information
3103 if pinfo.validate_all():
3104 raise YaccError("Unable to build parser")
3105
3106 if not pinfo.error_func:
3107 errorlog.warning("no p_error() function is defined")
3108
3109 # Create a grammar object
3110 grammar = Grammar(pinfo.tokens)
3111
3112 # Set precedence level for terminals
3113 for term, assoc, level in pinfo.preclist:
3114 try:
3115 grammar.set_precedence(term,assoc,level)
3116 except GrammarError:
3117 e = sys.exc_info()[1]
3118 errorlog.warning("%s",str(e))
3119
3120 # Add productions to the grammar
3121 for funcname, gram in pinfo.grammar:
3122 file, line, prodname, syms = gram
3123 try:
3124 grammar.add_production(prodname,syms,funcname,file,line)
3125 except GrammarError:
3126 e = sys.exc_info()[1]
3127 errorlog.error("%s",str(e))
3128 errors = 1
3129
3130 # Set the grammar start symbols
3131 try:
3132 if start is None:
3133 grammar.set_start(pinfo.start)
3134 else:
3135 grammar.set_start(start)
3136 except GrammarError:
3137 e = sys.exc_info()[1]
3138 errorlog.error(str(e))
3139 errors = 1
3140
3141 if errors:
3142 raise YaccError("Unable to build parser")
3143
3144 # Verify the grammar structure
3145 undefined_symbols = grammar.undefined_symbols()
3146 for sym, prod in undefined_symbols:
3147 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
3148 errors = 1
3149
3150 unused_terminals = grammar.unused_terminals()
3151 if unused_terminals:
3152 debuglog.info("")
3153 debuglog.info("Unused terminals:")
3154 debuglog.info("")
3155 for term in unused_terminals:
3156 errorlog.warning("Token '%s' defined, but not used", term)
3157 debuglog.info(" %s", term)
3158
3159 # Print out all productions to the debug log
3160 if debug:
3161 debuglog.info("")
3162 debuglog.info("Grammar")
3163 debuglog.info("")
3164 for n,p in enumerate(grammar.Productions):
3165 debuglog.info("Rule %-5d %s", n, p)
3166
3167 # Find unused non-terminals
3168 unused_rules = grammar.unused_rules()
3169 for prod in unused_rules:
3170 errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
3171
3172 if len(unused_terminals) == 1:
3173 errorlog.warning("There is 1 unused token")
3174 if len(unused_terminals) > 1:
3175 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3176
3177 if len(unused_rules) == 1:
3178 errorlog.warning("There is 1 unused rule")
3179 if len(unused_rules) > 1:
3180 errorlog.warning("There are %d unused rules", len(unused_rules))
3181
3182 if debug:
3183 debuglog.info("")
3184 debuglog.info("Terminals, with rules where they appear")
3185 debuglog.info("")
3186 terms = list(grammar.Terminals)
3187 terms.sort()
3188 for term in terms:
3189 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3190
3191 debuglog.info("")
3192 debuglog.info("Nonterminals, with rules where they appear")
3193 debuglog.info("")
3194 nonterms = list(grammar.Nonterminals)
3195 nonterms.sort()
3196 for nonterm in nonterms:
3197 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3198 debuglog.info("")
3199
3200 if check_recursion:
3201 unreachable = grammar.find_unreachable()
3202 for u in unreachable:
3203 errorlog.warning("Symbol '%s' is unreachable",u)
3204
3205 infinite = grammar.infinite_cycles()
3206 for inf in infinite:
3207 errorlog.error("Infinite recursion detected for symbol '%s'", inf)
3208 errors = 1
3209
3210 unused_prec = grammar.unused_precedence()
3211 for term, assoc in unused_prec:
3212 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
3213 errors = 1
3214
3215 if errors:
3216 raise YaccError("Unable to build parser")
3217
3218 # Run the LRGeneratedTable on the grammar
3219 if debug:
3220 errorlog.debug("Generating %s tables", method)
3221
3222 lr = LRGeneratedTable(grammar,method,debuglog)
3223
3224 if debug:
3225 num_sr = len(lr.sr_conflicts)
3226
3227 # Report shift/reduce and reduce/reduce conflicts
3228 if num_sr == 1:
3229 errorlog.warning("1 shift/reduce conflict")
3230 elif num_sr > 1:
3231 errorlog.warning("%d shift/reduce conflicts", num_sr)
3232
3233 num_rr = len(lr.rr_conflicts)
3234 if num_rr == 1:
3235 errorlog.warning("1 reduce/reduce conflict")
3236 elif num_rr > 1:
3237 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3238
3239 # Write out conflicts to the output file
3240 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3241 debuglog.warning("")
3242 debuglog.warning("Conflicts:")
3243 debuglog.warning("")
3244
3245 for state, tok, resolution in lr.sr_conflicts:
3246 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
3247
3248 already_reported = {}
3249 for state, rule, rejected in lr.rr_conflicts:
3250 if (state,id(rule),id(rejected)) in already_reported:
3251 continue
3252 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3253 debuglog.warning("rejected rule (%s) in state %d", rejected,state)
3254 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3255 errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3256 already_reported[state,id(rule),id(rejected)] = 1
3257
3258 warned_never = []
3259 for state, rule, rejected in lr.rr_conflicts:
3260 if not rejected.reduced and (rejected not in warned_never):
3261 debuglog.warning("Rule (%s) is never reduced", rejected)
3262 errorlog.warning("Rule (%s) is never reduced", rejected)
3263 warned_never.append(rejected)
3264
3265 # Write the table file if requested
3266 if write_tables:
3267 lr.write_table(tabmodule,outputdir,signature)
3268
3269 # Write a pickled version of the tables
3270 if picklefile:
3271 lr.pickle_table(picklefile,signature)
3272
3273 # Build the parser
3274 lr.bind_callables(pinfo.pdict)
3275 parser = LRParser(lr,pinfo.error_func)
3276
3277 parse = parser.parse
3278 return parser