blob: 6168fd9a037621b03720d65fd1aa4e4a81b3f0de [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001# -----------------------------------------------------------------------------
2# ply: yacc.py
3#
4# Copyright (C) 2001-2009,
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are
10# met:
11#
12# * Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14# * Redistributions in binary form must reproduce the above copyright notice,
15# this list of conditions and the following disclaimer in the documentation
16# and/or other materials provided with the distribution.
17# * Neither the name of the David Beazley or Dabeaz LLC may be used to
18# endorse or promote products derived from this software without
19# specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32# -----------------------------------------------------------------------------
33#
34# This implements an LR parser that is constructed from grammar rules defined
35# as Python functions. The grammer is specified by supplying the BNF inside
36# Python documentation strings. The inspiration for this technique was borrowed
37# from John Aycock's Spark parsing system. PLY might be viewed as cross between
38# Spark and the GNU bison utility.
39#
40# The current implementation is only somewhat object-oriented. The
41# LR parser itself is defined in terms of an object (which allows multiple
42# parsers to co-exist). However, most of the variables used during table
43# construction are defined in terms of global variables. Users shouldn't
44# notice unless they are trying to define multiple parsers at the same
45# time using threads (in which case they should have their head examined).
46#
47# This implementation supports both SLR and LALR(1) parsing. LALR(1)
48# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
51# by the more efficient DeRemer and Pennello algorithm.
52#
53# :::::::: WARNING :::::::
54#
55# Construction of LR parsing tables is fairly complicated and expensive.
56# To make this module run fast, a *LOT* of work has been put into
57# optimization---often at the expensive of readability and what might
58# consider to be good Python "coding style." Modify the code at your
59# own risk!
60# ----------------------------------------------------------------------------
61
62__version__ = "3.3"
63__tabversion__ = "3.2" # Table version
64
65#-----------------------------------------------------------------------------
66# === User configurable parameters ===
67#
68# Change these to modify the default behavior of yacc (if you wish)
69#-----------------------------------------------------------------------------
70
71yaccdebug = 0 # Debugging mode. If set, yacc generates a
72 # a 'parser.out' file in the current directory
73
74debug_file = 'parser.out' # Default name of the debugging file
75tab_module = 'parsetab' # Default name of the table module
76default_lr = 'LALR' # Default LR table generation method
77
78error_count = 3 # Number of symbols that must be shifted to leave recovery mode
79
80yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
81 # implementations of certain functions.
82
83resultlimit = 40 # Size limit of results when running in debug mode.
84
85pickle_protocol = 0 # Protocol to use when writing pickle files
86
87import re, types, sys, os.path
88
89# Compatibility function for python 2.6/3.0
90if sys.version_info[0] < 3:
91 def func_code(f):
92 return f.func_code
93else:
94 def func_code(f):
95 return f.__code__
96
97# Compatibility
98try:
99 MAXINT = sys.maxint
100except AttributeError:
101 MAXINT = sys.maxsize
102
103# Python 2.x/3.0 compatibility.
104def load_ply_lex():
105 if sys.version_info[0] < 3:
106 import lex
107 else:
108 import ply.lex as lex
109 return lex
110
111# This object is a stand-in for a logging object created by the
112# logging module. PLY will use this by default to create things
113# such as the parser.out file. If a user wants more detailed
114# information, they can create their own logging object and pass
115# it into PLY.
116
117class PlyLogger(object):
118 def __init__(self,f):
119 self.f = f
120 def debug(self,msg,*args,**kwargs):
121 self.f.write((msg % args) + "\n")
122 info = debug
123
124 def warning(self,msg,*args,**kwargs):
125 self.f.write("WARNING: "+ (msg % args) + "\n")
126
127 def error(self,msg,*args,**kwargs):
128 self.f.write("ERROR: " + (msg % args) + "\n")
129
130 critical = debug
131
132# Null logger is used when no output is generated. Does nothing.
133class NullLogger(object):
134 def __getattribute__(self,name):
135 return self
136 def __call__(self,*args,**kwargs):
137 return self
138
139# Exception raised for yacc-related errors
140class YaccError(Exception): pass
141
142# Format the result message that the parser produces when running in debug mode.
143def format_result(r):
144 repr_str = repr(r)
145 if '\n' in repr_str: repr_str = repr(repr_str)
146 if len(repr_str) > resultlimit:
147 repr_str = repr_str[:resultlimit]+" ..."
148 result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
149 return result
150
151
152# Format stack entries when the parser is running in debug mode
153def format_stack_entry(r):
154 repr_str = repr(r)
155 if '\n' in repr_str: repr_str = repr(repr_str)
156 if len(repr_str) < 16:
157 return repr_str
158 else:
159 return "<%s @ 0x%x>" % (type(r).__name__,id(r))
160
161#-----------------------------------------------------------------------------
162# === LR Parsing Engine ===
163#
164# The following classes are used for the LR parser itself. These are not
165# used during table construction and are independent of the actual LR
166# table generation algorithm
167#-----------------------------------------------------------------------------
168
169# This class is used to hold non-terminal grammar symbols during parsing.
170# It normally has the following attributes set:
171# .type = Grammar symbol type
172# .value = Symbol value
173# .lineno = Starting line number
174# .endlineno = Ending line number (optional, set automatically)
175# .lexpos = Starting lex position
176# .endlexpos = Ending lex position (optional, set automatically)
177
178class YaccSymbol:
179 def __str__(self): return self.type
180 def __repr__(self): return str(self)
181
182# This class is a wrapper around the objects actually passed to each
183# grammar rule. Index lookup and assignment actually assign the
184# .value attribute of the underlying YaccSymbol object.
185# The lineno() method returns the line number of a given
186# item (or 0 if not defined). The linespan() method returns
187# a tuple of (startline,endline) representing the range of lines
188# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
189# representing the range of positional information for a symbol.
190
191class YaccProduction:
192 def __init__(self,s,stack=None):
193 self.slice = s
194 self.stack = stack
195 self.lexer = None
196 self.parser= None
197 def __getitem__(self,n):
198 if n >= 0: return self.slice[n].value
199 else: return self.stack[n].value
200
201 def __setitem__(self,n,v):
202 self.slice[n].value = v
203
204 def __getslice__(self,i,j):
205 return [s.value for s in self.slice[i:j]]
206
207 def __len__(self):
208 return len(self.slice)
209
210 def lineno(self,n):
211 return getattr(self.slice[n],"lineno",0)
212
213 def set_lineno(self,n,lineno):
214 self.slice[n].lineno = lineno
215
216 def linespan(self,n):
217 startline = getattr(self.slice[n],"lineno",0)
218 endline = getattr(self.slice[n],"endlineno",startline)
219 return startline,endline
220
221 def lexpos(self,n):
222 return getattr(self.slice[n],"lexpos",0)
223
224 def lexspan(self,n):
225 startpos = getattr(self.slice[n],"lexpos",0)
226 endpos = getattr(self.slice[n],"endlexpos",startpos)
227 return startpos,endpos
228
229 def error(self):
230 raise SyntaxError
231
232
233# -----------------------------------------------------------------------------
234# == LRParser ==
235#
236# The LR Parsing engine.
237# -----------------------------------------------------------------------------
238
239class LRParser:
240 def __init__(self,lrtab,errorf):
241 self.productions = lrtab.lr_productions
242 self.action = lrtab.lr_action
243 self.goto = lrtab.lr_goto
244 self.errorfunc = errorf
245
246 def errok(self):
247 self.errorok = 1
248
249 def restart(self):
250 del self.statestack[:]
251 del self.symstack[:]
252 sym = YaccSymbol()
253 sym.type = '$end'
254 self.symstack.append(sym)
255 self.statestack.append(0)
256
257 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
258 if debug or yaccdevel:
259 if isinstance(debug,int):
260 debug = PlyLogger(sys.stderr)
261 return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
262 elif tracking:
263 return self.parseopt(input,lexer,debug,tracking,tokenfunc)
264 else:
265 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
266
267
268 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
269 # parsedebug().
270 #
271 # This is the debugging enabled version of parse(). All changes made to the
272 # parsing engine should be made here. For the non-debugging version,
273 # copy this code to a method parseopt() and delete all of the sections
274 # enclosed in:
275 #
276 # #--! DEBUG
277 # statements
278 # #--! DEBUG
279 #
280 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
281
282 def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
283 lookahead = None # Current lookahead symbol
284 lookaheadstack = [ ] # Stack of lookahead symbols
285 actions = self.action # Local reference to action table (to avoid lookup on self.)
286 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
287 prod = self.productions # Local reference to production list (to avoid lookup on self.)
288 pslice = YaccProduction(None) # Production object passed to grammar rules
289 errorcount = 0 # Used during error recovery
290
291 # --! DEBUG
292 debug.info("PLY: PARSE DEBUG START")
293 # --! DEBUG
294
295 # If no lexer was given, we will try to use the lex module
296 if not lexer:
297 lex = load_ply_lex()
298 lexer = lex.lexer
299
300 # Set up the lexer and parser objects on pslice
301 pslice.lexer = lexer
302 pslice.parser = self
303
304 # If input was supplied, pass to lexer
305 if input is not None:
306 lexer.input(input)
307
308 if tokenfunc is None:
309 # Tokenize function
310 get_token = lexer.token
311 else:
312 get_token = tokenfunc
313
314 # Set up the state and symbol stacks
315
316 statestack = [ ] # Stack of parsing states
317 self.statestack = statestack
318 symstack = [ ] # Stack of grammar symbols
319 self.symstack = symstack
320
321 pslice.stack = symstack # Put in the production
322 errtoken = None # Err token
323
324 # The start state is assumed to be (0,$end)
325
326 statestack.append(0)
327 sym = YaccSymbol()
328 sym.type = "$end"
329 symstack.append(sym)
330 state = 0
331 while 1:
332 # Get the next symbol on the input. If a lookahead symbol
333 # is already set, we just use that. Otherwise, we'll pull
334 # the next token off of the lookaheadstack or from the lexer
335
336 # --! DEBUG
337 debug.debug('')
338 debug.debug('State : %s', state)
339 # --! DEBUG
340
341 if not lookahead:
342 if not lookaheadstack:
343 lookahead = get_token() # Get the next token
344 else:
345 lookahead = lookaheadstack.pop()
346 if not lookahead:
347 lookahead = YaccSymbol()
348 lookahead.type = "$end"
349
350 # --! DEBUG
351 debug.debug('Stack : %s',
352 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
353 # --! DEBUG
354
355 # Check the action table
356 ltype = lookahead.type
357 t = actions[state].get(ltype)
358
359 if t is not None:
360 if t > 0:
361 # shift a symbol on the stack
362 statestack.append(t)
363 state = t
364
365 # --! DEBUG
366 debug.debug("Action : Shift and goto state %s", t)
367 # --! DEBUG
368
369 symstack.append(lookahead)
370 lookahead = None
371
372 # Decrease error count on successful shift
373 if errorcount: errorcount -=1
374 continue
375
376 if t < 0:
377 # reduce a symbol on the stack, emit a production
378 p = prod[-t]
379 pname = p.name
380 plen = p.len
381
382 # Get production function
383 sym = YaccSymbol()
384 sym.type = pname # Production name
385 sym.value = None
386
387 # --! DEBUG
388 if plen:
389 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
390 else:
391 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
392
393 # --! DEBUG
394
395 if plen:
396 targ = symstack[-plen-1:]
397 targ[0] = sym
398
399 # --! TRACKING
400 if tracking:
401 t1 = targ[1]
402 sym.lineno = t1.lineno
403 sym.lexpos = t1.lexpos
404 t1 = targ[-1]
405 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
406 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
407
408 # --! TRACKING
409
410 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
411 # The code enclosed in this section is duplicated
412 # below as a performance optimization. Make sure
413 # changes get made in both locations.
414
415 pslice.slice = targ
416
417 try:
418 # Call the grammar rule with our special slice object
419 del symstack[-plen:]
420 del statestack[-plen:]
421 p.callable(pslice)
422 # --! DEBUG
423 debug.info("Result : %s", format_result(pslice[0]))
424 # --! DEBUG
425 symstack.append(sym)
426 state = goto[statestack[-1]][pname]
427 statestack.append(state)
428 except SyntaxError:
429 # If an error was set. Enter error recovery state
430 lookaheadstack.append(lookahead)
431 symstack.pop()
432 statestack.pop()
433 state = statestack[-1]
434 sym.type = 'error'
435 lookahead = sym
436 errorcount = error_count
437 self.errorok = 0
438 continue
439 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
440
441 else:
442
443 # --! TRACKING
444 if tracking:
445 sym.lineno = lexer.lineno
446 sym.lexpos = lexer.lexpos
447 # --! TRACKING
448
449 targ = [ sym ]
450
451 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
452 # The code enclosed in this section is duplicated
453 # above as a performance optimization. Make sure
454 # changes get made in both locations.
455
456 pslice.slice = targ
457
458 try:
459 # Call the grammar rule with our special slice object
460 p.callable(pslice)
461 # --! DEBUG
462 debug.info("Result : %s", format_result(pslice[0]))
463 # --! DEBUG
464 symstack.append(sym)
465 state = goto[statestack[-1]][pname]
466 statestack.append(state)
467 except SyntaxError:
468 # If an error was set. Enter error recovery state
469 lookaheadstack.append(lookahead)
470 symstack.pop()
471 statestack.pop()
472 state = statestack[-1]
473 sym.type = 'error'
474 lookahead = sym
475 errorcount = error_count
476 self.errorok = 0
477 continue
478 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
479
480 if t == 0:
481 n = symstack[-1]
482 result = getattr(n,"value",None)
483 # --! DEBUG
484 debug.info("Done : Returning %s", format_result(result))
485 debug.info("PLY: PARSE DEBUG END")
486 # --! DEBUG
487 return result
488
489 if t == None:
490
491 # --! DEBUG
492 debug.error('Error : %s',
493 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
494 # --! DEBUG
495
496 # We have some kind of parsing error here. To handle
497 # this, we are going to push the current token onto
498 # the tokenstack and replace it with an 'error' token.
499 # If there are any synchronization rules, they may
500 # catch it.
501 #
502 # In addition to pushing the error token, we call call
503 # the user defined p_error() function if this is the
504 # first syntax error. This function is only called if
505 # errorcount == 0.
506 if errorcount == 0 or self.errorok:
507 errorcount = error_count
508 self.errorok = 0
509 errtoken = lookahead
510 if errtoken.type == "$end":
511 errtoken = None # End of file!
512 if self.errorfunc:
513 global errok,token,restart
514 errok = self.errok # Set some special functions available in error recovery
515 token = get_token
516 restart = self.restart
517 if errtoken and not hasattr(errtoken,'lexer'):
518 errtoken.lexer = lexer
519 tok = self.errorfunc(errtoken)
520 del errok, token, restart # Delete special functions
521
522 if self.errorok:
523 # User must have done some kind of panic
524 # mode recovery on their own. The
525 # returned token is the next lookahead
526 lookahead = tok
527 errtoken = None
528 continue
529 else:
530 if errtoken:
531 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
532 else: lineno = 0
533 if lineno:
534 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
535 else:
536 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
537 else:
538 sys.stderr.write("yacc: Parse error in input. EOF\n")
539 return
540
541 else:
542 errorcount = error_count
543
544 # case 1: the statestack only has 1 entry on it. If we're in this state, the
545 # entire parse has been rolled back and we're completely hosed. The token is
546 # discarded and we just keep going.
547
548 if len(statestack) <= 1 and lookahead.type != "$end":
549 lookahead = None
550 errtoken = None
551 state = 0
552 # Nuke the pushback stack
553 del lookaheadstack[:]
554 continue
555
556 # case 2: the statestack has a couple of entries on it, but we're
557 # at the end of the file. nuke the top entry and generate an error token
558
559 # Start nuking entries on the stack
560 if lookahead.type == "$end":
561 # Whoa. We're really hosed here. Bail out
562 return
563
564 if lookahead.type != 'error':
565 sym = symstack[-1]
566 if sym.type == 'error':
567 # Hmmm. Error is on top of stack, we'll just nuke input
568 # symbol and continue
569 lookahead = None
570 continue
571 t = YaccSymbol()
572 t.type = 'error'
573 if hasattr(lookahead,"lineno"):
574 t.lineno = lookahead.lineno
575 t.value = lookahead
576 lookaheadstack.append(lookahead)
577 lookahead = t
578 else:
579 symstack.pop()
580 statestack.pop()
581 state = statestack[-1] # Potential bug fix
582
583 continue
584
585 # Call an error function here
586 raise RuntimeError("yacc: internal parser error!!!\n")
587
588 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
589 # parseopt().
590 #
591 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
592 # Edit the debug version above, then copy any modifications to the method
593 # below while removing #--! DEBUG sections.
594 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
595
596
597 def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
598 lookahead = None # Current lookahead symbol
599 lookaheadstack = [ ] # Stack of lookahead symbols
600 actions = self.action # Local reference to action table (to avoid lookup on self.)
601 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
602 prod = self.productions # Local reference to production list (to avoid lookup on self.)
603 pslice = YaccProduction(None) # Production object passed to grammar rules
604 errorcount = 0 # Used during error recovery
605
606 # If no lexer was given, we will try to use the lex module
607 if not lexer:
608 lex = load_ply_lex()
609 lexer = lex.lexer
610
611 # Set up the lexer and parser objects on pslice
612 pslice.lexer = lexer
613 pslice.parser = self
614
615 # If input was supplied, pass to lexer
616 if input is not None:
617 lexer.input(input)
618
619 if tokenfunc is None:
620 # Tokenize function
621 get_token = lexer.token
622 else:
623 get_token = tokenfunc
624
625 # Set up the state and symbol stacks
626
627 statestack = [ ] # Stack of parsing states
628 self.statestack = statestack
629 symstack = [ ] # Stack of grammar symbols
630 self.symstack = symstack
631
632 pslice.stack = symstack # Put in the production
633 errtoken = None # Err token
634
635 # The start state is assumed to be (0,$end)
636
637 statestack.append(0)
638 sym = YaccSymbol()
639 sym.type = '$end'
640 symstack.append(sym)
641 state = 0
642 while 1:
643 # Get the next symbol on the input. If a lookahead symbol
644 # is already set, we just use that. Otherwise, we'll pull
645 # the next token off of the lookaheadstack or from the lexer
646
647 if not lookahead:
648 if not lookaheadstack:
649 lookahead = get_token() # Get the next token
650 else:
651 lookahead = lookaheadstack.pop()
652 if not lookahead:
653 lookahead = YaccSymbol()
654 lookahead.type = '$end'
655
656 # Check the action table
657 ltype = lookahead.type
658 t = actions[state].get(ltype)
659
660 if t is not None:
661 if t > 0:
662 # shift a symbol on the stack
663 statestack.append(t)
664 state = t
665
666 symstack.append(lookahead)
667 lookahead = None
668
669 # Decrease error count on successful shift
670 if errorcount: errorcount -=1
671 continue
672
673 if t < 0:
674 # reduce a symbol on the stack, emit a production
675 p = prod[-t]
676 pname = p.name
677 plen = p.len
678
679 # Get production function
680 sym = YaccSymbol()
681 sym.type = pname # Production name
682 sym.value = None
683
684 if plen:
685 targ = symstack[-plen-1:]
686 targ[0] = sym
687
688 # --! TRACKING
689 if tracking:
690 t1 = targ[1]
691 sym.lineno = t1.lineno
692 sym.lexpos = t1.lexpos
693 t1 = targ[-1]
694 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
695 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
696
697 # --! TRACKING
698
699 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
700 # The code enclosed in this section is duplicated
701 # below as a performance optimization. Make sure
702 # changes get made in both locations.
703
704 pslice.slice = targ
705
706 try:
707 # Call the grammar rule with our special slice object
708 del symstack[-plen:]
709 del statestack[-plen:]
710 p.callable(pslice)
711 symstack.append(sym)
712 state = goto[statestack[-1]][pname]
713 statestack.append(state)
714 except SyntaxError:
715 # If an error was set. Enter error recovery state
716 lookaheadstack.append(lookahead)
717 symstack.pop()
718 statestack.pop()
719 state = statestack[-1]
720 sym.type = 'error'
721 lookahead = sym
722 errorcount = error_count
723 self.errorok = 0
724 continue
725 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
726
727 else:
728
729 # --! TRACKING
730 if tracking:
731 sym.lineno = lexer.lineno
732 sym.lexpos = lexer.lexpos
733 # --! TRACKING
734
735 targ = [ sym ]
736
737 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
738 # The code enclosed in this section is duplicated
739 # above as a performance optimization. Make sure
740 # changes get made in both locations.
741
742 pslice.slice = targ
743
744 try:
745 # Call the grammar rule with our special slice object
746 p.callable(pslice)
747 symstack.append(sym)
748 state = goto[statestack[-1]][pname]
749 statestack.append(state)
750 except SyntaxError:
751 # If an error was set. Enter error recovery state
752 lookaheadstack.append(lookahead)
753 symstack.pop()
754 statestack.pop()
755 state = statestack[-1]
756 sym.type = 'error'
757 lookahead = sym
758 errorcount = error_count
759 self.errorok = 0
760 continue
761 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
762
763 if t == 0:
764 n = symstack[-1]
765 return getattr(n,"value",None)
766
767 if t == None:
768
769 # We have some kind of parsing error here. To handle
770 # this, we are going to push the current token onto
771 # the tokenstack and replace it with an 'error' token.
772 # If there are any synchronization rules, they may
773 # catch it.
774 #
775 # In addition to pushing the error token, we call call
776 # the user defined p_error() function if this is the
777 # first syntax error. This function is only called if
778 # errorcount == 0.
779 if errorcount == 0 or self.errorok:
780 errorcount = error_count
781 self.errorok = 0
782 errtoken = lookahead
783 if errtoken.type == '$end':
784 errtoken = None # End of file!
785 if self.errorfunc:
786 global errok,token,restart
787 errok = self.errok # Set some special functions available in error recovery
788 token = get_token
789 restart = self.restart
790 if errtoken and not hasattr(errtoken,'lexer'):
791 errtoken.lexer = lexer
792 tok = self.errorfunc(errtoken)
793 del errok, token, restart # Delete special functions
794
795 if self.errorok:
796 # User must have done some kind of panic
797 # mode recovery on their own. The
798 # returned token is the next lookahead
799 lookahead = tok
800 errtoken = None
801 continue
802 else:
803 if errtoken:
804 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
805 else: lineno = 0
806 if lineno:
807 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
808 else:
809 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
810 else:
811 sys.stderr.write("yacc: Parse error in input. EOF\n")
812 return
813
814 else:
815 errorcount = error_count
816
817 # case 1: the statestack only has 1 entry on it. If we're in this state, the
818 # entire parse has been rolled back and we're completely hosed. The token is
819 # discarded and we just keep going.
820
821 if len(statestack) <= 1 and lookahead.type != '$end':
822 lookahead = None
823 errtoken = None
824 state = 0
825 # Nuke the pushback stack
826 del lookaheadstack[:]
827 continue
828
829 # case 2: the statestack has a couple of entries on it, but we're
830 # at the end of the file. nuke the top entry and generate an error token
831
832 # Start nuking entries on the stack
833 if lookahead.type == '$end':
834 # Whoa. We're really hosed here. Bail out
835 return
836
837 if lookahead.type != 'error':
838 sym = symstack[-1]
839 if sym.type == 'error':
840 # Hmmm. Error is on top of stack, we'll just nuke input
841 # symbol and continue
842 lookahead = None
843 continue
844 t = YaccSymbol()
845 t.type = 'error'
846 if hasattr(lookahead,"lineno"):
847 t.lineno = lookahead.lineno
848 t.value = lookahead
849 lookaheadstack.append(lookahead)
850 lookahead = t
851 else:
852 symstack.pop()
853 statestack.pop()
854 state = statestack[-1] # Potential bug fix
855
856 continue
857
858 # Call an error function here
859 raise RuntimeError("yacc: internal parser error!!!\n")
860
861 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
862 # parseopt_notrack().
863 #
864 # Optimized version of parseopt() with line number tracking removed.
865 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
866 # code in the #--! TRACKING sections
867 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
868
869 def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
870 lookahead = None # Current lookahead symbol
871 lookaheadstack = [ ] # Stack of lookahead symbols
872 actions = self.action # Local reference to action table (to avoid lookup on self.)
873 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
874 prod = self.productions # Local reference to production list (to avoid lookup on self.)
875 pslice = YaccProduction(None) # Production object passed to grammar rules
876 errorcount = 0 # Used during error recovery
877
878 # If no lexer was given, we will try to use the lex module
879 if not lexer:
880 lex = load_ply_lex()
881 lexer = lex.lexer
882
883 # Set up the lexer and parser objects on pslice
884 pslice.lexer = lexer
885 pslice.parser = self
886
887 # If input was supplied, pass to lexer
888 if input is not None:
889 lexer.input(input)
890
891 if tokenfunc is None:
892 # Tokenize function
893 get_token = lexer.token
894 else:
895 get_token = tokenfunc
896
897 # Set up the state and symbol stacks
898
899 statestack = [ ] # Stack of parsing states
900 self.statestack = statestack
901 symstack = [ ] # Stack of grammar symbols
902 self.symstack = symstack
903
904 pslice.stack = symstack # Put in the production
905 errtoken = None # Err token
906
907 # The start state is assumed to be (0,$end)
908
909 statestack.append(0)
910 sym = YaccSymbol()
911 sym.type = '$end'
912 symstack.append(sym)
913 state = 0
914 while 1:
915 # Get the next symbol on the input. If a lookahead symbol
916 # is already set, we just use that. Otherwise, we'll pull
917 # the next token off of the lookaheadstack or from the lexer
918
919 if not lookahead:
920 if not lookaheadstack:
921 lookahead = get_token() # Get the next token
922 else:
923 lookahead = lookaheadstack.pop()
924 if not lookahead:
925 lookahead = YaccSymbol()
926 lookahead.type = '$end'
927
928 # Check the action table
929 ltype = lookahead.type
930 t = actions[state].get(ltype)
931
932 if t is not None:
933 if t > 0:
934 # shift a symbol on the stack
935 statestack.append(t)
936 state = t
937
938 symstack.append(lookahead)
939 lookahead = None
940
941 # Decrease error count on successful shift
942 if errorcount: errorcount -=1
943 continue
944
945 if t < 0:
946 # reduce a symbol on the stack, emit a production
947 p = prod[-t]
948 pname = p.name
949 plen = p.len
950
951 # Get production function
952 sym = YaccSymbol()
953 sym.type = pname # Production name
954 sym.value = None
955
956 if plen:
957 targ = symstack[-plen-1:]
958 targ[0] = sym
959
960 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
961 # The code enclosed in this section is duplicated
962 # below as a performance optimization. Make sure
963 # changes get made in both locations.
964
965 pslice.slice = targ
966
967 try:
968 # Call the grammar rule with our special slice object
969 del symstack[-plen:]
970 del statestack[-plen:]
971 p.callable(pslice)
972 symstack.append(sym)
973 state = goto[statestack[-1]][pname]
974 statestack.append(state)
975 except SyntaxError:
976 # If an error was set. Enter error recovery state
977 lookaheadstack.append(lookahead)
978 symstack.pop()
979 statestack.pop()
980 state = statestack[-1]
981 sym.type = 'error'
982 lookahead = sym
983 errorcount = error_count
984 self.errorok = 0
985 continue
986 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
987
988 else:
989
990 targ = [ sym ]
991
992 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
993 # The code enclosed in this section is duplicated
994 # above as a performance optimization. Make sure
995 # changes get made in both locations.
996
997 pslice.slice = targ
998
999 try:
1000 # Call the grammar rule with our special slice object
1001 p.callable(pslice)
1002 symstack.append(sym)
1003 state = goto[statestack[-1]][pname]
1004 statestack.append(state)
1005 except SyntaxError:
1006 # If an error was set. Enter error recovery state
1007 lookaheadstack.append(lookahead)
1008 symstack.pop()
1009 statestack.pop()
1010 state = statestack[-1]
1011 sym.type = 'error'
1012 lookahead = sym
1013 errorcount = error_count
1014 self.errorok = 0
1015 continue
1016 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1017
1018 if t == 0:
1019 n = symstack[-1]
1020 return getattr(n,"value",None)
1021
1022 if t == None:
1023
1024 # We have some kind of parsing error here. To handle
1025 # this, we are going to push the current token onto
1026 # the tokenstack and replace it with an 'error' token.
1027 # If there are any synchronization rules, they may
1028 # catch it.
1029 #
1030 # In addition to pushing the error token, we call call
1031 # the user defined p_error() function if this is the
1032 # first syntax error. This function is only called if
1033 # errorcount == 0.
1034 if errorcount == 0 or self.errorok:
1035 errorcount = error_count
1036 self.errorok = 0
1037 errtoken = lookahead
1038 if errtoken.type == '$end':
1039 errtoken = None # End of file!
1040 if self.errorfunc:
1041 global errok,token,restart
1042 errok = self.errok # Set some special functions available in error recovery
1043 token = get_token
1044 restart = self.restart
1045 if errtoken and not hasattr(errtoken,'lexer'):
1046 errtoken.lexer = lexer
1047 tok = self.errorfunc(errtoken)
1048 del errok, token, restart # Delete special functions
1049
1050 if self.errorok:
1051 # User must have done some kind of panic
1052 # mode recovery on their own. The
1053 # returned token is the next lookahead
1054 lookahead = tok
1055 errtoken = None
1056 continue
1057 else:
1058 if errtoken:
1059 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
1060 else: lineno = 0
1061 if lineno:
1062 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
1063 else:
1064 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
1065 else:
1066 sys.stderr.write("yacc: Parse error in input. EOF\n")
1067 return
1068
1069 else:
1070 errorcount = error_count
1071
1072 # case 1: the statestack only has 1 entry on it. If we're in this state, the
1073 # entire parse has been rolled back and we're completely hosed. The token is
1074 # discarded and we just keep going.
1075
1076 if len(statestack) <= 1 and lookahead.type != '$end':
1077 lookahead = None
1078 errtoken = None
1079 state = 0
1080 # Nuke the pushback stack
1081 del lookaheadstack[:]
1082 continue
1083
1084 # case 2: the statestack has a couple of entries on it, but we're
1085 # at the end of the file. nuke the top entry and generate an error token
1086
1087 # Start nuking entries on the stack
1088 if lookahead.type == '$end':
1089 # Whoa. We're really hosed here. Bail out
1090 return
1091
1092 if lookahead.type != 'error':
1093 sym = symstack[-1]
1094 if sym.type == 'error':
1095 # Hmmm. Error is on top of stack, we'll just nuke input
1096 # symbol and continue
1097 lookahead = None
1098 continue
1099 t = YaccSymbol()
1100 t.type = 'error'
1101 if hasattr(lookahead,"lineno"):
1102 t.lineno = lookahead.lineno
1103 t.value = lookahead
1104 lookaheadstack.append(lookahead)
1105 lookahead = t
1106 else:
1107 symstack.pop()
1108 statestack.pop()
1109 state = statestack[-1] # Potential bug fix
1110
1111 continue
1112
1113 # Call an error function here
1114 raise RuntimeError("yacc: internal parser error!!!\n")
1115
1116# -----------------------------------------------------------------------------
1117# === Grammar Representation ===
1118#
1119# The following functions, classes, and variables are used to represent and
1120# manipulate the rules that make up a grammar.
1121# -----------------------------------------------------------------------------
1122
1123import re
1124
1125# regex matching identifiers
1126_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1127
1128# -----------------------------------------------------------------------------
1129# class Production:
1130#
1131# This class stores the raw information about a single production or grammar rule.
1132# A grammar rule refers to a specification such as this:
1133#
1134# expr : expr PLUS term
1135#
1136# Here are the basic attributes defined on all productions
1137#
1138# name - Name of the production. For example 'expr'
1139# prod - A list of symbols on the right side ['expr','PLUS','term']
1140# prec - Production precedence level
1141# number - Production number.
1142# func - Function that executes on reduce
1143# file - File where production function is defined
1144# lineno - Line number where production function is defined
1145#
1146# The following attributes are defined or optional.
1147#
1148# len - Length of the production (number of symbols on right hand side)
1149# usyms - Set of unique symbols found in the production
1150# -----------------------------------------------------------------------------
1151
1152class Production(object):
1153 reduced = 0
1154 def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
1155 self.name = name
1156 self.prod = tuple(prod)
1157 self.number = number
1158 self.func = func
1159 self.callable = None
1160 self.file = file
1161 self.line = line
1162 self.prec = precedence
1163
1164 # Internal settings used during table construction
1165
1166 self.len = len(self.prod) # Length of the production
1167
1168 # Create a list of unique production symbols used in the production
1169 self.usyms = [ ]
1170 for s in self.prod:
1171 if s not in self.usyms:
1172 self.usyms.append(s)
1173
1174 # List of all LR items for the production
1175 self.lr_items = []
1176 self.lr_next = None
1177
1178 # Create a string representation
1179 if self.prod:
1180 self.str = "%s -> %s" % (self.name," ".join(self.prod))
1181 else:
1182 self.str = "%s -> <empty>" % self.name
1183
1184 def __str__(self):
1185 return self.str
1186
1187 def __repr__(self):
1188 return "Production("+str(self)+")"
1189
1190 def __len__(self):
1191 return len(self.prod)
1192
1193 def __nonzero__(self):
1194 return 1
1195
1196 def __getitem__(self,index):
1197 return self.prod[index]
1198
1199 # Return the nth lr_item from the production (or None if at the end)
1200 def lr_item(self,n):
1201 if n > len(self.prod): return None
1202 p = LRItem(self,n)
1203
1204 # Precompute the list of productions immediately following. Hack. Remove later
1205 try:
1206 p.lr_after = Prodnames[p.prod[n+1]]
1207 except (IndexError,KeyError):
1208 p.lr_after = []
1209 try:
1210 p.lr_before = p.prod[n-1]
1211 except IndexError:
1212 p.lr_before = None
1213
1214 return p
1215
1216 # Bind the production function name to a callable
1217 def bind(self,pdict):
1218 if self.func:
1219 self.callable = pdict[self.func]
1220
1221# This class serves as a minimal standin for Production objects when
1222# reading table data from files. It only contains information
1223# actually used by the LR parsing engine, plus some additional
1224# debugging information.
1225class MiniProduction(object):
1226 def __init__(self,str,name,len,func,file,line):
1227 self.name = name
1228 self.len = len
1229 self.func = func
1230 self.callable = None
1231 self.file = file
1232 self.line = line
1233 self.str = str
1234 def __str__(self):
1235 return self.str
1236 def __repr__(self):
1237 return "MiniProduction(%s)" % self.str
1238
1239 # Bind the production function name to a callable
1240 def bind(self,pdict):
1241 if self.func:
1242 self.callable = pdict[self.func]
1243
1244
1245# -----------------------------------------------------------------------------
1246# class LRItem
1247#
1248# This class represents a specific stage of parsing a production rule. For
1249# example:
1250#
1251# expr : expr . PLUS term
1252#
1253# In the above, the "." represents the current location of the parse. Here
1254# basic attributes:
1255#
1256# name - Name of the production. For example 'expr'
1257# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1258# number - Production number.
1259#
1260# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
1261# then lr_next refers to 'expr -> expr PLUS . term'
1262# lr_index - LR item index (location of the ".") in the prod list.
1263# lookaheads - LALR lookahead symbols for this item
1264# len - Length of the production (number of symbols on right hand side)
1265# lr_after - List of all productions that immediately follow
1266# lr_before - Grammar symbol immediately before
1267# -----------------------------------------------------------------------------
1268
1269class LRItem(object):
1270 def __init__(self,p,n):
1271 self.name = p.name
1272 self.prod = list(p.prod)
1273 self.number = p.number
1274 self.lr_index = n
1275 self.lookaheads = { }
1276 self.prod.insert(n,".")
1277 self.prod = tuple(self.prod)
1278 self.len = len(self.prod)
1279 self.usyms = p.usyms
1280
1281 def __str__(self):
1282 if self.prod:
1283 s = "%s -> %s" % (self.name," ".join(self.prod))
1284 else:
1285 s = "%s -> <empty>" % self.name
1286 return s
1287
1288 def __repr__(self):
1289 return "LRItem("+str(self)+")"
1290
1291# -----------------------------------------------------------------------------
1292# rightmost_terminal()
1293#
1294# Return the rightmost terminal from a list of symbols. Used in add_production()
1295# -----------------------------------------------------------------------------
1296def rightmost_terminal(symbols, terminals):
1297 i = len(symbols) - 1
1298 while i >= 0:
1299 if symbols[i] in terminals:
1300 return symbols[i]
1301 i -= 1
1302 return None
1303
1304# -----------------------------------------------------------------------------
1305# === GRAMMAR CLASS ===
1306#
1307# The following class represents the contents of the specified grammar along
1308# with various computed properties such as first sets, follow sets, LR items, etc.
1309# This data is used for critical parts of the table generation process later.
1310# -----------------------------------------------------------------------------
1311
1312class GrammarError(YaccError): pass
1313
1314class Grammar(object):
1315 def __init__(self,terminals):
1316 self.Productions = [None] # A list of all of the productions. The first
1317 # entry is always reserved for the purpose of
1318 # building an augmented grammar
1319
1320 self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
1321 # productions of that nonterminal.
1322
1323 self.Prodmap = { } # A dictionary that is only used to detect duplicate
1324 # productions.
1325
1326 self.Terminals = { } # A dictionary mapping the names of terminal symbols to a
1327 # list of the rules where they are used.
1328
1329 for term in terminals:
1330 self.Terminals[term] = []
1331
1332 self.Terminals['error'] = []
1333
1334 self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list
1335 # of rule numbers where they are used.
1336
1337 self.First = { } # A dictionary of precomputed FIRST(x) symbols
1338
1339 self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
1340
1341 self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the
1342 # form ('right',level) or ('nonassoc', level) or ('left',level)
1343
1344 self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer.
1345 # This is only used to provide error checking and to generate
1346 # a warning about unused precedence rules.
1347
1348 self.Start = None # Starting symbol for the grammar
1349
1350
1351 def __len__(self):
1352 return len(self.Productions)
1353
1354 def __getitem__(self,index):
1355 return self.Productions[index]
1356
1357 # -----------------------------------------------------------------------------
1358 # set_precedence()
1359 #
1360 # Sets the precedence for a given terminal. assoc is the associativity such as
1361 # 'left','right', or 'nonassoc'. level is a numeric level.
1362 #
1363 # -----------------------------------------------------------------------------
1364
1365 def set_precedence(self,term,assoc,level):
1366 assert self.Productions == [None],"Must call set_precedence() before add_production()"
1367 if term in self.Precedence:
1368 raise GrammarError("Precedence already specified for terminal '%s'" % term)
1369 if assoc not in ['left','right','nonassoc']:
1370 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1371 self.Precedence[term] = (assoc,level)
1372
1373 # -----------------------------------------------------------------------------
1374 # add_production()
1375 #
1376 # Given an action function, this function assembles a production rule and
1377 # computes its precedence level.
1378 #
1379 # The production rule is supplied as a list of symbols. For example,
1380 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1381 # symbols ['expr','PLUS','term'].
1382 #
1383 # Precedence is determined by the precedence of the right-most non-terminal
1384 # or the precedence of a terminal specified by %prec.
1385 #
1386 # A variety of error checks are performed to make sure production symbols
1387 # are valid and that %prec is used correctly.
1388 # -----------------------------------------------------------------------------
1389
1390 def add_production(self,prodname,syms,func=None,file='',line=0):
1391
1392 if prodname in self.Terminals:
1393 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
1394 if prodname == 'error':
1395 raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
1396 if not _is_identifier.match(prodname):
1397 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
1398
1399 # Look for literal tokens
1400 for n,s in enumerate(syms):
1401 if s[0] in "'\"":
1402 try:
1403 c = eval(s)
1404 if (len(c) > 1):
1405 raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
1406 if not c in self.Terminals:
1407 self.Terminals[c] = []
1408 syms[n] = c
1409 continue
1410 except SyntaxError:
1411 pass
1412 if not _is_identifier.match(s) and s != '%prec':
1413 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
1414
1415 # Determine the precedence level
1416 if '%prec' in syms:
1417 if syms[-1] == '%prec':
1418 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1419 if syms[-2] != '%prec':
1420 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1421 precname = syms[-1]
1422 prodprec = self.Precedence.get(precname,None)
1423 if not prodprec:
1424 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
1425 else:
1426 self.UsedPrecedence[precname] = 1
1427 del syms[-2:] # Drop %prec from the rule
1428 else:
1429 # If no %prec, precedence is determined by the rightmost terminal symbol
1430 precname = rightmost_terminal(syms,self.Terminals)
1431 prodprec = self.Precedence.get(precname,('right',0))
1432
1433 # See if the rule is already in the rulemap
1434 map = "%s -> %s" % (prodname,syms)
1435 if map in self.Prodmap:
1436 m = self.Prodmap[map]
1437 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1438 "Previous definition at %s:%d" % (m.file, m.line))
1439
1440 # From this point on, everything is valid. Create a new Production instance
1441 pnumber = len(self.Productions)
1442 if not prodname in self.Nonterminals:
1443 self.Nonterminals[prodname] = [ ]
1444
1445 # Add the production number to Terminals and Nonterminals
1446 for t in syms:
1447 if t in self.Terminals:
1448 self.Terminals[t].append(pnumber)
1449 else:
1450 if not t in self.Nonterminals:
1451 self.Nonterminals[t] = [ ]
1452 self.Nonterminals[t].append(pnumber)
1453
1454 # Create a production and add it to the list of productions
1455 p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1456 self.Productions.append(p)
1457 self.Prodmap[map] = p
1458
1459 # Add to the global productions list
1460 try:
1461 self.Prodnames[prodname].append(p)
1462 except KeyError:
1463 self.Prodnames[prodname] = [ p ]
1464 return 0
1465
1466 # -----------------------------------------------------------------------------
1467 # set_start()
1468 #
1469 # Sets the starting symbol and creates the augmented grammar. Production
1470 # rule 0 is S' -> start where start is the start symbol.
1471 # -----------------------------------------------------------------------------
1472
1473 def set_start(self,start=None):
1474 if not start:
1475 start = self.Productions[1].name
1476 if start not in self.Nonterminals:
1477 raise GrammarError("start symbol %s undefined" % start)
1478 self.Productions[0] = Production(0,"S'",[start])
1479 self.Nonterminals[start].append(0)
1480 self.Start = start
1481
1482 # -----------------------------------------------------------------------------
1483 # find_unreachable()
1484 #
1485 # Find all of the nonterminal symbols that can't be reached from the starting
1486 # symbol. Returns a list of nonterminals that can't be reached.
1487 # -----------------------------------------------------------------------------
1488
1489 def find_unreachable(self):
1490
1491 # Mark all symbols that are reachable from a symbol s
1492 def mark_reachable_from(s):
1493 if reachable[s]:
1494 # We've already reached symbol s.
1495 return
1496 reachable[s] = 1
1497 for p in self.Prodnames.get(s,[]):
1498 for r in p.prod:
1499 mark_reachable_from(r)
1500
1501 reachable = { }
1502 for s in list(self.Terminals) + list(self.Nonterminals):
1503 reachable[s] = 0
1504
1505 mark_reachable_from( self.Productions[0].prod[0] )
1506
1507 return [s for s in list(self.Nonterminals)
1508 if not reachable[s]]
1509
1510 # -----------------------------------------------------------------------------
1511 # infinite_cycles()
1512 #
1513 # This function looks at the various parsing rules and tries to detect
1514 # infinite recursion cycles (grammar rules where there is no possible way
1515 # to derive a string of only terminals).
1516 # -----------------------------------------------------------------------------
1517
1518 def infinite_cycles(self):
1519 terminates = {}
1520
1521 # Terminals:
1522 for t in self.Terminals:
1523 terminates[t] = 1
1524
1525 terminates['$end'] = 1
1526
1527 # Nonterminals:
1528
1529 # Initialize to false:
1530 for n in self.Nonterminals:
1531 terminates[n] = 0
1532
1533 # Then propagate termination until no change:
1534 while 1:
1535 some_change = 0
1536 for (n,pl) in self.Prodnames.items():
1537 # Nonterminal n terminates iff any of its productions terminates.
1538 for p in pl:
1539 # Production p terminates iff all of its rhs symbols terminate.
1540 for s in p.prod:
1541 if not terminates[s]:
1542 # The symbol s does not terminate,
1543 # so production p does not terminate.
1544 p_terminates = 0
1545 break
1546 else:
1547 # didn't break from the loop,
1548 # so every symbol s terminates
1549 # so production p terminates.
1550 p_terminates = 1
1551
1552 if p_terminates:
1553 # symbol n terminates!
1554 if not terminates[n]:
1555 terminates[n] = 1
1556 some_change = 1
1557 # Don't need to consider any more productions for this n.
1558 break
1559
1560 if not some_change:
1561 break
1562
1563 infinite = []
1564 for (s,term) in terminates.items():
1565 if not term:
1566 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1567 # s is used-but-not-defined, and we've already warned of that,
1568 # so it would be overkill to say that it's also non-terminating.
1569 pass
1570 else:
1571 infinite.append(s)
1572
1573 return infinite
1574
1575
1576 # -----------------------------------------------------------------------------
1577 # undefined_symbols()
1578 #
1579 # Find all symbols that were used the grammar, but not defined as tokens or
1580 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
1581 # and prod is the production where the symbol was used.
1582 # -----------------------------------------------------------------------------
1583 def undefined_symbols(self):
1584 result = []
1585 for p in self.Productions:
1586 if not p: continue
1587
1588 for s in p.prod:
1589 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1590 result.append((s,p))
1591 return result
1592
1593 # -----------------------------------------------------------------------------
1594 # unused_terminals()
1595 #
1596 # Find all terminals that were defined, but not used by the grammar. Returns
1597 # a list of all symbols.
1598 # -----------------------------------------------------------------------------
1599 def unused_terminals(self):
1600 unused_tok = []
1601 for s,v in self.Terminals.items():
1602 if s != 'error' and not v:
1603 unused_tok.append(s)
1604
1605 return unused_tok
1606
1607 # ------------------------------------------------------------------------------
1608 # unused_rules()
1609 #
1610 # Find all grammar rules that were defined, but not used (maybe not reachable)
1611 # Returns a list of productions.
1612 # ------------------------------------------------------------------------------
1613
1614 def unused_rules(self):
1615 unused_prod = []
1616 for s,v in self.Nonterminals.items():
1617 if not v:
1618 p = self.Prodnames[s][0]
1619 unused_prod.append(p)
1620 return unused_prod
1621
1622 # -----------------------------------------------------------------------------
1623 # unused_precedence()
1624 #
1625 # Returns a list of tuples (term,precedence) corresponding to precedence
1626 # rules that were never used by the grammar. term is the name of the terminal
1627 # on which precedence was applied and precedence is a string such as 'left' or
1628 # 'right' corresponding to the type of precedence.
1629 # -----------------------------------------------------------------------------
1630
1631 def unused_precedence(self):
1632 unused = []
1633 for termname in self.Precedence:
1634 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1635 unused.append((termname,self.Precedence[termname][0]))
1636
1637 return unused
1638
1639 # -------------------------------------------------------------------------
1640 # _first()
1641 #
1642 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1643 #
1644 # During execution of compute_first1, the result may be incomplete.
1645 # Afterward (e.g., when called from compute_follow()), it will be complete.
1646 # -------------------------------------------------------------------------
1647 def _first(self,beta):
1648
1649 # We are computing First(x1,x2,x3,...,xn)
1650 result = [ ]
1651 for x in beta:
1652 x_produces_empty = 0
1653
1654 # Add all the non-<empty> symbols of First[x] to the result.
1655 for f in self.First[x]:
1656 if f == '<empty>':
1657 x_produces_empty = 1
1658 else:
1659 if f not in result: result.append(f)
1660
1661 if x_produces_empty:
1662 # We have to consider the next x in beta,
1663 # i.e. stay in the loop.
1664 pass
1665 else:
1666 # We don't have to consider any further symbols in beta.
1667 break
1668 else:
1669 # There was no 'break' from the loop,
1670 # so x_produces_empty was true for all x in beta,
1671 # so beta produces empty as well.
1672 result.append('<empty>')
1673
1674 return result
1675
1676 # -------------------------------------------------------------------------
1677 # compute_first()
1678 #
1679 # Compute the value of FIRST1(X) for all symbols
1680 # -------------------------------------------------------------------------
1681 def compute_first(self):
1682 if self.First:
1683 return self.First
1684
1685 # Terminals:
1686 for t in self.Terminals:
1687 self.First[t] = [t]
1688
1689 self.First['$end'] = ['$end']
1690
1691 # Nonterminals:
1692
1693 # Initialize to the empty set:
1694 for n in self.Nonterminals:
1695 self.First[n] = []
1696
1697 # Then propagate symbols until no change:
1698 while 1:
1699 some_change = 0
1700 for n in self.Nonterminals:
1701 for p in self.Prodnames[n]:
1702 for f in self._first(p.prod):
1703 if f not in self.First[n]:
1704 self.First[n].append( f )
1705 some_change = 1
1706 if not some_change:
1707 break
1708
1709 return self.First
1710
1711 # ---------------------------------------------------------------------
1712 # compute_follow()
1713 #
1714 # Computes all of the follow sets for every non-terminal symbol. The
1715 # follow set is the set of all symbols that might follow a given
1716 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1717 # ---------------------------------------------------------------------
1718 def compute_follow(self,start=None):
1719 # If already computed, return the result
1720 if self.Follow:
1721 return self.Follow
1722
1723 # If first sets not computed yet, do that first.
1724 if not self.First:
1725 self.compute_first()
1726
1727 # Add '$end' to the follow list of the start symbol
1728 for k in self.Nonterminals:
1729 self.Follow[k] = [ ]
1730
1731 if not start:
1732 start = self.Productions[1].name
1733
1734 self.Follow[start] = [ '$end' ]
1735
1736 while 1:
1737 didadd = 0
1738 for p in self.Productions[1:]:
1739 # Here is the production set
1740 for i in range(len(p.prod)):
1741 B = p.prod[i]
1742 if B in self.Nonterminals:
1743 # Okay. We got a non-terminal in a production
1744 fst = self._first(p.prod[i+1:])
1745 hasempty = 0
1746 for f in fst:
1747 if f != '<empty>' and f not in self.Follow[B]:
1748 self.Follow[B].append(f)
1749 didadd = 1
1750 if f == '<empty>':
1751 hasempty = 1
1752 if hasempty or i == (len(p.prod)-1):
1753 # Add elements of follow(a) to follow(b)
1754 for f in self.Follow[p.name]:
1755 if f not in self.Follow[B]:
1756 self.Follow[B].append(f)
1757 didadd = 1
1758 if not didadd: break
1759 return self.Follow
1760
1761
1762 # -----------------------------------------------------------------------------
1763 # build_lritems()
1764 #
1765 # This function walks the list of productions and builds a complete set of the
1766 # LR items. The LR items are stored in two ways: First, they are uniquely
1767 # numbered and placed in the list _lritems. Second, a linked list of LR items
1768 # is built for each production. For example:
1769 #
1770 # E -> E PLUS E
1771 #
1772 # Creates the list
1773 #
1774 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1775 # -----------------------------------------------------------------------------
1776
1777 def build_lritems(self):
1778 for p in self.Productions:
1779 lastlri = p
1780 i = 0
1781 lr_items = []
1782 while 1:
1783 if i > len(p):
1784 lri = None
1785 else:
1786 lri = LRItem(p,i)
1787 # Precompute the list of productions immediately following
1788 try:
1789 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1790 except (IndexError,KeyError):
1791 lri.lr_after = []
1792 try:
1793 lri.lr_before = lri.prod[i-1]
1794 except IndexError:
1795 lri.lr_before = None
1796
1797 lastlri.lr_next = lri
1798 if not lri: break
1799 lr_items.append(lri)
1800 lastlri = lri
1801 i += 1
1802 p.lr_items = lr_items
1803
1804# -----------------------------------------------------------------------------
1805# == Class LRTable ==
1806#
1807# This basic class represents a basic table of LR parsing information.
1808# Methods for generating the tables are not defined here. They are defined
1809# in the derived class LRGeneratedTable.
1810# -----------------------------------------------------------------------------
1811
1812class VersionError(YaccError): pass
1813
1814class LRTable(object):
1815 def __init__(self):
1816 self.lr_action = None
1817 self.lr_goto = None
1818 self.lr_productions = None
1819 self.lr_method = None
1820
1821 def read_table(self,module):
1822 if isinstance(module,types.ModuleType):
1823 parsetab = module
1824 else:
1825 if sys.version_info[0] < 3:
1826 exec("import %s as parsetab" % module)
1827 else:
1828 env = { }
1829 exec("import %s as parsetab" % module, env, env)
1830 parsetab = env['parsetab']
1831
1832 if parsetab._tabversion != __tabversion__:
1833 raise VersionError("yacc table file version is out of date")
1834
1835 self.lr_action = parsetab._lr_action
1836 self.lr_goto = parsetab._lr_goto
1837
1838 self.lr_productions = []
1839 for p in parsetab._lr_productions:
1840 self.lr_productions.append(MiniProduction(*p))
1841
1842 self.lr_method = parsetab._lr_method
1843 return parsetab._lr_signature
1844
1845 def read_pickle(self,filename):
1846 try:
1847 import cPickle as pickle
1848 except ImportError:
1849 import pickle
1850
1851 in_f = open(filename,"rb")
1852
1853 tabversion = pickle.load(in_f)
1854 if tabversion != __tabversion__:
1855 raise VersionError("yacc table file version is out of date")
1856 self.lr_method = pickle.load(in_f)
1857 signature = pickle.load(in_f)
1858 self.lr_action = pickle.load(in_f)
1859 self.lr_goto = pickle.load(in_f)
1860 productions = pickle.load(in_f)
1861
1862 self.lr_productions = []
1863 for p in productions:
1864 self.lr_productions.append(MiniProduction(*p))
1865
1866 in_f.close()
1867 return signature
1868
1869 # Bind all production function names to callable objects in pdict
1870 def bind_callables(self,pdict):
1871 for p in self.lr_productions:
1872 p.bind(pdict)
1873
1874# -----------------------------------------------------------------------------
1875# === LR Generator ===
1876#
1877# The following classes and functions are used to generate LR parsing tables on
1878# a grammar.
1879# -----------------------------------------------------------------------------
1880
1881# -----------------------------------------------------------------------------
1882# digraph()
1883# traverse()
1884#
1885# The following two functions are used to compute set valued functions
1886# of the form:
1887#
1888# F(x) = F'(x) U U{F(y) | x R y}
1889#
1890# This is used to compute the values of Read() sets as well as FOLLOW sets
1891# in LALR(1) generation.
1892#
1893# Inputs: X - An input set
1894# R - A relation
1895# FP - Set-valued function
1896# ------------------------------------------------------------------------------
1897
1898def digraph(X,R,FP):
1899 N = { }
1900 for x in X:
1901 N[x] = 0
1902 stack = []
1903 F = { }
1904 for x in X:
1905 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1906 return F
1907
1908def traverse(x,N,stack,F,X,R,FP):
1909 stack.append(x)
1910 d = len(stack)
1911 N[x] = d
1912 F[x] = FP(x) # F(X) <- F'(x)
1913
1914 rel = R(x) # Get y's related to x
1915 for y in rel:
1916 if N[y] == 0:
1917 traverse(y,N,stack,F,X,R,FP)
1918 N[x] = min(N[x],N[y])
1919 for a in F.get(y,[]):
1920 if a not in F[x]: F[x].append(a)
1921 if N[x] == d:
1922 N[stack[-1]] = MAXINT
1923 F[stack[-1]] = F[x]
1924 element = stack.pop()
1925 while element != x:
1926 N[stack[-1]] = MAXINT
1927 F[stack[-1]] = F[x]
1928 element = stack.pop()
1929
1930class LALRError(YaccError): pass
1931
1932# -----------------------------------------------------------------------------
1933# == LRGeneratedTable ==
1934#
1935# This class implements the LR table generation algorithm. There are no
1936# public methods except for write()
1937# -----------------------------------------------------------------------------
1938
1939class LRGeneratedTable(LRTable):
1940 def __init__(self,grammar,method='LALR',log=None):
1941 if method not in ['SLR','LALR']:
1942 raise LALRError("Unsupported method %s" % method)
1943
1944 self.grammar = grammar
1945 self.lr_method = method
1946
1947 # Set up the logger
1948 if not log:
1949 log = NullLogger()
1950 self.log = log
1951
1952 # Internal attributes
1953 self.lr_action = {} # Action table
1954 self.lr_goto = {} # Goto table
1955 self.lr_productions = grammar.Productions # Copy of grammar Production array
1956 self.lr_goto_cache = {} # Cache of computed gotos
1957 self.lr0_cidhash = {} # Cache of closures
1958
1959 self._add_count = 0 # Internal counter used to detect cycles
1960
1961 # Diagonistic information filled in by the table generator
1962 self.sr_conflict = 0
1963 self.rr_conflict = 0
1964 self.conflicts = [] # List of conflicts
1965
1966 self.sr_conflicts = []
1967 self.rr_conflicts = []
1968
1969 # Build the tables
1970 self.grammar.build_lritems()
1971 self.grammar.compute_first()
1972 self.grammar.compute_follow()
1973 self.lr_parse_table()
1974
1975 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1976
1977 def lr0_closure(self,I):
1978 self._add_count += 1
1979
1980 # Add everything in I to J
1981 J = I[:]
1982 didadd = 1
1983 while didadd:
1984 didadd = 0
1985 for j in J:
1986 for x in j.lr_after:
1987 if getattr(x,"lr0_added",0) == self._add_count: continue
1988 # Add B --> .G to J
1989 J.append(x.lr_next)
1990 x.lr0_added = self._add_count
1991 didadd = 1
1992
1993 return J
1994
1995 # Compute the LR(0) goto function goto(I,X) where I is a set
1996 # of LR(0) items and X is a grammar symbol. This function is written
1997 # in a way that guarantees uniqueness of the generated goto sets
1998 # (i.e. the same goto set will never be returned as two different Python
1999 # objects). With uniqueness, we can later do fast set comparisons using
2000 # id(obj) instead of element-wise comparison.
2001
2002 def lr0_goto(self,I,x):
2003 # First we look for a previously cached entry
2004 g = self.lr_goto_cache.get((id(I),x),None)
2005 if g: return g
2006
2007 # Now we generate the goto set in a way that guarantees uniqueness
2008 # of the result
2009
2010 s = self.lr_goto_cache.get(x,None)
2011 if not s:
2012 s = { }
2013 self.lr_goto_cache[x] = s
2014
2015 gs = [ ]
2016 for p in I:
2017 n = p.lr_next
2018 if n and n.lr_before == x:
2019 s1 = s.get(id(n),None)
2020 if not s1:
2021 s1 = { }
2022 s[id(n)] = s1
2023 gs.append(n)
2024 s = s1
2025 g = s.get('$end',None)
2026 if not g:
2027 if gs:
2028 g = self.lr0_closure(gs)
2029 s['$end'] = g
2030 else:
2031 s['$end'] = gs
2032 self.lr_goto_cache[(id(I),x)] = g
2033 return g
2034
2035 # Compute the LR(0) sets of item function
2036 def lr0_items(self):
2037
2038 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2039 i = 0
2040 for I in C:
2041 self.lr0_cidhash[id(I)] = i
2042 i += 1
2043
2044 # Loop over the items in C and each grammar symbols
2045 i = 0
2046 while i < len(C):
2047 I = C[i]
2048 i += 1
2049
2050 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2051 asyms = { }
2052 for ii in I:
2053 for s in ii.usyms:
2054 asyms[s] = None
2055
2056 for x in asyms:
2057 g = self.lr0_goto(I,x)
2058 if not g: continue
2059 if id(g) in self.lr0_cidhash: continue
2060 self.lr0_cidhash[id(g)] = len(C)
2061 C.append(g)
2062
2063 return C
2064
2065 # -----------------------------------------------------------------------------
2066 # ==== LALR(1) Parsing ====
2067 #
2068 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2069 # relying upon Follow() sets when performing reductions, a more selective
2070 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2071 # Thus, we mainly just have to focus on calculating the lookahead sets.
2072 #
2073 # The method used here is due to DeRemer and Pennelo (1982).
2074 #
2075 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
2076 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2077 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2078 #
2079 # Further details can also be found in:
2080 #
2081 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2082 # McGraw-Hill Book Company, (1985).
2083 #
2084 # -----------------------------------------------------------------------------
2085
2086 # -----------------------------------------------------------------------------
2087 # compute_nullable_nonterminals()
2088 #
2089 # Creates a dictionary containing all of the non-terminals that might produce
2090 # an empty production.
2091 # -----------------------------------------------------------------------------
2092
2093 def compute_nullable_nonterminals(self):
2094 nullable = {}
2095 num_nullable = 0
2096 while 1:
2097 for p in self.grammar.Productions[1:]:
2098 if p.len == 0:
2099 nullable[p.name] = 1
2100 continue
2101 for t in p.prod:
2102 if not t in nullable: break
2103 else:
2104 nullable[p.name] = 1
2105 if len(nullable) == num_nullable: break
2106 num_nullable = len(nullable)
2107 return nullable
2108
2109 # -----------------------------------------------------------------------------
2110 # find_nonterminal_trans(C)
2111 #
2112 # Given a set of LR(0) items, this functions finds all of the non-terminal
2113 # transitions. These are transitions in which a dot appears immediately before
2114 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2115 # is the state number and N is the nonterminal symbol.
2116 #
2117 # The input C is the set of LR(0) items.
2118 # -----------------------------------------------------------------------------
2119
2120 def find_nonterminal_transitions(self,C):
2121 trans = []
2122 for state in range(len(C)):
2123 for p in C[state]:
2124 if p.lr_index < p.len - 1:
2125 t = (state,p.prod[p.lr_index+1])
2126 if t[1] in self.grammar.Nonterminals:
2127 if t not in trans: trans.append(t)
2128 state = state + 1
2129 return trans
2130
2131 # -----------------------------------------------------------------------------
2132 # dr_relation()
2133 #
2134 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2135 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2136 #
2137 # Returns a list of terminals.
2138 # -----------------------------------------------------------------------------
2139
2140 def dr_relation(self,C,trans,nullable):
2141 dr_set = { }
2142 state,N = trans
2143 terms = []
2144
2145 g = self.lr0_goto(C[state],N)
2146 for p in g:
2147 if p.lr_index < p.len - 1:
2148 a = p.prod[p.lr_index+1]
2149 if a in self.grammar.Terminals:
2150 if a not in terms: terms.append(a)
2151
2152 # This extra bit is to handle the start state
2153 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2154 terms.append('$end')
2155
2156 return terms
2157
2158 # -----------------------------------------------------------------------------
2159 # reads_relation()
2160 #
2161 # Computes the READS() relation (p,A) READS (t,C).
2162 # -----------------------------------------------------------------------------
2163
2164 def reads_relation(self,C, trans, empty):
2165 # Look for empty transitions
2166 rel = []
2167 state, N = trans
2168
2169 g = self.lr0_goto(C[state],N)
2170 j = self.lr0_cidhash.get(id(g),-1)
2171 for p in g:
2172 if p.lr_index < p.len - 1:
2173 a = p.prod[p.lr_index + 1]
2174 if a in empty:
2175 rel.append((j,a))
2176
2177 return rel
2178
2179 # -----------------------------------------------------------------------------
2180 # compute_lookback_includes()
2181 #
2182 # Determines the lookback and includes relations
2183 #
2184 # LOOKBACK:
2185 #
2186 # This relation is determined by running the LR(0) state machine forward.
2187 # For example, starting with a production "N : . A B C", we run it forward
2188 # to obtain "N : A B C ." We then build a relationship between this final
2189 # state and the starting state. These relationships are stored in a dictionary
2190 # lookdict.
2191 #
2192 # INCLUDES:
2193 #
2194 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2195 #
2196 # This relation is used to determine non-terminal transitions that occur
2197 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2198 # if the following holds:
2199 #
2200 # B -> LAT, where T -> epsilon and p' -L-> p
2201 #
2202 # L is essentially a prefix (which may be empty), T is a suffix that must be
2203 # able to derive an empty string. State p' must lead to state p with the string L.
2204 #
2205 # -----------------------------------------------------------------------------
2206
2207 def compute_lookback_includes(self,C,trans,nullable):
2208
2209 lookdict = {} # Dictionary of lookback relations
2210 includedict = {} # Dictionary of include relations
2211
2212 # Make a dictionary of non-terminal transitions
2213 dtrans = {}
2214 for t in trans:
2215 dtrans[t] = 1
2216
2217 # Loop over all transitions and compute lookbacks and includes
2218 for state,N in trans:
2219 lookb = []
2220 includes = []
2221 for p in C[state]:
2222 if p.name != N: continue
2223
2224 # Okay, we have a name match. We now follow the production all the way
2225 # through the state machine until we get the . on the right hand side
2226
2227 lr_index = p.lr_index
2228 j = state
2229 while lr_index < p.len - 1:
2230 lr_index = lr_index + 1
2231 t = p.prod[lr_index]
2232
2233 # Check to see if this symbol and state are a non-terminal transition
2234 if (j,t) in dtrans:
2235 # Yes. Okay, there is some chance that this is an includes relation
2236 # the only way to know for certain is whether the rest of the
2237 # production derives empty
2238
2239 li = lr_index + 1
2240 while li < p.len:
2241 if p.prod[li] in self.grammar.Terminals: break # No forget it
2242 if not p.prod[li] in nullable: break
2243 li = li + 1
2244 else:
2245 # Appears to be a relation between (j,t) and (state,N)
2246 includes.append((j,t))
2247
2248 g = self.lr0_goto(C[j],t) # Go to next set
2249 j = self.lr0_cidhash.get(id(g),-1) # Go to next state
2250
2251 # When we get here, j is the final state, now we have to locate the production
2252 for r in C[j]:
2253 if r.name != p.name: continue
2254 if r.len != p.len: continue
2255 i = 0
2256 # This look is comparing a production ". A B C" with "A B C ."
2257 while i < r.lr_index:
2258 if r.prod[i] != p.prod[i+1]: break
2259 i = i + 1
2260 else:
2261 lookb.append((j,r))
2262 for i in includes:
2263 if not i in includedict: includedict[i] = []
2264 includedict[i].append((state,N))
2265 lookdict[(state,N)] = lookb
2266
2267 return lookdict,includedict
2268
2269 # -----------------------------------------------------------------------------
2270 # compute_read_sets()
2271 #
2272 # Given a set of LR(0) items, this function computes the read sets.
2273 #
2274 # Inputs: C = Set of LR(0) items
2275 # ntrans = Set of nonterminal transitions
2276 # nullable = Set of empty transitions
2277 #
2278 # Returns a set containing the read sets
2279 # -----------------------------------------------------------------------------
2280
2281 def compute_read_sets(self,C, ntrans, nullable):
2282 FP = lambda x: self.dr_relation(C,x,nullable)
2283 R = lambda x: self.reads_relation(C,x,nullable)
2284 F = digraph(ntrans,R,FP)
2285 return F
2286
2287 # -----------------------------------------------------------------------------
2288 # compute_follow_sets()
2289 #
2290 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2291 # and an include set, this function computes the follow sets
2292 #
2293 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2294 #
2295 # Inputs:
2296 # ntrans = Set of nonterminal transitions
2297 # readsets = Readset (previously computed)
2298 # inclsets = Include sets (previously computed)
2299 #
2300 # Returns a set containing the follow sets
2301 # -----------------------------------------------------------------------------
2302
2303 def compute_follow_sets(self,ntrans,readsets,inclsets):
2304 FP = lambda x: readsets[x]
2305 R = lambda x: inclsets.get(x,[])
2306 F = digraph(ntrans,R,FP)
2307 return F
2308
2309 # -----------------------------------------------------------------------------
2310 # add_lookaheads()
2311 #
2312 # Attaches the lookahead symbols to grammar rules.
2313 #
2314 # Inputs: lookbacks - Set of lookback relations
2315 # followset - Computed follow set
2316 #
2317 # This function directly attaches the lookaheads to productions contained
2318 # in the lookbacks set
2319 # -----------------------------------------------------------------------------
2320
2321 def add_lookaheads(self,lookbacks,followset):
2322 for trans,lb in lookbacks.items():
2323 # Loop over productions in lookback
2324 for state,p in lb:
2325 if not state in p.lookaheads:
2326 p.lookaheads[state] = []
2327 f = followset.get(trans,[])
2328 for a in f:
2329 if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2330
2331 # -----------------------------------------------------------------------------
2332 # add_lalr_lookaheads()
2333 #
2334 # This function does all of the work of adding lookahead information for use
2335 # with LALR parsing
2336 # -----------------------------------------------------------------------------
2337
2338 def add_lalr_lookaheads(self,C):
2339 # Determine all of the nullable nonterminals
2340 nullable = self.compute_nullable_nonterminals()
2341
2342 # Find all non-terminal transitions
2343 trans = self.find_nonterminal_transitions(C)
2344
2345 # Compute read sets
2346 readsets = self.compute_read_sets(C,trans,nullable)
2347
2348 # Compute lookback/includes relations
2349 lookd, included = self.compute_lookback_includes(C,trans,nullable)
2350
2351 # Compute LALR FOLLOW sets
2352 followsets = self.compute_follow_sets(trans,readsets,included)
2353
2354 # Add all of the lookaheads
2355 self.add_lookaheads(lookd,followsets)
2356
2357 # -----------------------------------------------------------------------------
2358 # lr_parse_table()
2359 #
2360 # This function constructs the parse tables for SLR or LALR
2361 # -----------------------------------------------------------------------------
2362 def lr_parse_table(self):
2363 Productions = self.grammar.Productions
2364 Precedence = self.grammar.Precedence
2365 goto = self.lr_goto # Goto array
2366 action = self.lr_action # Action array
2367 log = self.log # Logger for output
2368
2369 actionp = { } # Action production array (temporary)
2370
2371 log.info("Parsing method: %s", self.lr_method)
2372
2373 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2374 # This determines the number of states
2375
2376 C = self.lr0_items()
2377
2378 if self.lr_method == 'LALR':
2379 self.add_lalr_lookaheads(C)
2380
2381 # Build the parser table, state by state
2382 st = 0
2383 for I in C:
2384 # Loop over each production in I
2385 actlist = [ ] # List of actions
2386 st_action = { }
2387 st_actionp = { }
2388 st_goto = { }
2389 log.info("")
2390 log.info("state %d", st)
2391 log.info("")
2392 for p in I:
2393 log.info(" (%d) %s", p.number, str(p))
2394 log.info("")
2395
2396 for p in I:
2397 if p.len == p.lr_index + 1:
2398 if p.name == "S'":
2399 # Start symbol. Accept!
2400 st_action["$end"] = 0
2401 st_actionp["$end"] = p
2402 else:
2403 # We are at the end of a production. Reduce!
2404 if self.lr_method == 'LALR':
2405 laheads = p.lookaheads[st]
2406 else:
2407 laheads = self.grammar.Follow[p.name]
2408 for a in laheads:
2409 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
2410 r = st_action.get(a,None)
2411 if r is not None:
2412 # Whoa. Have a shift/reduce or reduce/reduce conflict
2413 if r > 0:
2414 # Need to decide on shift or reduce here
2415 # By default we favor shifting. Need to add
2416 # some precedence rules here.
2417 sprec,slevel = Productions[st_actionp[a].number].prec
2418 rprec,rlevel = Precedence.get(a,('right',0))
2419 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2420 # We really need to reduce here.
2421 st_action[a] = -p.number
2422 st_actionp[a] = p
2423 if not slevel and not rlevel:
2424 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2425 self.sr_conflicts.append((st,a,'reduce'))
2426 Productions[p.number].reduced += 1
2427 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2428 st_action[a] = None
2429 else:
2430 # Hmmm. Guess we'll keep the shift
2431 if not rlevel:
2432 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2433 self.sr_conflicts.append((st,a,'shift'))
2434 elif r < 0:
2435 # Reduce/reduce conflict. In this case, we favor the rule
2436 # that was defined first in the grammar file
2437 oldp = Productions[-r]
2438 pp = Productions[p.number]
2439 if oldp.line > pp.line:
2440 st_action[a] = -p.number
2441 st_actionp[a] = p
2442 chosenp,rejectp = pp,oldp
2443 Productions[p.number].reduced += 1
2444 Productions[oldp.number].reduced -= 1
2445 else:
2446 chosenp,rejectp = oldp,pp
2447 self.rr_conflicts.append((st,chosenp,rejectp))
2448 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
2449 else:
2450 raise LALRError("Unknown conflict in state %d" % st)
2451 else:
2452 st_action[a] = -p.number
2453 st_actionp[a] = p
2454 Productions[p.number].reduced += 1
2455 else:
2456 i = p.lr_index
2457 a = p.prod[i+1] # Get symbol right after the "."
2458 if a in self.grammar.Terminals:
2459 g = self.lr0_goto(I,a)
2460 j = self.lr0_cidhash.get(id(g),-1)
2461 if j >= 0:
2462 # We are in a shift state
2463 actlist.append((a,p,"shift and go to state %d" % j))
2464 r = st_action.get(a,None)
2465 if r is not None:
2466 # Whoa have a shift/reduce or shift/shift conflict
2467 if r > 0:
2468 if r != j:
2469 raise LALRError("Shift/shift conflict in state %d" % st)
2470 elif r < 0:
2471 # Do a precedence check.
2472 # - if precedence of reduce rule is higher, we reduce.
2473 # - if precedence of reduce is same and left assoc, we reduce.
2474 # - otherwise we shift
2475 rprec,rlevel = Productions[st_actionp[a].number].prec
2476 sprec,slevel = Precedence.get(a,('right',0))
2477 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2478 # We decide to shift here... highest precedence to shift
2479 Productions[st_actionp[a].number].reduced -= 1
2480 st_action[a] = j
2481 st_actionp[a] = p
2482 if not rlevel:
2483 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2484 self.sr_conflicts.append((st,a,'shift'))
2485 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2486 st_action[a] = None
2487 else:
2488 # Hmmm. Guess we'll keep the reduce
2489 if not slevel and not rlevel:
2490 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2491 self.sr_conflicts.append((st,a,'reduce'))
2492
2493 else:
2494 raise LALRError("Unknown conflict in state %d" % st)
2495 else:
2496 st_action[a] = j
2497 st_actionp[a] = p
2498
2499 # Print the actions associated with each terminal
2500 _actprint = { }
2501 for a,p,m in actlist:
2502 if a in st_action:
2503 if p is st_actionp[a]:
2504 log.info(" %-15s %s",a,m)
2505 _actprint[(a,m)] = 1
2506 log.info("")
2507 # Print the actions that were not used. (debugging)
2508 not_used = 0
2509 for a,p,m in actlist:
2510 if a in st_action:
2511 if p is not st_actionp[a]:
2512 if not (a,m) in _actprint:
2513 log.debug(" ! %-15s [ %s ]",a,m)
2514 not_used = 1
2515 _actprint[(a,m)] = 1
2516 if not_used:
2517 log.debug("")
2518
2519 # Construct the goto table for this state
2520
2521 nkeys = { }
2522 for ii in I:
2523 for s in ii.usyms:
2524 if s in self.grammar.Nonterminals:
2525 nkeys[s] = None
2526 for n in nkeys:
2527 g = self.lr0_goto(I,n)
2528 j = self.lr0_cidhash.get(id(g),-1)
2529 if j >= 0:
2530 st_goto[n] = j
2531 log.info(" %-30s shift and go to state %d",n,j)
2532
2533 action[st] = st_action
2534 actionp[st] = st_actionp
2535 goto[st] = st_goto
2536 st += 1
2537
2538
2539 # -----------------------------------------------------------------------------
2540 # write()
2541 #
2542 # This function writes the LR parsing tables to a file
2543 # -----------------------------------------------------------------------------
2544
2545 def write_table(self,modulename,outputdir='',signature=""):
2546 basemodulename = modulename.split(".")[-1]
2547 filename = os.path.join(outputdir,basemodulename) + ".py"
2548 try:
2549 f = open(filename,"w")
2550
2551 f.write("""
2552# %s
2553# This file is automatically generated. Do not edit.
2554_tabversion = %r
2555
2556_lr_method = %r
2557
2558_lr_signature = %r
2559 """ % (filename, __tabversion__, self.lr_method, signature))
2560
2561 # Change smaller to 0 to go back to original tables
2562 smaller = 1
2563
2564 # Factor out names to try and make smaller
2565 if smaller:
2566 items = { }
2567
2568 for s,nd in self.lr_action.items():
2569 for name,v in nd.items():
2570 i = items.get(name)
2571 if not i:
2572 i = ([],[])
2573 items[name] = i
2574 i[0].append(s)
2575 i[1].append(v)
2576
2577 f.write("\n_lr_action_items = {")
2578 for k,v in items.items():
2579 f.write("%r:([" % k)
2580 for i in v[0]:
2581 f.write("%r," % i)
2582 f.write("],[")
2583 for i in v[1]:
2584 f.write("%r," % i)
2585
2586 f.write("]),")
2587 f.write("}\n")
2588
2589 f.write("""
2590_lr_action = { }
2591for _k, _v in _lr_action_items.items():
2592 for _x,_y in zip(_v[0],_v[1]):
2593 if not _x in _lr_action: _lr_action[_x] = { }
2594 _lr_action[_x][_k] = _y
2595del _lr_action_items
2596""")
2597
2598 else:
2599 f.write("\n_lr_action = { ");
2600 for k,v in self.lr_action.items():
2601 f.write("(%r,%r):%r," % (k[0],k[1],v))
2602 f.write("}\n");
2603
2604 if smaller:
2605 # Factor out names to try and make smaller
2606 items = { }
2607
2608 for s,nd in self.lr_goto.items():
2609 for name,v in nd.items():
2610 i = items.get(name)
2611 if not i:
2612 i = ([],[])
2613 items[name] = i
2614 i[0].append(s)
2615 i[1].append(v)
2616
2617 f.write("\n_lr_goto_items = {")
2618 for k,v in items.items():
2619 f.write("%r:([" % k)
2620 for i in v[0]:
2621 f.write("%r," % i)
2622 f.write("],[")
2623 for i in v[1]:
2624 f.write("%r," % i)
2625
2626 f.write("]),")
2627 f.write("}\n")
2628
2629 f.write("""
2630_lr_goto = { }
2631for _k, _v in _lr_goto_items.items():
2632 for _x,_y in zip(_v[0],_v[1]):
2633 if not _x in _lr_goto: _lr_goto[_x] = { }
2634 _lr_goto[_x][_k] = _y
2635del _lr_goto_items
2636""")
2637 else:
2638 f.write("\n_lr_goto = { ");
2639 for k,v in self.lr_goto.items():
2640 f.write("(%r,%r):%r," % (k[0],k[1],v))
2641 f.write("}\n");
2642
2643 # Write production table
2644 f.write("_lr_productions = [\n")
2645 for p in self.lr_productions:
2646 if p.func:
2647 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2648 else:
2649 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2650 f.write("]\n")
2651 f.close()
2652
2653 except IOError:
2654 e = sys.exc_info()[1]
2655 sys.stderr.write("Unable to create '%s'\n" % filename)
2656 sys.stderr.write(str(e)+"\n")
2657 return
2658
2659
2660 # -----------------------------------------------------------------------------
2661 # pickle_table()
2662 #
2663 # This function pickles the LR parsing tables to a supplied file object
2664 # -----------------------------------------------------------------------------
2665
2666 def pickle_table(self,filename,signature=""):
2667 try:
2668 import cPickle as pickle
2669 except ImportError:
2670 import pickle
2671 outf = open(filename,"wb")
2672 pickle.dump(__tabversion__,outf,pickle_protocol)
2673 pickle.dump(self.lr_method,outf,pickle_protocol)
2674 pickle.dump(signature,outf,pickle_protocol)
2675 pickle.dump(self.lr_action,outf,pickle_protocol)
2676 pickle.dump(self.lr_goto,outf,pickle_protocol)
2677
2678 outp = []
2679 for p in self.lr_productions:
2680 if p.func:
2681 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2682 else:
2683 outp.append((str(p),p.name,p.len,None,None,None))
2684 pickle.dump(outp,outf,pickle_protocol)
2685 outf.close()
2686
2687# -----------------------------------------------------------------------------
2688# === INTROSPECTION ===
2689#
2690# The following functions and classes are used to implement the PLY
2691# introspection features followed by the yacc() function itself.
2692# -----------------------------------------------------------------------------
2693
2694# -----------------------------------------------------------------------------
2695# get_caller_module_dict()
2696#
2697# This function returns a dictionary containing all of the symbols defined within
2698# a caller further down the call stack. This is used to get the environment
2699# associated with the yacc() call if none was provided.
2700# -----------------------------------------------------------------------------
2701
2702def get_caller_module_dict(levels):
2703 try:
2704 raise RuntimeError
2705 except RuntimeError:
2706 e,b,t = sys.exc_info()
2707 f = t.tb_frame
2708 while levels > 0:
2709 f = f.f_back
2710 levels -= 1
2711 ldict = f.f_globals.copy()
2712 if f.f_globals != f.f_locals:
2713 ldict.update(f.f_locals)
2714
2715 return ldict
2716
2717# -----------------------------------------------------------------------------
2718# parse_grammar()
2719#
2720# This takes a raw grammar rule string and parses it into production data
2721# -----------------------------------------------------------------------------
2722def parse_grammar(doc,file,line):
2723 grammar = []
2724 # Split the doc string into lines
2725 pstrings = doc.splitlines()
2726 lastp = None
2727 dline = line
2728 for ps in pstrings:
2729 dline += 1
2730 p = ps.split()
2731 if not p: continue
2732 try:
2733 if p[0] == '|':
2734 # This is a continuation of a previous rule
2735 if not lastp:
2736 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
2737 prodname = lastp
2738 syms = p[1:]
2739 else:
2740 prodname = p[0]
2741 lastp = prodname
2742 syms = p[2:]
2743 assign = p[1]
2744 if assign != ':' and assign != '::=':
2745 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
2746
2747 grammar.append((file,dline,prodname,syms))
2748 except SyntaxError:
2749 raise
2750 except Exception:
2751 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
2752
2753 return grammar
2754
2755# -----------------------------------------------------------------------------
2756# ParserReflect()
2757#
2758# This class represents information extracted for building a parser including
2759# start symbol, error function, tokens, precedence list, action functions,
2760# etc.
2761# -----------------------------------------------------------------------------
2762class ParserReflect(object):
2763 def __init__(self,pdict,log=None):
2764 self.pdict = pdict
2765 self.start = None
2766 self.error_func = None
2767 self.tokens = None
2768 self.files = {}
2769 self.grammar = []
2770 self.error = 0
2771
2772 if log is None:
2773 self.log = PlyLogger(sys.stderr)
2774 else:
2775 self.log = log
2776
2777 # Get all of the basic information
2778 def get_all(self):
2779 self.get_start()
2780 self.get_error_func()
2781 self.get_tokens()
2782 self.get_precedence()
2783 self.get_pfunctions()
2784
2785 # Validate all of the information
2786 def validate_all(self):
2787 self.validate_start()
2788 self.validate_error_func()
2789 self.validate_tokens()
2790 self.validate_precedence()
2791 self.validate_pfunctions()
2792 self.validate_files()
2793 return self.error
2794
2795 # Compute a signature over the grammar
2796 def signature(self):
2797 try:
2798 from hashlib import md5
2799 except ImportError:
2800 from md5 import md5
2801 try:
2802 sig = md5()
2803 if self.start:
2804 sig.update(self.start.encode('latin-1'))
2805 if self.prec:
2806 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2807 if self.tokens:
2808 sig.update(" ".join(self.tokens).encode('latin-1'))
2809 for f in self.pfuncs:
2810 if f[3]:
2811 sig.update(f[3].encode('latin-1'))
2812 except (TypeError,ValueError):
2813 pass
2814 return sig.digest()
2815
2816 # -----------------------------------------------------------------------------
2817 # validate_file()
2818 #
2819 # This method checks to see if there are duplicated p_rulename() functions
2820 # in the parser module file. Without this function, it is really easy for
2821 # users to make mistakes by cutting and pasting code fragments (and it's a real
2822 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2823 # we just do a little regular expression pattern matching of def statements
2824 # to try and detect duplicates.
2825 # -----------------------------------------------------------------------------
2826
2827 def validate_files(self):
2828 # Match def p_funcname(
2829 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2830
2831 for filename in self.files.keys():
2832 base,ext = os.path.splitext(filename)
2833 if ext != '.py': return 1 # No idea. Assume it's okay.
2834
2835 try:
2836 f = open(filename)
2837 lines = f.readlines()
2838 f.close()
2839 except IOError:
2840 continue
2841
2842 counthash = { }
2843 for linen,l in enumerate(lines):
2844 linen += 1
2845 m = fre.match(l)
2846 if m:
2847 name = m.group(1)
2848 prev = counthash.get(name)
2849 if not prev:
2850 counthash[name] = linen
2851 else:
2852 self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2853
2854 # Get the start symbol
2855 def get_start(self):
2856 self.start = self.pdict.get('start')
2857
2858 # Validate the start symbol
2859 def validate_start(self):
2860 if self.start is not None:
2861 if not isinstance(self.start,str):
2862 self.log.error("'start' must be a string")
2863
2864 # Look for error handler
2865 def get_error_func(self):
2866 self.error_func = self.pdict.get('p_error')
2867
2868 # Validate the error function
2869 def validate_error_func(self):
2870 if self.error_func:
2871 if isinstance(self.error_func,types.FunctionType):
2872 ismethod = 0
2873 elif isinstance(self.error_func, types.MethodType):
2874 ismethod = 1
2875 else:
2876 self.log.error("'p_error' defined, but is not a function or method")
2877 self.error = 1
2878 return
2879
2880 eline = func_code(self.error_func).co_firstlineno
2881 efile = func_code(self.error_func).co_filename
2882 self.files[efile] = 1
2883
2884 if (func_code(self.error_func).co_argcount != 1+ismethod):
2885 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2886 self.error = 1
2887
2888 # Get the tokens map
2889 def get_tokens(self):
2890 tokens = self.pdict.get("tokens",None)
2891 if not tokens:
2892 self.log.error("No token list is defined")
2893 self.error = 1
2894 return
2895
2896 if not isinstance(tokens,(list, tuple)):
2897 self.log.error("tokens must be a list or tuple")
2898 self.error = 1
2899 return
2900
2901 if not tokens:
2902 self.log.error("tokens is empty")
2903 self.error = 1
2904 return
2905
2906 self.tokens = tokens
2907
2908 # Validate the tokens
2909 def validate_tokens(self):
2910 # Validate the tokens.
2911 if 'error' in self.tokens:
2912 self.log.error("Illegal token name 'error'. Is a reserved word")
2913 self.error = 1
2914 return
2915
2916 terminals = {}
2917 for n in self.tokens:
2918 if n in terminals:
2919 self.log.warning("Token '%s' multiply defined", n)
2920 terminals[n] = 1
2921
2922 # Get the precedence map (if any)
2923 def get_precedence(self):
2924 self.prec = self.pdict.get("precedence",None)
2925
2926 # Validate and parse the precedence map
2927 def validate_precedence(self):
2928 preclist = []
2929 if self.prec:
2930 if not isinstance(self.prec,(list,tuple)):
2931 self.log.error("precedence must be a list or tuple")
2932 self.error = 1
2933 return
2934 for level,p in enumerate(self.prec):
2935 if not isinstance(p,(list,tuple)):
2936 self.log.error("Bad precedence table")
2937 self.error = 1
2938 return
2939
2940 if len(p) < 2:
2941 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2942 self.error = 1
2943 return
2944 assoc = p[0]
2945 if not isinstance(assoc,str):
2946 self.log.error("precedence associativity must be a string")
2947 self.error = 1
2948 return
2949 for term in p[1:]:
2950 if not isinstance(term,str):
2951 self.log.error("precedence items must be strings")
2952 self.error = 1
2953 return
2954 preclist.append((term,assoc,level+1))
2955 self.preclist = preclist
2956
2957 # Get all p_functions from the grammar
2958 def get_pfunctions(self):
2959 p_functions = []
2960 for name, item in self.pdict.items():
2961 if name[:2] != 'p_': continue
2962 if name == 'p_error': continue
2963 if isinstance(item,(types.FunctionType,types.MethodType)):
2964 line = func_code(item).co_firstlineno
2965 file = func_code(item).co_filename
2966 p_functions.append((line,file,name,item.__doc__))
2967
2968 # Sort all of the actions by line number
2969 p_functions.sort()
2970 self.pfuncs = p_functions
2971
2972
2973 # Validate all of the p_functions
2974 def validate_pfunctions(self):
2975 grammar = []
2976 # Check for non-empty symbols
2977 if len(self.pfuncs) == 0:
2978 self.log.error("no rules of the form p_rulename are defined")
2979 self.error = 1
2980 return
2981
2982 for line, file, name, doc in self.pfuncs:
2983 func = self.pdict[name]
2984 if isinstance(func, types.MethodType):
2985 reqargs = 2
2986 else:
2987 reqargs = 1
2988 if func_code(func).co_argcount > reqargs:
2989 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
2990 self.error = 1
2991 elif func_code(func).co_argcount < reqargs:
2992 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
2993 self.error = 1
2994 elif not func.__doc__:
2995 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
2996 else:
2997 try:
2998 parsed_g = parse_grammar(doc,file,line)
2999 for g in parsed_g:
3000 grammar.append((name, g))
3001 except SyntaxError:
3002 e = sys.exc_info()[1]
3003 self.log.error(str(e))
3004 self.error = 1
3005
3006 # Looks like a valid grammar rule
3007 # Mark the file in which defined.
3008 self.files[file] = 1
3009
3010 # Secondary validation step that looks for p_ definitions that are not functions
3011 # or functions that look like they might be grammar rules.
3012
3013 for n,v in self.pdict.items():
3014 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
3015 if n[0:2] == 't_': continue
3016 if n[0:2] == 'p_' and n != 'p_error':
3017 self.log.warning("'%s' not defined as a function", n)
3018 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
3019 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
3020 try:
3021 doc = v.__doc__.split(" ")
3022 if doc[1] == ':':
3023 self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
3024 func_code(v).co_filename, func_code(v).co_firstlineno,n)
3025 except Exception:
3026 pass
3027
3028 self.grammar = grammar
3029
3030# -----------------------------------------------------------------------------
3031# yacc(module)
3032#
3033# Build a parser
3034# -----------------------------------------------------------------------------
3035
3036def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3037 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
3038 debuglog=None, errorlog = None, picklefile=None):
3039
3040 global parse # Reference to the parsing method of the last built parser
3041
3042 # If pickling is enabled, table files are not created
3043
3044 if picklefile:
3045 write_tables = 0
3046
3047 if errorlog is None:
3048 errorlog = PlyLogger(sys.stderr)
3049
3050 # Get the module dictionary used for the parser
3051 if module:
3052 _items = [(k,getattr(module,k)) for k in dir(module)]
3053 pdict = dict(_items)
3054 else:
3055 pdict = get_caller_module_dict(2)
3056
3057 # Collect parser information from the dictionary
3058 pinfo = ParserReflect(pdict,log=errorlog)
3059 pinfo.get_all()
3060
3061 if pinfo.error:
3062 raise YaccError("Unable to build parser")
3063
3064 # Check signature against table files (if any)
3065 signature = pinfo.signature()
3066
3067 # Read the tables
3068 try:
3069 lr = LRTable()
3070 if picklefile:
3071 read_signature = lr.read_pickle(picklefile)
3072 else:
3073 read_signature = lr.read_table(tabmodule)
3074 if optimize or (read_signature == signature):
3075 try:
3076 lr.bind_callables(pinfo.pdict)
3077 parser = LRParser(lr,pinfo.error_func)
3078 parse = parser.parse
3079 return parser
3080 except Exception:
3081 e = sys.exc_info()[1]
3082 errorlog.warning("There was a problem loading the table file: %s", repr(e))
3083 except VersionError:
3084 e = sys.exc_info()
3085 errorlog.warning(str(e))
3086 except Exception:
3087 pass
3088
3089 if debuglog is None:
3090 if debug:
3091 debuglog = PlyLogger(open(debugfile,"w"))
3092 else:
3093 debuglog = NullLogger()
3094
3095 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3096
3097
3098 errors = 0
3099
3100 # Validate the parser information
3101 if pinfo.validate_all():
3102 raise YaccError("Unable to build parser")
3103
3104 if not pinfo.error_func:
3105 errorlog.warning("no p_error() function is defined")
3106
3107 # Create a grammar object
3108 grammar = Grammar(pinfo.tokens)
3109
3110 # Set precedence level for terminals
3111 for term, assoc, level in pinfo.preclist:
3112 try:
3113 grammar.set_precedence(term,assoc,level)
3114 except GrammarError:
3115 e = sys.exc_info()[1]
3116 errorlog.warning("%s",str(e))
3117
3118 # Add productions to the grammar
3119 for funcname, gram in pinfo.grammar:
3120 file, line, prodname, syms = gram
3121 try:
3122 grammar.add_production(prodname,syms,funcname,file,line)
3123 except GrammarError:
3124 e = sys.exc_info()[1]
3125 errorlog.error("%s",str(e))
3126 errors = 1
3127
3128 # Set the grammar start symbols
3129 try:
3130 if start is None:
3131 grammar.set_start(pinfo.start)
3132 else:
3133 grammar.set_start(start)
3134 except GrammarError:
3135 e = sys.exc_info()[1]
3136 errorlog.error(str(e))
3137 errors = 1
3138
3139 if errors:
3140 raise YaccError("Unable to build parser")
3141
3142 # Verify the grammar structure
3143 undefined_symbols = grammar.undefined_symbols()
3144 for sym, prod in undefined_symbols:
3145 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
3146 errors = 1
3147
3148 unused_terminals = grammar.unused_terminals()
3149 if unused_terminals:
3150 debuglog.info("")
3151 debuglog.info("Unused terminals:")
3152 debuglog.info("")
3153 for term in unused_terminals:
3154 errorlog.warning("Token '%s' defined, but not used", term)
3155 debuglog.info(" %s", term)
3156
3157 # Print out all productions to the debug log
3158 if debug:
3159 debuglog.info("")
3160 debuglog.info("Grammar")
3161 debuglog.info("")
3162 for n,p in enumerate(grammar.Productions):
3163 debuglog.info("Rule %-5d %s", n, p)
3164
3165 # Find unused non-terminals
3166 unused_rules = grammar.unused_rules()
3167 for prod in unused_rules:
3168 errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
3169
3170 if len(unused_terminals) == 1:
3171 errorlog.warning("There is 1 unused token")
3172 if len(unused_terminals) > 1:
3173 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3174
3175 if len(unused_rules) == 1:
3176 errorlog.warning("There is 1 unused rule")
3177 if len(unused_rules) > 1:
3178 errorlog.warning("There are %d unused rules", len(unused_rules))
3179
3180 if debug:
3181 debuglog.info("")
3182 debuglog.info("Terminals, with rules where they appear")
3183 debuglog.info("")
3184 terms = list(grammar.Terminals)
3185 terms.sort()
3186 for term in terms:
3187 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3188
3189 debuglog.info("")
3190 debuglog.info("Nonterminals, with rules where they appear")
3191 debuglog.info("")
3192 nonterms = list(grammar.Nonterminals)
3193 nonterms.sort()
3194 for nonterm in nonterms:
3195 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3196 debuglog.info("")
3197
3198 if check_recursion:
3199 unreachable = grammar.find_unreachable()
3200 for u in unreachable:
3201 errorlog.warning("Symbol '%s' is unreachable",u)
3202
3203 infinite = grammar.infinite_cycles()
3204 for inf in infinite:
3205 errorlog.error("Infinite recursion detected for symbol '%s'", inf)
3206 errors = 1
3207
3208 unused_prec = grammar.unused_precedence()
3209 for term, assoc in unused_prec:
3210 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
3211 errors = 1
3212
3213 if errors:
3214 raise YaccError("Unable to build parser")
3215
3216 # Run the LRGeneratedTable on the grammar
3217 if debug:
3218 errorlog.debug("Generating %s tables", method)
3219
3220 lr = LRGeneratedTable(grammar,method,debuglog)
3221
3222 if debug:
3223 num_sr = len(lr.sr_conflicts)
3224
3225 # Report shift/reduce and reduce/reduce conflicts
3226 if num_sr == 1:
3227 errorlog.warning("1 shift/reduce conflict")
3228 elif num_sr > 1:
3229 errorlog.warning("%d shift/reduce conflicts", num_sr)
3230
3231 num_rr = len(lr.rr_conflicts)
3232 if num_rr == 1:
3233 errorlog.warning("1 reduce/reduce conflict")
3234 elif num_rr > 1:
3235 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3236
3237 # Write out conflicts to the output file
3238 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3239 debuglog.warning("")
3240 debuglog.warning("Conflicts:")
3241 debuglog.warning("")
3242
3243 for state, tok, resolution in lr.sr_conflicts:
3244 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
3245
3246 already_reported = {}
3247 for state, rule, rejected in lr.rr_conflicts:
3248 if (state,id(rule),id(rejected)) in already_reported:
3249 continue
3250 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3251 debuglog.warning("rejected rule (%s) in state %d", rejected,state)
3252 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3253 errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3254 already_reported[state,id(rule),id(rejected)] = 1
3255
3256 warned_never = []
3257 for state, rule, rejected in lr.rr_conflicts:
3258 if not rejected.reduced and (rejected not in warned_never):
3259 debuglog.warning("Rule (%s) is never reduced", rejected)
3260 errorlog.warning("Rule (%s) is never reduced", rejected)
3261 warned_never.append(rejected)
3262
3263 # Write the table file if requested
3264 if write_tables:
3265 lr.write_table(tabmodule,outputdir,signature)
3266
3267 # Write a pickled version of the tables
3268 if picklefile:
3269 lr.pickle_table(picklefile,signature)
3270
3271 # Build the parser
3272 lr.bind_callables(pinfo.pdict)
3273 parser = LRParser(lr,pinfo.error_func)
3274
3275 parse = parser.parse
3276 return parser