Blame - import-layers/yocto-poky/bitbake/lib/ply/yacc.py - mdmillerii/openbmc

blob: 6168fd9a037621b03720d65fd1aa4e4a81b3f0de [file] [log] [blame]

Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	1	# -----------------------------------------------------------------------------
				2	# ply: yacc.py
				3	#
				4	# Copyright (C) 2001-2009,
				5	# David M. Beazley (Dabeaz LLC)
				6	# All rights reserved.
				7	#
				8	# Redistribution and use in source and binary forms, with or without
				9	# modification, are permitted provided that the following conditions are
				10	# met:
				11	#
				12	# * Redistributions of source code must retain the above copyright notice,
				13	# this list of conditions and the following disclaimer.
				14	# * Redistributions in binary form must reproduce the above copyright notice,
				15	# this list of conditions and the following disclaimer in the documentation
				16	# and/or other materials provided with the distribution.
				17	# * Neither the name of the David Beazley or Dabeaz LLC may be used to
				18	# endorse or promote products derived from this software without
				19	# specific prior written permission.
				20	#
				21	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				22	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				23	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				24	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				25	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				26	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				27	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				28	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				29	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				30	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				31	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				32	# -----------------------------------------------------------------------------
				33	#
				34	# This implements an LR parser that is constructed from grammar rules defined
				35	# as Python functions. The grammar is specified by supplying the BNF inside
				36	# Python documentation strings. The inspiration for this technique was borrowed
				37	# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
				38	# Spark and the GNU bison utility.
				39	#
				40	# The current implementation is only somewhat object-oriented. The
				41	# LR parser itself is defined in terms of an object (which allows multiple
				42	# parsers to co-exist). However, most of the variables used during table
				43	# construction are defined in terms of global variables. Users shouldn't
				44	# notice unless they are trying to define multiple parsers at the same
				45	# time using threads (in which case they should have their head examined).
				46	#
				47	# This implementation supports both SLR and LALR(1) parsing. LALR(1)
				48	# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
				49	# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
				50	# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
				51	# by the more efficient DeRemer and Pennello algorithm.
				52	#
				53	# :::::::: WARNING :::::::
				54	#
				55	# Construction of LR parsing tables is fairly complicated and expensive.
				56	# To make this module run fast, a LOT of work has been put into
				57	# optimization---often at the expense of readability and what some might
				58	# consider to be good Python "coding style." Modify the code at your
				59	# own risk!
				60	# ----------------------------------------------------------------------------
				61
__version__ = "3.3"
__tabversion__ = "3.2" # Table version

#-----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = 0 # Debugging mode. If set, yacc generates
# a 'parser.out' file in the current directory

debug_file = 'parser.out' # Default name of the debugging file
tab_module = 'parsetab' # Default name of the table module
default_lr = 'LALR' # Default LR table generation method

error_count = 3 # Number of symbols that must be shifted to leave recovery mode

yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
# implementations of certain functions (LRParser.parse() then always
# dispatches to the debugging parse loop).

resultlimit = 40 # Size limit of results when running in debug mode
# (format_result() truncates the repr of a result to this many characters).

pickle_protocol = 0 # Protocol to use when writing pickle files
				86
				87	import re, types, sys, os.path
				88
				89	# Compatibility function for python 2.6/3.0
				90	if sys.version_info[0] < 3:
				91	def func_code(f):
				92	return f.func_code
				93	else:
				94	def func_code(f):
				95	return f.__code__
				96
				97	# Compatibility
				98	try:
				99	MAXINT = sys.maxint
				100	except AttributeError:
				101	MAXINT = sys.maxsize
				102
				103	# Python 2.x/3.0 compatibility.
				104	def load_ply_lex():
				105	if sys.version_info[0] < 3:
				106	import lex
				107	else:
				108	import ply.lex as lex
				109	return lex
				110
				111	# This object is a stand-in for a logging object created by the
				112	# logging module. PLY will use this by default to create things
				113	# such as the parser.out file. If a user wants more detailed
				114	# information, they can create their own logging object and pass
				115	# it into PLY.
				116
				117	class PlyLogger(object):
				118	def __init__(self,f):
				119	self.f = f
				120	def debug(self,msg,args,*kwargs):
				121	self.f.write((msg % args) + "\n")
				122	info = debug
				123
				124	def warning(self,msg,args,*kwargs):
				125	self.f.write("WARNING: "+ (msg % args) + "\n")
				126
				127	def error(self,msg,args,*kwargs):
				128	self.f.write("ERROR: " + (msg % args) + "\n")
				129
				130	critical = debug
				131
				132	# Null logger is used when no output is generated. Does nothing.
				133	class NullLogger(object):
				134	def __getattribute__(self,name):
				135	return self
				136	def __call__(self,args,*kwargs):
				137	return self
				138
				139	# Exception raised for yacc-related errors
				140	class YaccError(Exception): pass
				141
				142	# Format the result message that the parser produces when running in debug mode.
				143	def format_result(r):
				144	repr_str = repr(r)
				145	if '\n' in repr_str: repr_str = repr(repr_str)
				146	if len(repr_str) > resultlimit:
				147	repr_str = repr_str[:resultlimit]+" ..."
				148	result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
				149	return result
				150
				151
				152	# Format stack entries when the parser is running in debug mode
				153	def format_stack_entry(r):
				154	repr_str = repr(r)
				155	if '\n' in repr_str: repr_str = repr(repr_str)
				156	if len(repr_str) < 16:
				157	return repr_str
				158	else:
				159	return "<%s @ 0x%x>" % (type(r).__name__,id(r))
				160
				161	#-----------------------------------------------------------------------------
				162	# === LR Parsing Engine ===
				163	#
				164	# The following classes are used for the LR parser itself. These are not
				165	# used during table construction and are independent of the actual LR
				166	# table generation algorithm
				167	#-----------------------------------------------------------------------------
				168
				169	# This class is used to hold non-terminal grammar symbols during parsing.
				170	# It normally has the following attributes set:
				171	# .type = Grammar symbol type
				172	# .value = Symbol value
				173	# .lineno = Starting line number
				174	# .endlineno = Ending line number (optional, set automatically)
				175	# .lexpos = Starting lex position
				176	# .endlexpos = Ending lex position (optional, set automatically)
				177
				178	class YaccSymbol:
				179	def __str__(self): return self.type
				180	def __repr__(self): return str(self)
				181
				182	# This class is a wrapper around the objects actually passed to each
				183	# grammar rule. Index lookup and assignment actually assign the
				184	# .value attribute of the underlying YaccSymbol object.
				185	# The lineno() method returns the line number of a given
				186	# item (or 0 if not defined). The linespan() method returns
				187	# a tuple of (startline,endline) representing the range of lines
				188	# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
				189	# representing the range of positional information for a symbol.
				190
				191	class YaccProduction:
				192	def __init__(self,s,stack=None):
				193	self.slice = s
				194	self.stack = stack
				195	self.lexer = None
				196	self.parser= None
				197	def __getitem__(self,n):
				198	if n >= 0: return self.slice[n].value
				199	else: return self.stack[n].value
				200
				201	def __setitem__(self,n,v):
				202	self.slice[n].value = v
				203
				204	def __getslice__(self,i,j):
				205	return [s.value for s in self.slice[i:j]]
				206
				207	def __len__(self):
				208	return len(self.slice)
				209
				210	def lineno(self,n):
				211	return getattr(self.slice[n],"lineno",0)
				212
				213	def set_lineno(self,n,lineno):
				214	self.slice[n].lineno = lineno
				215
				216	def linespan(self,n):
				217	startline = getattr(self.slice[n],"lineno",0)
				218	endline = getattr(self.slice[n],"endlineno",startline)
				219	return startline,endline
				220
				221	def lexpos(self,n):
				222	return getattr(self.slice[n],"lexpos",0)
				223
				224	def lexspan(self,n):
				225	startpos = getattr(self.slice[n],"lexpos",0)
				226	endpos = getattr(self.slice[n],"endlexpos",startpos)
				227	return startpos,endpos
				228
				229	def error(self):
				230	raise SyntaxError
				231
				232
				233	# -----------------------------------------------------------------------------
				234	# == LRParser ==
				235	#
				236	# The LR Parsing engine.
				237	# -----------------------------------------------------------------------------
				238
				239	class LRParser:
				240	def __init__(self,lrtab,errorf):
				241	self.productions = lrtab.lr_productions
				242	self.action = lrtab.lr_action
				243	self.goto = lrtab.lr_goto
				244	self.errorfunc = errorf
				245
				246	def errok(self):
				247	self.errorok = 1
				248
				249	def restart(self):
				250	del self.statestack[:]
				251	del self.symstack[:]
				252	sym = YaccSymbol()
				253	sym.type = '$end'
				254	self.symstack.append(sym)
				255	self.statestack.append(0)
				256
				257	def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
				258	if debug or yaccdevel:
				259	if isinstance(debug,int):
				260	debug = PlyLogger(sys.stderr)
				261	return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
				262	elif tracking:
				263	return self.parseopt(input,lexer,debug,tracking,tokenfunc)
				264	else:
				265	return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
				266
				267
				268	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				269	# parsedebug().
				270	#
				271	# This is the debugging enabled version of parse(). All changes made to the
				272	# parsing engine should be made here. For the non-debugging version,
				273	# copy this code to a method parseopt() and delete all of the sections
				274	# enclosed in:
				275	#
				276	# #--! DEBUG
				277	# statements
				278	# #--! DEBUG
				279	#
				280	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				281
def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
    """Run the LR parsing loop with debug logging enabled.

    input     -- if not None, passed to lexer.input() before parsing
    lexer     -- lexer object; when false, load_ply_lex().lexer is used
    debug     -- logger object; its info(), debug() and error() methods
                 are called to trace states, the stack and actions
    tracking  -- when true, lineno/lexpos (and end positions) are copied
                 onto the symbols created by reductions
    tokenfunc -- callable returning the next token; defaults to
                 lexer.token.  A false return value is treated as end of
                 input.

    Returns the ``value`` attribute (or None) of the symbol on top of
    the stack when the accept action (0) is reached.  Returns None when
    error recovery gives up.  Raises RuntimeError if the loop falls
    through without taking any action.
    """
    lookahead = None # Current lookahead symbol
    lookaheadstack = [ ] # Stack of lookahead symbols
    actions = self.action # Local reference to action table (to avoid lookup on self.)
    goto = self.goto # Local reference to goto table (to avoid lookup on self.)
    prod = self.productions # Local reference to production list (to avoid lookup on self.)
    pslice = YaccProduction(None) # Production object passed to grammar rules
    errorcount = 0 # Used during error recovery

    # --! DEBUG
    debug.info("PLY: PARSE DEBUG START")
    # --! DEBUG

    # If no lexer was given, we will try to use the lex module
    if not lexer:
        lex = load_ply_lex()
        lexer = lex.lexer

    # Set up the lexer and parser objects on pslice
    pslice.lexer = lexer
    pslice.parser = self

    # If input was supplied, pass to lexer
    if input is not None:
        lexer.input(input)

    if tokenfunc is None:
        # Tokenize function
        get_token = lexer.token
    else:
        get_token = tokenfunc

    # Set up the state and symbol stacks

    statestack = [ ] # Stack of parsing states
    self.statestack = statestack
    symstack = [ ] # Stack of grammar symbols
    self.symstack = symstack

    pslice.stack = symstack # Put in the production
    errtoken = None # Err token

    # The start state is assumed to be (0,$end)

    statestack.append(0)
    sym = YaccSymbol()
    sym.type = "$end"
    symstack.append(sym)
    state = 0
    while 1:
        # Get the next symbol on the input. If a lookahead symbol
        # is already set, we just use that. Otherwise, we'll pull
        # the next token off of the lookaheadstack or from the lexer

        # --! DEBUG
        debug.debug('')
        debug.debug('State : %s', state)
        # --! DEBUG

        if not lookahead:
            if not lookaheadstack:
                lookahead = get_token() # Get the next token
            else:
                lookahead = lookaheadstack.pop()
            if not lookahead:
                # No more tokens: synthesize the end-of-input marker
                lookahead = YaccSymbol()
                lookahead.type = "$end"

        # --! DEBUG
        debug.debug('Stack : %s',
                    ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
        # --! DEBUG

        # Check the action table
        # (t > 0: shift to state t, t < 0: reduce by production -t,
        #  t == 0: accept, t is None: syntax error)
        ltype = lookahead.type
        t = actions[state].get(ltype)

        if t is not None:
            if t > 0:
                # shift a symbol on the stack
                statestack.append(t)
                state = t

                # --! DEBUG
                debug.debug("Action : Shift and goto state %s", t)
                # --! DEBUG

                symstack.append(lookahead)
                lookahead = None

                # Decrease error count on successful shift
                if errorcount: errorcount -=1
                continue

            if t < 0:
                # reduce a symbol on the stack, emit a production
                p = prod[-t]
                pname = p.name
                plen = p.len

                # Get production function
                sym = YaccSymbol()
                sym.type = pname # Production name
                sym.value = None

                # --! DEBUG
                if plen:
                    debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                else:
                    debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)

                # --! DEBUG

                if plen:
                    # targ[0] is the new nonterminal; targ[1:] are the
                    # right-hand-side symbols taken from the stack
                    targ = symstack[-plen-1:]
                    targ[0] = sym

                    # --! TRACKING
                    if tracking:
                        t1 = targ[1]
                        sym.lineno = t1.lineno
                        sym.lexpos = t1.lexpos
                        t1 = targ[-1]
                        sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                        sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                    # --! TRACKING

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # below as a performance optimization. Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        del symstack[-plen:]
                        del statestack[-plen:]
                        p.callable(pslice)
                        # --! DEBUG
                        debug.info("Result : %s", format_result(pslice[0]))
                        # --! DEBUG
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        # (the current lookahead is pushed back and the
                        # reduced symbol is re-used as an 'error' token)
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                else:

                    # --! TRACKING
                    if tracking:
                        sym.lineno = lexer.lineno
                        sym.lexpos = lexer.lexpos
                    # --! TRACKING

                    # Empty production: nothing is removed from the stacks
                    targ = [ sym ]

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # above as a performance optimization. Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        p.callable(pslice)
                        # --! DEBUG
                        debug.info("Result : %s", format_result(pslice[0]))
                        # --! DEBUG
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                # Accept: the result is the value of the top-of-stack symbol
                n = symstack[-1]
                result = getattr(n,"value",None)
                # --! DEBUG
                debug.info("Done : Returning %s", format_result(result))
                debug.info("PLY: PARSE DEBUG END")
                # --! DEBUG
                return result

        if t == None:

            # --! DEBUG
            debug.error('Error : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # We have some kind of parsing error here. To handle
            # this, we are going to push the current token onto
            # the tokenstack and replace it with an 'error' token.
            # If there are any synchronization rules, they may
            # catch it.
            #
            # In addition to pushing the error token, we call
            # the user defined p_error() function if this is the
            # first syntax error. This function is only called if
            # errorcount == 0.
            if errorcount == 0 or self.errorok:
                errorcount = error_count
                self.errorok = 0
                errtoken = lookahead
                if errtoken.type == "$end":
                    errtoken = None # End of file!
                if self.errorfunc:
                    # errok/token/restart are published as module globals
                    # only for the duration of the error function call.
                    # NOTE(review): they are not removed if the error
                    # function raises (there is no try/finally here).
                    global errok,token,restart
                    errok = self.errok # Set some special functions available in error recovery
                    token = get_token
                    restart = self.restart
                    if errtoken and not hasattr(errtoken,'lexer'):
                        errtoken.lexer = lexer
                    tok = self.errorfunc(errtoken)
                    del errok, token, restart # Delete special functions

                    if self.errorok:
                        # User must have done some kind of panic
                        # mode recovery on their own. The
                        # returned token is the next lookahead
                        lookahead = tok
                        errtoken = None
                        continue
                else:
                    # No user error function: report on stderr
                    if errtoken:
                        if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                        else: lineno = 0
                        if lineno:
                            sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                        else:
                            # NOTE(review): unlike the other messages, this one has no trailing newline
                            sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                    else:
                        sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

            else:
                errorcount = error_count

            # case 1: the statestack only has 1 entry on it. If we're in this state, the
            # entire parse has been rolled back and we're completely hosed. The token is
            # discarded and we just keep going.

            if len(statestack) <= 1 and lookahead.type != "$end":
                lookahead = None
                errtoken = None
                state = 0
                # Nuke the pushback stack
                del lookaheadstack[:]
                continue

            # case 2: the statestack has a couple of entries on it, but we're
            # at the end of the file. nuke the top entry and generate an error token

            # Start nuking entries on the stack
            if lookahead.type == "$end":
                # Whoa. We're really hosed here. Bail out
                return

            if lookahead.type != 'error':
                sym = symstack[-1]
                if sym.type == 'error':
                    # Hmmm. Error is on top of stack, we'll just nuke input
                    # symbol and continue
                    lookahead = None
                    continue
                # Replace the offending token with an 'error' token that
                # wraps it; the original token is pushed back for later
                t = YaccSymbol()
                t.type = 'error'
                if hasattr(lookahead,"lineno"):
                    t.lineno = lookahead.lineno
                t.value = lookahead
                lookaheadstack.append(lookahead)
                lookahead = t
            else:
                # The 'error' token itself was not accepted in this state:
                # pop one entry off both stacks and retry
                symstack.pop()
                statestack.pop()
                state = statestack[-1] # Potential bug fix

            continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
				587
				588	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				589	# parseopt().
				590	#
				591	# Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
				592	# Edit the debug version above, then copy any modifications to the method
				593	# below while removing #--! DEBUG sections.
				594	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				595
				596
def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
    """Run the LR parsing loop without debug logging.

    input     -- if not None, passed to lexer.input() before parsing
    lexer     -- lexer object; when false, load_ply_lex().lexer is used
    debug     -- accepted for signature compatibility; not used here
    tracking  -- when true, lineno/lexpos (and end positions) are copied
                 onto the symbols created by reductions
    tokenfunc -- callable returning the next token; defaults to
                 lexer.token.  A false return value is treated as end of
                 input.

    Returns the ``value`` attribute (or None) of the symbol on top of
    the stack when the accept action (0) is reached.  Returns None when
    error recovery gives up.  Raises RuntimeError if the loop falls
    through without taking any action.
    """
    lookahead = None # Current lookahead symbol
    lookaheadstack = [ ] # Stack of lookahead symbols
    actions = self.action # Local reference to action table (to avoid lookup on self.)
    goto = self.goto # Local reference to goto table (to avoid lookup on self.)
    prod = self.productions # Local reference to production list (to avoid lookup on self.)
    pslice = YaccProduction(None) # Production object passed to grammar rules
    errorcount = 0 # Used during error recovery

    # If no lexer was given, we will try to use the lex module
    if not lexer:
        lex = load_ply_lex()
        lexer = lex.lexer

    # Set up the lexer and parser objects on pslice
    pslice.lexer = lexer
    pslice.parser = self

    # If input was supplied, pass to lexer
    if input is not None:
        lexer.input(input)

    if tokenfunc is None:
        # Tokenize function
        get_token = lexer.token
    else:
        get_token = tokenfunc

    # Set up the state and symbol stacks

    statestack = [ ] # Stack of parsing states
    self.statestack = statestack
    symstack = [ ] # Stack of grammar symbols
    self.symstack = symstack

    pslice.stack = symstack # Put in the production
    errtoken = None # Err token

    # The start state is assumed to be (0,$end)

    statestack.append(0)
    sym = YaccSymbol()
    sym.type = '$end'
    symstack.append(sym)
    state = 0
    while 1:
        # Get the next symbol on the input. If a lookahead symbol
        # is already set, we just use that. Otherwise, we'll pull
        # the next token off of the lookaheadstack or from the lexer

        if not lookahead:
            if not lookaheadstack:
                lookahead = get_token() # Get the next token
            else:
                lookahead = lookaheadstack.pop()
            if not lookahead:
                # No more tokens: synthesize the end-of-input marker
                lookahead = YaccSymbol()
                lookahead.type = '$end'

        # Check the action table
        # (t > 0: shift to state t, t < 0: reduce by production -t,
        #  t == 0: accept, t is None: syntax error)
        ltype = lookahead.type
        t = actions[state].get(ltype)

        if t is not None:
            if t > 0:
                # shift a symbol on the stack
                statestack.append(t)
                state = t

                symstack.append(lookahead)
                lookahead = None

                # Decrease error count on successful shift
                if errorcount: errorcount -=1
                continue

            if t < 0:
                # reduce a symbol on the stack, emit a production
                p = prod[-t]
                pname = p.name
                plen = p.len

                # Get production function
                sym = YaccSymbol()
                sym.type = pname # Production name
                sym.value = None

                if plen:
                    # targ[0] is the new nonterminal; targ[1:] are the
                    # right-hand-side symbols taken from the stack
                    targ = symstack[-plen-1:]
                    targ[0] = sym

                    # --! TRACKING
                    if tracking:
                        t1 = targ[1]
                        sym.lineno = t1.lineno
                        sym.lexpos = t1.lexpos
                        t1 = targ[-1]
                        sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                        sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                    # --! TRACKING

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # below as a performance optimization. Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        del symstack[-plen:]
                        del statestack[-plen:]
                        p.callable(pslice)
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        # (the current lookahead is pushed back and the
                        # reduced symbol is re-used as an 'error' token)
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                else:

                    # --! TRACKING
                    if tracking:
                        sym.lineno = lexer.lineno
                        sym.lexpos = lexer.lexpos
                    # --! TRACKING

                    # Empty production: nothing is removed from the stacks
                    targ = [ sym ]

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # above as a performance optimization. Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        p.callable(pslice)
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                # Accept: the result is the value of the top-of-stack symbol
                n = symstack[-1]
                return getattr(n,"value",None)

        if t == None:

            # We have some kind of parsing error here. To handle
            # this, we are going to push the current token onto
            # the tokenstack and replace it with an 'error' token.
            # If there are any synchronization rules, they may
            # catch it.
            #
            # In addition to pushing the error token, we call
            # the user defined p_error() function if this is the
            # first syntax error. This function is only called if
            # errorcount == 0.
            if errorcount == 0 or self.errorok:
                errorcount = error_count
                self.errorok = 0
                errtoken = lookahead
                if errtoken.type == '$end':
                    errtoken = None # End of file!
                if self.errorfunc:
                    # errok/token/restart are published as module globals
                    # only for the duration of the error function call.
                    # NOTE(review): they are not removed if the error
                    # function raises (there is no try/finally here).
                    global errok,token,restart
                    errok = self.errok # Set some special functions available in error recovery
                    token = get_token
                    restart = self.restart
                    if errtoken and not hasattr(errtoken,'lexer'):
                        errtoken.lexer = lexer
                    tok = self.errorfunc(errtoken)
                    del errok, token, restart # Delete special functions

                    if self.errorok:
                        # User must have done some kind of panic
                        # mode recovery on their own. The
                        # returned token is the next lookahead
                        lookahead = tok
                        errtoken = None
                        continue
                else:
                    # No user error function: report on stderr
                    if errtoken:
                        if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                        else: lineno = 0
                        if lineno:
                            sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                        else:
                            # NOTE(review): unlike the other messages, this one has no trailing newline
                            sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                    else:
                        sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

            else:
                errorcount = error_count

            # case 1: the statestack only has 1 entry on it. If we're in this state, the
            # entire parse has been rolled back and we're completely hosed. The token is
            # discarded and we just keep going.

            if len(statestack) <= 1 and lookahead.type != '$end':
                lookahead = None
                errtoken = None
                state = 0
                # Nuke the pushback stack
                del lookaheadstack[:]
                continue

            # case 2: the statestack has a couple of entries on it, but we're
            # at the end of the file. nuke the top entry and generate an error token

            # Start nuking entries on the stack
            if lookahead.type == '$end':
                # Whoa. We're really hosed here. Bail out
                return

            if lookahead.type != 'error':
                sym = symstack[-1]
                if sym.type == 'error':
                    # Hmmm. Error is on top of stack, we'll just nuke input
                    # symbol and continue
                    lookahead = None
                    continue
                # Replace the offending token with an 'error' token that
                # wraps it; the original token is pushed back for later
                t = YaccSymbol()
                t.type = 'error'
                if hasattr(lookahead,"lineno"):
                    t.lineno = lookahead.lineno
                t.value = lookahead
                lookaheadstack.append(lookahead)
                lookahead = t
            else:
                # The 'error' token itself was not accepted in this state:
                # pop one entry off both stacks and retry
                symstack.pop()
                statestack.pop()
                state = statestack[-1] # Potential bug fix

            continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
				860
				861	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				862	# parseopt_notrack().
				863	#
				864	# Optimized version of parseopt() with line number tracking removed.
				865	# DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
				866	# code in the #--! TRACKING sections
				867	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				868
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Run the table-driven LR parse loop without line-number tracking.

        input     - If not None, handed to lexer.input() before parsing.
        lexer     - Lexer object; when false, load_ply_lex().lexer is used.
        debug     - Accepted for signature compatibility; not referenced in
                    this variant.
        tracking  - Accepted for signature compatibility; not referenced in
                    this variant.
        tokenfunc - Optional callable used instead of lexer.token to fetch
                    the next token.

        Returns the 'value' attribute of the symbol on top of the symbol
        stack when the accept action (0) is reached (None if the symbol has
        no such attribute).  Returns None when error recovery gives up.
        Raises RuntimeError if an action-table entry is handled by none of
        the shift/reduce/accept/error branches.
        """
        lookahead = None # Current lookahead symbol
        lookaheadstack = [ ] # Stack of lookahead symbols
        actions = self.action # Local reference to action table (to avoid lookup on self.)
        goto = self.goto # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions # Local reference to production list (to avoid lookup on self.)
        pslice = YaccProduction(None) # Production object passed to grammar rules
        errorcount = 0 # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks.  Both are also published on
        # self so they can be reached from outside this loop.

        statestack = [ ] # Stack of parsing states
        self.statestack = statestack
        symstack = [ ] # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack # Put in the production
        errtoken = None # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token() # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # No token available: synthesize the end-of-input marker
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table.  t > 0 is a shift to state t, t < 0 is a
            # reduction by production -t, t == 0 is accept, and a missing
            # entry (None) is a syntax error.
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname # Production name
                    sym.value = None

                    if plen:
                        # Slot 0 of the slice is the result symbol; slots
                        # 1..plen are the right-hand-side symbols taken from
                        # the top of the symbol stack.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            # NOTE(review): error_count is not defined in this
                            # method; presumably a module-level constant - confirm.
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # Empty production: the slice holds only the result symbol
                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: hand back the value of the symbol on top of the stack
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0 (or self.errorok is set).
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None # End of file!
                    if self.errorfunc:
                        # Temporarily publish helper functions as module
                        # globals for the duration of the error callback.
                        global errok,token,restart
                        errok = self.errok # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No error callback installed: report on stderr
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in a synthetic 'error' symbol and
                    # push the real token back so it can be re-read later.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # The 'error' symbol itself has no action in this state:
                    # discard the top of both stacks and retry one level down.
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1] # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
				1115
				1116	# -----------------------------------------------------------------------------
				1117	# === Grammar Representation ===
				1118	#
				1119	# The following functions, classes, and variables are used to represent and
				1120	# manipulate the rules that make up a grammar.
				1121	# -----------------------------------------------------------------------------
				1122
				1123	import re
				1124
# Regex for a legal rule/symbol name: one or more ASCII letters, digits,
# underscores or hyphens (so a name may also begin with a digit or '-').
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
				1127
				1128	# -----------------------------------------------------------------------------
				1129	# class Production:
				1130	#
				1131	# This class stores the raw information about a single production or grammar rule.
				1132	# A grammar rule refers to a specification such as this:
				1133	#
				1134	# expr : expr PLUS term
				1135	#
				1136	# Here are the basic attributes defined on all productions
				1137	#
				1138	# name - Name of the production. For example 'expr'
				1139	# prod - A list of symbols on the right side ['expr','PLUS','term']
				1140	# prec - Production precedence level
				1141	# number - Production number.
				1142	# func - Function that executes on reduce
				1143	# file - File where production function is defined
# line - Line number where production function is defined
				1145	#
				1146	# The following attributes are defined or optional.
				1147	#
				1148	# len - Length of the production (number of symbols on right hand side)
				1149	# usyms - Set of unique symbols found in the production
				1150	# -----------------------------------------------------------------------------
				1151
				1152	class Production(object):
				1153	reduced = 0
				1154	def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
				1155	self.name = name
				1156	self.prod = tuple(prod)
				1157	self.number = number
				1158	self.func = func
				1159	self.callable = None
				1160	self.file = file
				1161	self.line = line
				1162	self.prec = precedence
				1163
				1164	# Internal settings used during table construction
				1165
				1166	self.len = len(self.prod) # Length of the production
				1167
				1168	# Create a list of unique production symbols used in the production
				1169	self.usyms = [ ]
				1170	for s in self.prod:
				1171	if s not in self.usyms:
				1172	self.usyms.append(s)
				1173
				1174	# List of all LR items for the production
				1175	self.lr_items = []
				1176	self.lr_next = None
				1177
				1178	# Create a string representation
				1179	if self.prod:
				1180	self.str = "%s -> %s" % (self.name," ".join(self.prod))
				1181	else:
				1182	self.str = "%s -> <empty>" % self.name
				1183
				1184	def __str__(self):
				1185	return self.str
				1186
				1187	def __repr__(self):
				1188	return "Production("+str(self)+")"
				1189
				1190	def __len__(self):
				1191	return len(self.prod)
				1192
				1193	def __nonzero__(self):
				1194	return 1
				1195
				1196	def __getitem__(self,index):
				1197	return self.prod[index]
				1198
				1199	# Return the nth lr_item from the production (or None if at the end)
				1200	def lr_item(self,n):
				1201	if n > len(self.prod): return None
				1202	p = LRItem(self,n)
				1203
				1204	# Precompute the list of productions immediately following. Hack. Remove later
				1205	try:
				1206	p.lr_after = Prodnames[p.prod[n+1]]
				1207	except (IndexError,KeyError):
				1208	p.lr_after = []
				1209	try:
				1210	p.lr_before = p.prod[n-1]
				1211	except IndexError:
				1212	p.lr_before = None
				1213
				1214	return p
				1215
				1216	# Bind the production function name to a callable
				1217	def bind(self,pdict):
				1218	if self.func:
				1219	self.callable = pdict[self.func]
				1220
				1221	# This class serves as a minimal standin for Production objects when
				1222	# reading table data from files. It only contains information
				1223	# actually used by the LR parsing engine, plus some additional
				1224	# debugging information.
				1225	class MiniProduction(object):
				1226	def __init__(self,str,name,len,func,file,line):
				1227	self.name = name
				1228	self.len = len
				1229	self.func = func
				1230	self.callable = None
				1231	self.file = file
				1232	self.line = line
				1233	self.str = str
				1234	def __str__(self):
				1235	return self.str
				1236	def __repr__(self):
				1237	return "MiniProduction(%s)" % self.str
				1238
				1239	# Bind the production function name to a callable
				1240	def bind(self,pdict):
				1241	if self.func:
				1242	self.callable = pdict[self.func]
				1243
				1244
				1245	# -----------------------------------------------------------------------------
				1246	# class LRItem
				1247	#
				1248	# This class represents a specific stage of parsing a production rule. For
				1249	# example:
				1250	#
				1251	# expr : expr . PLUS term
				1252	#
# In the above, the "." represents the current location of the parse. Here
# are the basic attributes:
				1255	#
				1256	# name - Name of the production. For example 'expr'
				1257	# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
				1258	# number - Production number.
				1259	#
				1260	# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
				1261	# then lr_next refers to 'expr -> expr PLUS . term'
				1262	# lr_index - LR item index (location of the ".") in the prod list.
				1263	# lookaheads - LALR lookahead symbols for this item
				1264	# len - Length of the production (number of symbols on right hand side)
				1265	# lr_after - List of all productions that immediately follow
				1266	# lr_before - Grammar symbol immediately before
				1267	# -----------------------------------------------------------------------------
				1268
				1269	class LRItem(object):
				1270	def __init__(self,p,n):
				1271	self.name = p.name
				1272	self.prod = list(p.prod)
				1273	self.number = p.number
				1274	self.lr_index = n
				1275	self.lookaheads = { }
				1276	self.prod.insert(n,".")
				1277	self.prod = tuple(self.prod)
				1278	self.len = len(self.prod)
				1279	self.usyms = p.usyms
				1280
				1281	def __str__(self):
				1282	if self.prod:
				1283	s = "%s -> %s" % (self.name," ".join(self.prod))
				1284	else:
				1285	s = "%s -> <empty>" % self.name
				1286	return s
				1287
				1288	def __repr__(self):
				1289	return "LRItem("+str(self)+")"
				1290
				1291	# -----------------------------------------------------------------------------
				1292	# rightmost_terminal()
				1293	#
				1294	# Return the rightmost terminal from a list of symbols. Used in add_production()
				1295	# -----------------------------------------------------------------------------
				1296	def rightmost_terminal(symbols, terminals):
				1297	i = len(symbols) - 1
				1298	while i >= 0:
				1299	if symbols[i] in terminals:
				1300	return symbols[i]
				1301	i -= 1
				1302	return None
				1303
				1304	# -----------------------------------------------------------------------------
				1305	# === GRAMMAR CLASS ===
				1306	#
				1307	# The following class represents the contents of the specified grammar along
				1308	# with various computed properties such as first sets, follow sets, LR items, etc.
				1309	# This data is used for critical parts of the table generation process later.
				1310	# -----------------------------------------------------------------------------
				1311
				1312	class GrammarError(YaccError): pass
				1313
				1314	class Grammar(object):
				1315	def __init__(self,terminals):
				1316	self.Productions = [None] # A list of all of the productions. The first
				1317	# entry is always reserved for the purpose of
				1318	# building an augmented grammar
				1319
				1320	self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
				1321	# productions of that nonterminal.
				1322
				1323	self.Prodmap = { } # A dictionary that is only used to detect duplicate
				1324	# productions.
				1325
				1326	self.Terminals = { } # A dictionary mapping the names of terminal symbols to a
				1327	# list of the rules where they are used.
				1328
				1329	for term in terminals:
				1330	self.Terminals[term] = []
				1331
				1332	self.Terminals['error'] = []
				1333
				1334	self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list
				1335	# of rule numbers where they are used.
				1336
				1337	self.First = { } # A dictionary of precomputed FIRST(x) symbols
				1338
				1339	self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
				1340
				1341	self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the
				1342	# form ('right',level) or ('nonassoc', level) or ('left',level)
				1343
				1344	self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer.
				1345	# This is only used to provide error checking and to generate
				1346	# a warning about unused precedence rules.
				1347
				1348	self.Start = None # Starting symbol for the grammar
				1349
				1350
				1351	def __len__(self):
				1352	return len(self.Productions)
				1353
				1354	def __getitem__(self,index):
				1355	return self.Productions[index]
				1356
				1357	# -----------------------------------------------------------------------------
				1358	# set_precedence()
				1359	#
				1360	# Sets the precedence for a given terminal. assoc is the associativity such as
				1361	# 'left','right', or 'nonassoc'. level is a numeric level.
				1362	#
				1363	# -----------------------------------------------------------------------------
				1364
				1365	def set_precedence(self,term,assoc,level):
				1366	assert self.Productions == [None],"Must call set_precedence() before add_production()"
				1367	if term in self.Precedence:
				1368	raise GrammarError("Precedence already specified for terminal '%s'" % term)
				1369	if assoc not in ['left','right','nonassoc']:
				1370	raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
				1371	self.Precedence[term] = (assoc,level)
				1372
				1373	# -----------------------------------------------------------------------------
				1374	# add_production()
				1375	#
				1376	# Given an action function, this function assembles a production rule and
				1377	# computes its precedence level.
				1378	#
				1379	# The production rule is supplied as a list of symbols. For example,
				1380	# a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
				1381	# symbols ['expr','PLUS','term'].
				1382	#
# Precedence is determined by the precedence of the right-most terminal
# or the precedence of a terminal specified by %prec.
				1385	#
				1386	# A variety of error checks are performed to make sure production symbols
				1387	# are valid and that %prec is used correctly.
				1388	# -----------------------------------------------------------------------------
				1389
    def add_production(self,prodname,syms,func=None,file='',line=0):
        """Validate a rule, compute its precedence and append it to the grammar.

        prodname - Name of the rule (left-hand side).
        syms     - List of right-hand-side symbols.  It is modified in place:
                   quoted literals are replaced by their value and a trailing
                   '%prec NAME' pair is removed.
        func     - Name of the action function, passed on to Production.
        file     - File name used in error messages and stored on the rule.
        line     - Line number used in error messages and stored on the rule.

        Returns 0.  Raises GrammarError for an illegal rule or symbol name,
        a literal longer than one character, misuse of %prec, an unknown
        %prec name, or a duplicate rule.
        """

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens
        for index,symbol in enumerate(syms):
            if symbol[0] in "'\"":
                try:
                    # NOTE(review): eval() runs text taken from the grammar
                    # specification; only safe if grammar rules are trusted.
                    literal = eval(symbol)
                    if (len(literal) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,symbol, prodname))
                    # Literals register themselves as terminals on first use
                    self.Terminals.setdefault(literal, [])
                    syms[index] = literal
                    continue
                except SyntaxError:
                    # Not a valid quoted literal: fall through to the name check
                    pass
            if symbol != '%prec' and not _is_identifier.match(symbol):
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,symbol, prodname))

        # Determine the precedence level
        if '%prec' not in syms:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))
        else:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            self.UsedPrecedence[precname] = 1
            del syms[-2:] # Drop %prec from the rule

        # See if the rule is already in the rulemap
        rule_key = "%s -> %s" % (prodname,syms)
        if rule_key in self.Prodmap:
            previous = self.Prodmap[rule_key]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, previous) +
                               "Previous definition at %s:%d" % (previous.file, previous.line))

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        self.Nonterminals.setdefault(prodname, [ ])

        # Add the production number to Terminals and Nonterminals
        for symbol in syms:
            if symbol in self.Terminals:
                users = self.Terminals[symbol]
            else:
                users = self.Nonterminals.setdefault(symbol, [ ])
            users.append(pnumber)

        # Create a production and add it to the list of productions
        production = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(production)
        self.Prodmap[rule_key] = production

        # Add to the per-name productions list
        self.Prodnames.setdefault(prodname, [ ]).append(production)
        return 0
				1465
				1466	# -----------------------------------------------------------------------------
				1467	# set_start()
				1468	#
				1469	# Sets the starting symbol and creates the augmented grammar. Production
				1470	# rule 0 is S' -> start where start is the start symbol.
				1471	# -----------------------------------------------------------------------------
				1472
    def set_start(self,start=None):
        """Set the start symbol and install production 0, S' -> start.

        When start is not given, the name of production 1 is used.  Raises
        GrammarError if the start symbol is not a known nonterminal.
        """
        start = start or self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        augmented = Production(0,"S'",[start])
        self.Productions[0] = augmented
        # Record rule 0 as a user of the start symbol
        self.Nonterminals[start].append(0)
        self.Start = start
				1481
				1482	# -----------------------------------------------------------------------------
				1483	# find_unreachable()
				1484	#
				1485	# Find all of the nonterminal symbols that can't be reached from the starting
				1486	# symbol. Returns a list of nonterminals that can't be reached.
				1487	# -----------------------------------------------------------------------------
				1488
				1489	def find_unreachable(self):
				1490
				1491	# Mark all symbols that are reachable from a symbol s
				1492	def mark_reachable_from(s):
				1493	if reachable[s]:
				1494	# We've already reached symbol s.
				1495	return
				1496	reachable[s] = 1
				1497	for p in self.Prodnames.get(s,[]):
				1498	for r in p.prod:
				1499	mark_reachable_from(r)
				1500
				1501	reachable = { }
				1502	for s in list(self.Terminals) + list(self.Nonterminals):
				1503	reachable[s] = 0
				1504
				1505	mark_reachable_from( self.Productions[0].prod[0] )
				1506
				1507	return [s for s in list(self.Nonterminals)
				1508	if not reachable[s]]
				1509
				1510	# -----------------------------------------------------------------------------
				1511	# infinite_cycles()
				1512	#
				1513	# This function looks at the various parsing rules and tries to detect
				1514	# infinite recursion cycles (grammar rules where there is no possible way
				1515	# to derive a string of only terminals).
				1516	# -----------------------------------------------------------------------------
				1517
				1518	def infinite_cycles(self):
				1519	terminates = {}
				1520
				1521	# Terminals:
				1522	for t in self.Terminals:
				1523	terminates[t] = 1
				1524
				1525	terminates['$end'] = 1
				1526
				1527	# Nonterminals:
				1528
				1529	# Initialize to false:
				1530	for n in self.Nonterminals:
				1531	terminates[n] = 0
				1532
				1533	# Then propagate termination until no change:
				1534	while 1:
				1535	some_change = 0
				1536	for (n,pl) in self.Prodnames.items():
				1537	# Nonterminal n terminates iff any of its productions terminates.
				1538	for p in pl:
				1539	# Production p terminates iff all of its rhs symbols terminate.
				1540	for s in p.prod:
				1541	if not terminates[s]:
				1542	# The symbol s does not terminate,
				1543	# so production p does not terminate.
				1544	p_terminates = 0
				1545	break
				1546	else:
				1547	# didn't break from the loop,
				1548	# so every symbol s terminates
				1549	# so production p terminates.
				1550	p_terminates = 1
				1551
				1552	if p_terminates:
				1553	# symbol n terminates!
				1554	if not terminates[n]:
				1555	terminates[n] = 1
				1556	some_change = 1
				1557	# Don't need to consider any more productions for this n.
				1558	break
				1559
				1560	if not some_change:
				1561	break
				1562
				1563	infinite = []
				1564	for (s,term) in terminates.items():
				1565	if not term:
				1566	if not s in self.Prodnames and not s in self.Terminals and s != 'error':
				1567	# s is used-but-not-defined, and we've already warned of that,
				1568	# so it would be overkill to say that it's also non-terminating.
				1569	pass
				1570	else:
				1571	infinite.append(s)
				1572
				1573	return infinite
				1574
				1575
				1576	# -----------------------------------------------------------------------------
				1577	# undefined_symbols()
				1578	#
# Find all symbols that were used in the grammar, but not defined as tokens or
# grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
# and prod is the production where the symbol was used.
				1582	# -----------------------------------------------------------------------------
				1583	def undefined_symbols(self):
				1584	result = []
				1585	for p in self.Productions:
				1586	if not p: continue
				1587
				1588	for s in p.prod:
				1589	if not s in self.Prodnames and not s in self.Terminals and s != 'error':
				1590	result.append((s,p))
				1591	return result
				1592
				1593	# -----------------------------------------------------------------------------
				1594	# unused_terminals()
				1595	#
				1596	# Find all terminals that were defined, but not used by the grammar. Returns
				1597	# a list of all symbols.
				1598	# -----------------------------------------------------------------------------
				1599	def unused_terminals(self):
				1600	unused_tok = []
				1601	for s,v in self.Terminals.items():
				1602	if s != 'error' and not v:
				1603	unused_tok.append(s)
				1604
				1605	return unused_tok
				1606
				1607	# ------------------------------------------------------------------------------
				1608	# unused_rules()
				1609	#
				1610	# Find all grammar rules that were defined, but not used (maybe not reachable)
				1611	# Returns a list of productions.
				1612	# ------------------------------------------------------------------------------
				1613
				1614	def unused_rules(self):
				1615	unused_prod = []
				1616	for s,v in self.Nonterminals.items():
				1617	if not v:
				1618	p = self.Prodnames[s][0]
				1619	unused_prod.append(p)
				1620	return unused_prod
				1621
				1622	# -----------------------------------------------------------------------------
				1623	# unused_precedence()
				1624	#
				1625	# Returns a list of tuples (term,precedence) corresponding to precedence
				1626	# rules that were never used by the grammar. term is the name of the terminal
				1627	# on which precedence was applied and precedence is a string such as 'left' or
				1628	# 'right' corresponding to the type of precedence.
				1629	# -----------------------------------------------------------------------------
				1630
				1631	def unused_precedence(self):
				1632	unused = []
				1633	for termname in self.Precedence:
				1634	if not (termname in self.Terminals or termname in self.UsedPrecedence):
				1635	unused.append((termname,self.Precedence[termname][0]))
				1636
				1637	return unused
				1638
				1639	# -------------------------------------------------------------------------
				1640	# _first()
				1641	#
				1642	# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
				1643	#
# During execution of compute_first(), the result may be incomplete.
				1645	# Afterward (e.g., when called from compute_follow()), it will be complete.
				1646	# -------------------------------------------------------------------------
				1647	def _first(self,beta):
				1648
				1649	# We are computing First(x1,x2,x3,...,xn)
				1650	result = [ ]
				1651	for x in beta:
				1652	x_produces_empty = 0
				1653
				1654	# Add all the non-<empty> symbols of First[x] to the result.
				1655	for f in self.First[x]:
				1656	if f == '<empty>':
				1657	x_produces_empty = 1
				1658	else:
				1659	if f not in result: result.append(f)
				1660
				1661	if x_produces_empty:
				1662	# We have to consider the next x in beta,
				1663	# i.e. stay in the loop.
				1664	pass
				1665	else:
				1666	# We don't have to consider any further symbols in beta.
				1667	break
				1668	else:
				1669	# There was no 'break' from the loop,
				1670	# so x_produces_empty was true for all x in beta,
				1671	# so beta produces empty as well.
				1672	result.append('<empty>')
				1673
				1674	return result
				1675
				1676	# -------------------------------------------------------------------------
				1677	# compute_first()
				1678	#
				1679	# Compute the value of FIRST1(X) for all symbols
				1680	# -------------------------------------------------------------------------
				1681	def compute_first(self):
				1682	if self.First:
				1683	return self.First
				1684
				1685	# Terminals:
				1686	for t in self.Terminals:
				1687	self.First[t] = [t]
				1688
				1689	self.First['$end'] = ['$end']
				1690
				1691	# Nonterminals:
				1692
				1693	# Initialize to the empty set:
				1694	for n in self.Nonterminals:
				1695	self.First[n] = []
				1696
				1697	# Then propagate symbols until no change:
				1698	while 1:
				1699	some_change = 0
				1700	for n in self.Nonterminals:
				1701	for p in self.Prodnames[n]:
				1702	for f in self._first(p.prod):
				1703	if f not in self.First[n]:
				1704	self.First[n].append( f )
				1705	some_change = 1
				1706	if not some_change:
				1707	break
				1708
				1709	return self.First
				1710
				1711	# ---------------------------------------------------------------------
				1712	# compute_follow()
				1713	#
				1714	# Computes all of the follow sets for every non-terminal symbol. The
				1715	# follow set is the set of all symbols that might follow a given
				1716	# non-terminal. See the Dragon book, 2nd Ed. p. 189.
				1717	# ---------------------------------------------------------------------
				1718	def compute_follow(self,start=None):
				1719	# If already computed, return the result
				1720	if self.Follow:
				1721	return self.Follow
				1722
				1723	# If first sets not computed yet, do that first.
				1724	if not self.First:
				1725	self.compute_first()
				1726
				1727	# Add '$end' to the follow list of the start symbol
				1728	for k in self.Nonterminals:
				1729	self.Follow[k] = [ ]
				1730
				1731	if not start:
				1732	start = self.Productions[1].name
				1733
				1734	self.Follow[start] = [ '$end' ]
				1735
				1736	while 1:
				1737	didadd = 0
				1738	for p in self.Productions[1:]:
				1739	# Here is the production set
				1740	for i in range(len(p.prod)):
				1741	B = p.prod[i]
				1742	if B in self.Nonterminals:
				1743	# Okay. We got a non-terminal in a production
				1744	fst = self._first(p.prod[i+1:])
				1745	hasempty = 0
				1746	for f in fst:
				1747	if f != '<empty>' and f not in self.Follow[B]:
				1748	self.Follow[B].append(f)
				1749	didadd = 1
				1750	if f == '<empty>':
				1751	hasempty = 1
				1752	if hasempty or i == (len(p.prod)-1):
				1753	# Add elements of follow(a) to follow(b)
				1754	for f in self.Follow[p.name]:
				1755	if f not in self.Follow[B]:
				1756	self.Follow[B].append(f)
				1757	didadd = 1
				1758	if not didadd: break
				1759	return self.Follow
				1760
				1761
				1762	# -----------------------------------------------------------------------------
				1763	# build_lritems()
				1764	#
				1765	# This function walks the list of productions and builds a complete set of the
				1766	# LR items. The LR items are stored in two ways: First, they are uniquely
				1767	# numbered and placed in the list _lritems. Second, a linked list of LR items
				1768	# is built for each production. For example:
				1769	#
				1770	# E -> E PLUS E
				1771	#
				1772	# Creates the list
				1773	#
				1774	# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
				1775	# -----------------------------------------------------------------------------
				1776
				1777	def build_lritems(self):
				1778	for p in self.Productions:
				1779	lastlri = p
				1780	i = 0
				1781	lr_items = []
				1782	while 1:
				1783	if i > len(p):
				1784	lri = None
				1785	else:
				1786	lri = LRItem(p,i)
				1787	# Precompute the list of productions immediately following
				1788	try:
				1789	lri.lr_after = self.Prodnames[lri.prod[i+1]]
				1790	except (IndexError,KeyError):
				1791	lri.lr_after = []
				1792	try:
				1793	lri.lr_before = lri.prod[i-1]
				1794	except IndexError:
				1795	lri.lr_before = None
				1796
				1797	lastlri.lr_next = lri
				1798	if not lri: break
				1799	lr_items.append(lri)
				1800	lastlri = lri
				1801	i += 1
				1802	p.lr_items = lr_items
				1803
				1804	# -----------------------------------------------------------------------------
				1805	# == Class LRTable ==
				1806	#
				1807	# This basic class represents a basic table of LR parsing information.
				1808	# Methods for generating the tables are not defined here. They are defined
				1809	# in the derived class LRGeneratedTable.
				1810	# -----------------------------------------------------------------------------
				1811
				1812	class VersionError(YaccError): pass
				1813
				1814	class LRTable(object):
				1815	def __init__(self):
				1816	self.lr_action = None
				1817	self.lr_goto = None
				1818	self.lr_productions = None
				1819	self.lr_method = None
				1820
				1821	def read_table(self,module):
				1822	if isinstance(module,types.ModuleType):
				1823	parsetab = module
				1824	else:
				1825	if sys.version_info[0] < 3:
				1826	exec("import %s as parsetab" % module)
				1827	else:
				1828	env = { }
				1829	exec("import %s as parsetab" % module, env, env)
				1830	parsetab = env['parsetab']
				1831
				1832	if parsetab._tabversion != __tabversion__:
				1833	raise VersionError("yacc table file version is out of date")
				1834
				1835	self.lr_action = parsetab._lr_action
				1836	self.lr_goto = parsetab._lr_goto
				1837
				1838	self.lr_productions = []
				1839	for p in parsetab._lr_productions:
				1840	self.lr_productions.append(MiniProduction(*p))
				1841
				1842	self.lr_method = parsetab._lr_method
				1843	return parsetab._lr_signature
				1844
				1845	def read_pickle(self,filename):
				1846	try:
				1847	import cPickle as pickle
				1848	except ImportError:
				1849	import pickle
				1850
				1851	in_f = open(filename,"rb")
				1852
				1853	tabversion = pickle.load(in_f)
				1854	if tabversion != __tabversion__:
				1855	raise VersionError("yacc table file version is out of date")
				1856	self.lr_method = pickle.load(in_f)
				1857	signature = pickle.load(in_f)
				1858	self.lr_action = pickle.load(in_f)
				1859	self.lr_goto = pickle.load(in_f)
				1860	productions = pickle.load(in_f)
				1861
				1862	self.lr_productions = []
				1863	for p in productions:
				1864	self.lr_productions.append(MiniProduction(*p))
				1865
				1866	in_f.close()
				1867	return signature
				1868
				1869	# Bind all production function names to callable objects in pdict
				1870	def bind_callables(self,pdict):
				1871	for p in self.lr_productions:
				1872	p.bind(pdict)
				1873
				1874	# -----------------------------------------------------------------------------
				1875	# === LR Generator ===
				1876	#
				1877	# The following classes and functions are used to generate LR parsing tables on
				1878	# a grammar.
				1879	# -----------------------------------------------------------------------------
				1880
				1881	# -----------------------------------------------------------------------------
				1882	# digraph()
				1883	# traverse()
				1884	#
				1885	# The following two functions are used to compute set valued functions
				1886	# of the form:
				1887	#
# F(x) = F'(x) U U{F(y) | x R y}
				1889	#
				1890	# This is used to compute the values of Read() sets as well as FOLLOW sets
				1891	# in LALR(1) generation.
				1892	#
				1893	# Inputs: X - An input set
				1894	# R - A relation
				1895	# FP - Set-valued function
				1896	# ------------------------------------------------------------------------------
				1897
				1898	def digraph(X,R,FP):
				1899	N = { }
				1900	for x in X:
				1901	N[x] = 0
				1902	stack = []
				1903	F = { }
				1904	for x in X:
				1905	if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
				1906	return F
				1907
				1908	def traverse(x,N,stack,F,X,R,FP):
				1909	stack.append(x)
				1910	d = len(stack)
				1911	N[x] = d
				1912	F[x] = FP(x) # F(X) <- F'(x)
				1913
				1914	rel = R(x) # Get y's related to x
				1915	for y in rel:
				1916	if N[y] == 0:
				1917	traverse(y,N,stack,F,X,R,FP)
				1918	N[x] = min(N[x],N[y])
				1919	for a in F.get(y,[]):
				1920	if a not in F[x]: F[x].append(a)
				1921	if N[x] == d:
				1922	N[stack[-1]] = MAXINT
				1923	F[stack[-1]] = F[x]
				1924	element = stack.pop()
				1925	while element != x:
				1926	N[stack[-1]] = MAXINT
				1927	F[stack[-1]] = F[x]
				1928	element = stack.pop()
				1929
				1930	class LALRError(YaccError): pass
				1931
				1932	# -----------------------------------------------------------------------------
				1933	# == LRGeneratedTable ==
				1934	#
				1935	# This class implements the LR table generation algorithm. There are no
				1936	# public methods except for write()
				1937	# -----------------------------------------------------------------------------
				1938
				1939	class LRGeneratedTable(LRTable):
				1940	def __init__(self,grammar,method='LALR',log=None):
				1941	if method not in ['SLR','LALR']:
				1942	raise LALRError("Unsupported method %s" % method)
				1943
				1944	self.grammar = grammar
				1945	self.lr_method = method
				1946
				1947	# Set up the logger
				1948	if not log:
				1949	log = NullLogger()
				1950	self.log = log
				1951
				1952	# Internal attributes
				1953	self.lr_action = {} # Action table
				1954	self.lr_goto = {} # Goto table
				1955	self.lr_productions = grammar.Productions # Copy of grammar Production array
				1956	self.lr_goto_cache = {} # Cache of computed gotos
				1957	self.lr0_cidhash = {} # Cache of closures
				1958
				1959	self._add_count = 0 # Internal counter used to detect cycles
				1960
				1961	# Diagonistic information filled in by the table generator
				1962	self.sr_conflict = 0
				1963	self.rr_conflict = 0
				1964	self.conflicts = [] # List of conflicts
				1965
				1966	self.sr_conflicts = []
				1967	self.rr_conflicts = []
				1968
				1969	# Build the tables
				1970	self.grammar.build_lritems()
				1971	self.grammar.compute_first()
				1972	self.grammar.compute_follow()
				1973	self.lr_parse_table()
				1974
				1975	# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
				1976
				1977	def lr0_closure(self,I):
				1978	self._add_count += 1
				1979
				1980	# Add everything in I to J
				1981	J = I[:]
				1982	didadd = 1
				1983	while didadd:
				1984	didadd = 0
				1985	for j in J:
				1986	for x in j.lr_after:
				1987	if getattr(x,"lr0_added",0) == self._add_count: continue
				1988	# Add B --> .G to J
				1989	J.append(x.lr_next)
				1990	x.lr0_added = self._add_count
				1991	didadd = 1
				1992
				1993	return J
				1994
				1995	# Compute the LR(0) goto function goto(I,X) where I is a set
				1996	# of LR(0) items and X is a grammar symbol. This function is written
				1997	# in a way that guarantees uniqueness of the generated goto sets
				1998	# (i.e. the same goto set will never be returned as two different Python
				1999	# objects). With uniqueness, we can later do fast set comparisons using
				2000	# id(obj) instead of element-wise comparison.
				2001
				2002	def lr0_goto(self,I,x):
				2003	# First we look for a previously cached entry
				2004	g = self.lr_goto_cache.get((id(I),x),None)
				2005	if g: return g
				2006
				2007	# Now we generate the goto set in a way that guarantees uniqueness
				2008	# of the result
				2009
				2010	s = self.lr_goto_cache.get(x,None)
				2011	if not s:
				2012	s = { }
				2013	self.lr_goto_cache[x] = s
				2014
				2015	gs = [ ]
				2016	for p in I:
				2017	n = p.lr_next
				2018	if n and n.lr_before == x:
				2019	s1 = s.get(id(n),None)
				2020	if not s1:
				2021	s1 = { }
				2022	s[id(n)] = s1
				2023	gs.append(n)
				2024	s = s1
				2025	g = s.get('$end',None)
				2026	if not g:
				2027	if gs:
				2028	g = self.lr0_closure(gs)
				2029	s['$end'] = g
				2030	else:
				2031	s['$end'] = gs
				2032	self.lr_goto_cache[(id(I),x)] = g
				2033	return g
				2034
				2035	# Compute the LR(0) sets of item function
				2036	def lr0_items(self):
				2037
				2038	C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
				2039	i = 0
				2040	for I in C:
				2041	self.lr0_cidhash[id(I)] = i
				2042	i += 1
				2043
				2044	# Loop over the items in C and each grammar symbols
				2045	i = 0
				2046	while i < len(C):
				2047	I = C[i]
				2048	i += 1
				2049
				2050	# Collect all of the symbols that could possibly be in the goto(I,X) sets
				2051	asyms = { }
				2052	for ii in I:
				2053	for s in ii.usyms:
				2054	asyms[s] = None
				2055
				2056	for x in asyms:
				2057	g = self.lr0_goto(I,x)
				2058	if not g: continue
				2059	if id(g) in self.lr0_cidhash: continue
				2060	self.lr0_cidhash[id(g)] = len(C)
				2061	C.append(g)
				2062
				2063	return C
				2064
				2065	# -----------------------------------------------------------------------------
				2066	# ==== LALR(1) Parsing ====
				2067	#
				2068	# LALR(1) parsing is almost exactly the same as SLR except that instead of
				2069	# relying upon Follow() sets when performing reductions, a more selective
				2070	# lookahead set that incorporates the state of the LR(0) machine is utilized.
				2071	# Thus, we mainly just have to focus on calculating the lookahead sets.
				2072	#
# The method used here is due to DeRemer and Pennello (1982).
#
# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
# Lookahead Sets", ACM Transactions on Programming Languages and Systems,
# Vol. 4, No. 4, Oct. 1982, pp. 615-649
				2078	#
				2079	# Further details can also be found in:
				2080	#
				2081	# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
				2082	# McGraw-Hill Book Company, (1985).
				2083	#
				2084	# -----------------------------------------------------------------------------
				2085
				2086	# -----------------------------------------------------------------------------
				2087	# compute_nullable_nonterminals()
				2088	#
				2089	# Creates a dictionary containing all of the non-terminals that might produce
				2090	# an empty production.
				2091	# -----------------------------------------------------------------------------
				2092
				2093	def compute_nullable_nonterminals(self):
				2094	nullable = {}
				2095	num_nullable = 0
				2096	while 1:
				2097	for p in self.grammar.Productions[1:]:
				2098	if p.len == 0:
				2099	nullable[p.name] = 1
				2100	continue
				2101	for t in p.prod:
				2102	if not t in nullable: break
				2103	else:
				2104	nullable[p.name] = 1
				2105	if len(nullable) == num_nullable: break
				2106	num_nullable = len(nullable)
				2107	return nullable
				2108
				2109	# -----------------------------------------------------------------------------
				2110	# find_nonterminal_trans(C)
				2111	#
# Given a set of LR(0) items, this function finds all of the non-terminal
				2113	# transitions. These are transitions in which a dot appears immediately before
				2114	# a non-terminal. Returns a list of tuples of the form (state,N) where state
				2115	# is the state number and N is the nonterminal symbol.
				2116	#
				2117	# The input C is the set of LR(0) items.
				2118	# -----------------------------------------------------------------------------
				2119
				2120	def find_nonterminal_transitions(self,C):
				2121	trans = []
				2122	for state in range(len(C)):
				2123	for p in C[state]:
				2124	if p.lr_index < p.len - 1:
				2125	t = (state,p.prod[p.lr_index+1])
				2126	if t[1] in self.grammar.Nonterminals:
				2127	if t not in trans: trans.append(t)
				2128	state = state + 1
				2129	return trans
				2130
				2131	# -----------------------------------------------------------------------------
				2132	# dr_relation()
				2133	#
				2134	# Computes the DR(p,A) relationships for non-terminal transitions. The input
				2135	# is a tuple (state,N) where state is a number and N is a nonterminal symbol.
				2136	#
				2137	# Returns a list of terminals.
				2138	# -----------------------------------------------------------------------------
				2139
				2140	def dr_relation(self,C,trans,nullable):
				2141	dr_set = { }
				2142	state,N = trans
				2143	terms = []
				2144
				2145	g = self.lr0_goto(C[state],N)
				2146	for p in g:
				2147	if p.lr_index < p.len - 1:
				2148	a = p.prod[p.lr_index+1]
				2149	if a in self.grammar.Terminals:
				2150	if a not in terms: terms.append(a)
				2151
				2152	# This extra bit is to handle the start state
				2153	if state == 0 and N == self.grammar.Productions[0].prod[0]:
				2154	terms.append('$end')
				2155
				2156	return terms
				2157
				2158	# -----------------------------------------------------------------------------
				2159	# reads_relation()
				2160	#
				2161	# Computes the READS() relation (p,A) READS (t,C).
				2162	# -----------------------------------------------------------------------------
				2163
				2164	def reads_relation(self,C, trans, empty):
				2165	# Look for empty transitions
				2166	rel = []
				2167	state, N = trans
				2168
				2169	g = self.lr0_goto(C[state],N)
				2170	j = self.lr0_cidhash.get(id(g),-1)
				2171	for p in g:
				2172	if p.lr_index < p.len - 1:
				2173	a = p.prod[p.lr_index + 1]
				2174	if a in empty:
				2175	rel.append((j,a))
				2176
				2177	return rel
				2178
				2179	# -----------------------------------------------------------------------------
				2180	# compute_lookback_includes()
				2181	#
				2182	# Determines the lookback and includes relations
				2183	#
				2184	# LOOKBACK:
				2185	#
				2186	# This relation is determined by running the LR(0) state machine forward.
				2187	# For example, starting with a production "N : . A B C", we run it forward
				2188	# to obtain "N : A B C ." We then build a relationship between this final
				2189	# state and the starting state. These relationships are stored in a dictionary
				2190	# lookdict.
				2191	#
				2192	# INCLUDES:
				2193	#
				2194	# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
				2195	#
				2196	# This relation is used to determine non-terminal transitions that occur
				2197	# inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
				2198	# if the following holds:
				2199	#
				2200	# B -> LAT, where T -> epsilon and p' -L-> p
				2201	#
				2202	# L is essentially a prefix (which may be empty), T is a suffix that must be
				2203	# able to derive an empty string. State p' must lead to state p with the string L.
				2204	#
				2205	# -----------------------------------------------------------------------------
				2206
				2207	def compute_lookback_includes(self,C,trans,nullable):
				2208
				2209	lookdict = {} # Dictionary of lookback relations
				2210	includedict = {} # Dictionary of include relations
				2211
				2212	# Make a dictionary of non-terminal transitions
				2213	dtrans = {}
				2214	for t in trans:
				2215	dtrans[t] = 1
				2216
				2217	# Loop over all transitions and compute lookbacks and includes
				2218	for state,N in trans:
				2219	lookb = []
				2220	includes = []
				2221	for p in C[state]:
				2222	if p.name != N: continue
				2223
				2224	# Okay, we have a name match. We now follow the production all the way
				2225	# through the state machine until we get the . on the right hand side
				2226
				2227	lr_index = p.lr_index
				2228	j = state
				2229	while lr_index < p.len - 1:
				2230	lr_index = lr_index + 1
				2231	t = p.prod[lr_index]
				2232
				2233	# Check to see if this symbol and state are a non-terminal transition
				2234	if (j,t) in dtrans:
				2235	# Yes. Okay, there is some chance that this is an includes relation
				2236	# the only way to know for certain is whether the rest of the
				2237	# production derives empty
				2238
				2239	li = lr_index + 1
				2240	while li < p.len:
				2241	if p.prod[li] in self.grammar.Terminals: break # No forget it
				2242	if not p.prod[li] in nullable: break
				2243	li = li + 1
				2244	else:
				2245	# Appears to be a relation between (j,t) and (state,N)
				2246	includes.append((j,t))
				2247
				2248	g = self.lr0_goto(C[j],t) # Go to next set
				2249	j = self.lr0_cidhash.get(id(g),-1) # Go to next state
				2250
				2251	# When we get here, j is the final state, now we have to locate the production
				2252	for r in C[j]:
				2253	if r.name != p.name: continue
				2254	if r.len != p.len: continue
				2255	i = 0
				2256	# This look is comparing a production ". A B C" with "A B C ."
				2257	while i < r.lr_index:
				2258	if r.prod[i] != p.prod[i+1]: break
				2259	i = i + 1
				2260	else:
				2261	lookb.append((j,r))
				2262	for i in includes:
				2263	if not i in includedict: includedict[i] = []
				2264	includedict[i].append((state,N))
				2265	lookdict[(state,N)] = lookb
				2266
				2267	return lookdict,includedict
				2268
				2269	# -----------------------------------------------------------------------------
				2270	# compute_read_sets()
				2271	#
				2272	# Given a set of LR(0) items, this function computes the read sets.
				2273	#
				2274	# Inputs: C = Set of LR(0) items
				2275	# ntrans = Set of nonterminal transitions
				2276	# nullable = Set of empty transitions
				2277	#
				2278	# Returns a set containing the read sets
				2279	# -----------------------------------------------------------------------------
				2280
				2281	def compute_read_sets(self,C, ntrans, nullable):
				2282	FP = lambda x: self.dr_relation(C,x,nullable)
				2283	R = lambda x: self.reads_relation(C,x,nullable)
				2284	F = digraph(ntrans,R,FP)
				2285	return F
				2286
				2287	# -----------------------------------------------------------------------------
				2288	# compute_follow_sets()
				2289	#
				2290	# Given a set of LR(0) items, a set of non-terminal transitions, a readset,
				2291	# and an include set, this function computes the follow sets
				2292	#
# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
				2294	#
				2295	# Inputs:
				2296	# ntrans = Set of nonterminal transitions
				2297	# readsets = Readset (previously computed)
				2298	# inclsets = Include sets (previously computed)
				2299	#
				2300	# Returns a set containing the follow sets
				2301	# -----------------------------------------------------------------------------
				2302
				2303	def compute_follow_sets(self,ntrans,readsets,inclsets):
				2304	FP = lambda x: readsets[x]
				2305	R = lambda x: inclsets.get(x,[])
				2306	F = digraph(ntrans,R,FP)
				2307	return F
				2308
				2309	# -----------------------------------------------------------------------------
				2310	# add_lookaheads()
				2311	#
				2312	# Attaches the lookahead symbols to grammar rules.
				2313	#
				2314	# Inputs: lookbacks - Set of lookback relations
				2315	# followset - Computed follow set
				2316	#
				2317	# This function directly attaches the lookaheads to productions contained
				2318	# in the lookbacks set
				2319	# -----------------------------------------------------------------------------
				2320
				2321	def add_lookaheads(self,lookbacks,followset):
				2322	for trans,lb in lookbacks.items():
				2323	# Loop over productions in lookback
				2324	for state,p in lb:
				2325	if not state in p.lookaheads:
				2326	p.lookaheads[state] = []
				2327	f = followset.get(trans,[])
				2328	for a in f:
				2329	if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
				2330
				2331	# -----------------------------------------------------------------------------
				2332	# add_lalr_lookaheads()
				2333	#
				2334	# This function does all of the work of adding lookahead information for use
				2335	# with LALR parsing
				2336	# -----------------------------------------------------------------------------
				2337
				2338	def add_lalr_lookaheads(self,C):
				2339	# Determine all of the nullable nonterminals
				2340	nullable = self.compute_nullable_nonterminals()
				2341
				2342	# Find all non-terminal transitions
				2343	trans = self.find_nonterminal_transitions(C)
				2344
				2345	# Compute read sets
				2346	readsets = self.compute_read_sets(C,trans,nullable)
				2347
				2348	# Compute lookback/includes relations
				2349	lookd, included = self.compute_lookback_includes(C,trans,nullable)
				2350
				2351	# Compute LALR FOLLOW sets
				2352	followsets = self.compute_follow_sets(trans,readsets,included)
				2353
				2354	# Add all of the lookaheads
				2355	self.add_lookaheads(lookd,followsets)
				2356
				2357	# -----------------------------------------------------------------------------
				2358	# lr_parse_table()
				2359	#
				2360	# This function constructs the parse tables for SLR or LALR
				2361	# -----------------------------------------------------------------------------
				2362	def lr_parse_table(self):
				2363	Productions = self.grammar.Productions
				2364	Precedence = self.grammar.Precedence
				2365	goto = self.lr_goto # Goto array
				2366	action = self.lr_action # Action array
				2367	log = self.log # Logger for output
				2368
				2369	actionp = { } # Action production array (temporary)
				2370
				2371	log.info("Parsing method: %s", self.lr_method)
				2372
				2373	# Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
				2374	# This determines the number of states
				2375
				2376	C = self.lr0_items()
				2377
				2378	if self.lr_method == 'LALR':
				2379	self.add_lalr_lookaheads(C)
				2380
				2381	# Build the parser table, state by state
				2382	st = 0
				2383	for I in C:
				2384	# Loop over each production in I
				2385	actlist = [ ] # List of actions
				2386	st_action = { }
				2387	st_actionp = { }
				2388	st_goto = { }
				2389	log.info("")
				2390	log.info("state %d", st)
				2391	log.info("")
				2392	for p in I:
				2393	log.info(" (%d) %s", p.number, str(p))
				2394	log.info("")
				2395
				2396	for p in I:
				2397	if p.len == p.lr_index + 1:
				2398	if p.name == "S'":
				2399	# Start symbol. Accept!
				2400	st_action["$end"] = 0
				2401	st_actionp["$end"] = p
				2402	else:
				2403	# We are at the end of a production. Reduce!
				2404	if self.lr_method == 'LALR':
				2405	laheads = p.lookaheads[st]
				2406	else:
				2407	laheads = self.grammar.Follow[p.name]
				2408	for a in laheads:
				2409	actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
				2410	r = st_action.get(a,None)
				2411	if r is not None:
				2412	# Whoa. Have a shift/reduce or reduce/reduce conflict
				2413	if r > 0:
				2414	# Need to decide on shift or reduce here
				2415	# By default we favor shifting. Need to add
				2416	# some precedence rules here.
				2417	sprec,slevel = Productions[st_actionp[a].number].prec
				2418	rprec,rlevel = Precedence.get(a,('right',0))
				2419	if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
				2420	# We really need to reduce here.
				2421	st_action[a] = -p.number
				2422	st_actionp[a] = p
				2423	if not slevel and not rlevel:
				2424	log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
				2425	self.sr_conflicts.append((st,a,'reduce'))
				2426	Productions[p.number].reduced += 1
				2427	elif (slevel == rlevel) and (rprec == 'nonassoc'):
				2428	st_action[a] = None
				2429	else:
				2430	# Hmmm. Guess we'll keep the shift
				2431	if not rlevel:
				2432	log.info(" ! shift/reduce conflict for %s resolved as shift",a)
				2433	self.sr_conflicts.append((st,a,'shift'))
				2434	elif r < 0:
				2435	# Reduce/reduce conflict. In this case, we favor the rule
				2436	# that was defined first in the grammar file
				2437	oldp = Productions[-r]
				2438	pp = Productions[p.number]
				2439	if oldp.line > pp.line:
				2440	st_action[a] = -p.number
				2441	st_actionp[a] = p
				2442	chosenp,rejectp = pp,oldp
				2443	Productions[p.number].reduced += 1
				2444	Productions[oldp.number].reduced -= 1
				2445	else:
				2446	chosenp,rejectp = oldp,pp
				2447	self.rr_conflicts.append((st,chosenp,rejectp))
				2448	log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
				2449	else:
				2450	raise LALRError("Unknown conflict in state %d" % st)
				2451	else:
				2452	st_action[a] = -p.number
				2453	st_actionp[a] = p
				2454	Productions[p.number].reduced += 1
				2455	else:
				2456	i = p.lr_index
				2457	a = p.prod[i+1] # Get symbol right after the "."
				2458	if a in self.grammar.Terminals:
				2459	g = self.lr0_goto(I,a)
				2460	j = self.lr0_cidhash.get(id(g),-1)
				2461	if j >= 0:
				2462	# We are in a shift state
				2463	actlist.append((a,p,"shift and go to state %d" % j))
				2464	r = st_action.get(a,None)
				2465	if r is not None:
				2466	# Whoa have a shift/reduce or shift/shift conflict
				2467	if r > 0:
				2468	if r != j:
				2469	raise LALRError("Shift/shift conflict in state %d" % st)
				2470	elif r < 0:
				2471	# Do a precedence check.
				2472	# - if precedence of reduce rule is higher, we reduce.
				2473	# - if precedence of reduce is same and left assoc, we reduce.
				2474	# - otherwise we shift
				2475	rprec,rlevel = Productions[st_actionp[a].number].prec
				2476	sprec,slevel = Precedence.get(a,('right',0))
				2477	if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
				2478	# We decide to shift here... highest precedence to shift
				2479	Productions[st_actionp[a].number].reduced -= 1
				2480	st_action[a] = j
				2481	st_actionp[a] = p
				2482	if not rlevel:
				2483	log.info(" ! shift/reduce conflict for %s resolved as shift",a)
				2484	self.sr_conflicts.append((st,a,'shift'))
				2485	elif (slevel == rlevel) and (rprec == 'nonassoc'):
				2486	st_action[a] = None
				2487	else:
				2488	# Hmmm. Guess we'll keep the reduce
				2489	if not slevel and not rlevel:
				2490	log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
				2491	self.sr_conflicts.append((st,a,'reduce'))
				2492
				2493	else:
				2494	raise LALRError("Unknown conflict in state %d" % st)
				2495	else:
				2496	st_action[a] = j
				2497	st_actionp[a] = p
				2498
				2499	# Print the actions associated with each terminal
				2500	_actprint = { }
				2501	for a,p,m in actlist:
				2502	if a in st_action:
				2503	if p is st_actionp[a]:
				2504	log.info(" %-15s %s",a,m)
				2505	_actprint[(a,m)] = 1
				2506	log.info("")
				2507	# Print the actions that were not used. (debugging)
				2508	not_used = 0
				2509	for a,p,m in actlist:
				2510	if a in st_action:
				2511	if p is not st_actionp[a]:
				2512	if not (a,m) in _actprint:
				2513	log.debug(" ! %-15s [ %s ]",a,m)
				2514	not_used = 1
				2515	_actprint[(a,m)] = 1
				2516	if not_used:
				2517	log.debug("")
				2518
				2519	# Construct the goto table for this state
				2520
				2521	nkeys = { }
				2522	for ii in I:
				2523	for s in ii.usyms:
				2524	if s in self.grammar.Nonterminals:
				2525	nkeys[s] = None
				2526	for n in nkeys:
				2527	g = self.lr0_goto(I,n)
				2528	j = self.lr0_cidhash.get(id(g),-1)
				2529	if j >= 0:
				2530	st_goto[n] = j
				2531	log.info(" %-30s shift and go to state %d",n,j)
				2532
				2533	action[st] = st_action
				2534	actionp[st] = st_actionp
				2535	goto[st] = st_goto
				2536	st += 1
				2537
				2538
				2539	# -----------------------------------------------------------------------------
				2540	# write()
				2541	#
				2542	# This function writes the LR parsing tables to a file
				2543	# -----------------------------------------------------------------------------
				2544
				2545	def write_table(self,modulename,outputdir='',signature=""):
				2546	basemodulename = modulename.split(".")[-1]
				2547	filename = os.path.join(outputdir,basemodulename) + ".py"
				2548	try:
				2549	f = open(filename,"w")
				2550
				2551	f.write("""
				2552	# %s
				2553	# This file is automatically generated. Do not edit.
				2554	_tabversion = %r
				2555
				2556	_lr_method = %r
				2557
				2558	_lr_signature = %r
				2559	""" % (filename, __tabversion__, self.lr_method, signature))
				2560
				2561	# Change smaller to 0 to go back to original tables
				2562	smaller = 1
				2563
				2564	# Factor out names to try and make smaller
				2565	if smaller:
				2566	items = { }
				2567
				2568	for s,nd in self.lr_action.items():
				2569	for name,v in nd.items():
				2570	i = items.get(name)
				2571	if not i:
				2572	i = ([],[])
				2573	items[name] = i
				2574	i[0].append(s)
				2575	i[1].append(v)
				2576
				2577	f.write("\n_lr_action_items = {")
				2578	for k,v in items.items():
				2579	f.write("%r:([" % k)
				2580	for i in v[0]:
				2581	f.write("%r," % i)
				2582	f.write("],[")
				2583	for i in v[1]:
				2584	f.write("%r," % i)
				2585
				2586	f.write("]),")
				2587	f.write("}\n")
				2588
				2589	f.write("""
				2590	_lr_action = { }
				2591	for _k, _v in _lr_action_items.items():
				2592	for _x,_y in zip(_v[0],_v[1]):
				2593	if not _x in _lr_action: _lr_action[_x] = { }
				2594	_lr_action[_x][_k] = _y
				2595	del _lr_action_items
				2596	""")
				2597
				2598	else:
				2599	f.write("\n_lr_action = { ");
				2600	for k,v in self.lr_action.items():
				2601	f.write("(%r,%r):%r," % (k[0],k[1],v))
				2602	f.write("}\n");
				2603
				2604	if smaller:
				2605	# Factor out names to try and make smaller
				2606	items = { }
				2607
				2608	for s,nd in self.lr_goto.items():
				2609	for name,v in nd.items():
				2610	i = items.get(name)
				2611	if not i:
				2612	i = ([],[])
				2613	items[name] = i
				2614	i[0].append(s)
				2615	i[1].append(v)
				2616
				2617	f.write("\n_lr_goto_items = {")
				2618	for k,v in items.items():
				2619	f.write("%r:([" % k)
				2620	for i in v[0]:
				2621	f.write("%r," % i)
				2622	f.write("],[")
				2623	for i in v[1]:
				2624	f.write("%r," % i)
				2625
				2626	f.write("]),")
				2627	f.write("}\n")
				2628
				2629	f.write("""
				2630	_lr_goto = { }
				2631	for _k, _v in _lr_goto_items.items():
				2632	for _x,_y in zip(_v[0],_v[1]):
				2633	if not _x in _lr_goto: _lr_goto[_x] = { }
				2634	_lr_goto[_x][_k] = _y
				2635	del _lr_goto_items
				2636	""")
				2637	else:
				2638	f.write("\n_lr_goto = { ");
				2639	for k,v in self.lr_goto.items():
				2640	f.write("(%r,%r):%r," % (k[0],k[1],v))
				2641	f.write("}\n");
				2642
				2643	# Write production table
				2644	f.write("_lr_productions = [\n")
				2645	for p in self.lr_productions:
				2646	if p.func:
				2647	f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
				2648	else:
				2649	f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
				2650	f.write("]\n")
				2651	f.close()
				2652
				2653	except IOError:
				2654	e = sys.exc_info()[1]
				2655	sys.stderr.write("Unable to create '%s'\n" % filename)
				2656	sys.stderr.write(str(e)+"\n")
				2657	return
				2658
				2659
				2660	# -----------------------------------------------------------------------------
				2661	# pickle_table()
				2662	#
				2663	# This function pickles the LR parsing tables to the file named by 'filename'
				2664	# -----------------------------------------------------------------------------
				2665
    def pickle_table(self,filename,signature=""):
        """Pickle the LR parsing tables into the file named *filename*.

        The objects are dumped in a fixed order: table version, LR method,
        signature, action table, goto table and finally the production list.

        filename  -- path of the file to create (opened in binary mode).
        signature -- grammar signature stored alongside the tables.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        outf = open(filename,"wb")
        # try/finally guarantees the handle is released even when one of the
        # dumps fails (e.g. an unpicklable object or a full disk).
        try:
            pickle.dump(__tabversion__,outf,pickle_protocol)
            pickle.dump(self.lr_method,outf,pickle_protocol)
            pickle.dump(signature,outf,pickle_protocol)
            pickle.dump(self.lr_action,outf,pickle_protocol)
            pickle.dump(self.lr_goto,outf,pickle_protocol)

            # Productions are flattened to plain tuples; productions without
            # an action function store None for func/file/line.
            outp = []
            for p in self.lr_productions:
                if p.func:
                    outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    outp.append((str(p),p.name,p.len,None,None,None))
            pickle.dump(outp,outf,pickle_protocol)
        finally:
            outf.close()
				2686
				2687	# -----------------------------------------------------------------------------
				2688	# === INTROSPECTION ===
				2689	#
				2690	# The following functions and classes are used to implement the PLY
				2691	# introspection features followed by the yacc() function itself.
				2692	# -----------------------------------------------------------------------------
				2693
				2694	# -----------------------------------------------------------------------------
				2695	# get_caller_module_dict()
				2696	#
				2697	# This function returns a dictionary containing all of the symbols defined within
				2698	# a caller further down the call stack. This is used to get the environment
				2699	# associated with the yacc() call if none was provided.
				2700	# -----------------------------------------------------------------------------
				2701
				2702	def get_caller_module_dict(levels):
				2703	try:
				2704	raise RuntimeError
				2705	except RuntimeError:
				2706	e,b,t = sys.exc_info()
				2707	f = t.tb_frame
				2708	while levels > 0:
				2709	f = f.f_back
				2710	levels -= 1
				2711	ldict = f.f_globals.copy()
				2712	if f.f_globals != f.f_locals:
				2713	ldict.update(f.f_locals)
				2714
				2715	return ldict
				2716
				2717	# -----------------------------------------------------------------------------
				2718	# parse_grammar()
				2719	#
				2720	# This takes a raw grammar rule string and parses it into production data
				2721	# -----------------------------------------------------------------------------
				2722	def parse_grammar(doc,file,line):
				2723	grammar = []
				2724	# Split the doc string into lines
				2725	pstrings = doc.splitlines()
				2726	lastp = None
				2727	dline = line
				2728	for ps in pstrings:
				2729	dline += 1
				2730	p = ps.split()
				2731	if not p: continue
				2732	try:
				2733	if p[0] == '\|':
				2734	# This is a continuation of a previous rule
				2735	if not lastp:
				2736	raise SyntaxError("%s:%d: Misplaced '\|'" % (file,dline))
				2737	prodname = lastp
				2738	syms = p[1:]
				2739	else:
				2740	prodname = p[0]
				2741	lastp = prodname
				2742	syms = p[2:]
				2743	assign = p[1]
				2744	if assign != ':' and assign != '::=':
				2745	raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
				2746
				2747	grammar.append((file,dline,prodname,syms))
				2748	except SyntaxError:
				2749	raise
				2750	except Exception:
				2751	raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
				2752
				2753	return grammar
				2754
				2755	# -----------------------------------------------------------------------------
				2756	# ParserReflect()
				2757	#
				2758	# This class represents information extracted for building a parser including
				2759	# start symbol, error function, tokens, precedence list, action functions,
				2760	# etc.
				2761	# -----------------------------------------------------------------------------
				2762	class ParserReflect(object):
				2763	def __init__(self,pdict,log=None):
				2764	self.pdict = pdict
				2765	self.start = None
				2766	self.error_func = None
				2767	self.tokens = None
				2768	self.files = {}
				2769	self.grammar = []
				2770	self.error = 0
				2771
				2772	if log is None:
				2773	self.log = PlyLogger(sys.stderr)
				2774	else:
				2775	self.log = log
				2776
				2777	# Get all of the basic information
				2778	def get_all(self):
				2779	self.get_start()
				2780	self.get_error_func()
				2781	self.get_tokens()
				2782	self.get_precedence()
				2783	self.get_pfunctions()
				2784
				2785	# Validate all of the information
				2786	def validate_all(self):
				2787	self.validate_start()
				2788	self.validate_error_func()
				2789	self.validate_tokens()
				2790	self.validate_precedence()
				2791	self.validate_pfunctions()
				2792	self.validate_files()
				2793	return self.error
				2794
				2795	# Compute a signature over the grammar
				2796	def signature(self):
				2797	try:
				2798	from hashlib import md5
				2799	except ImportError:
				2800	from md5 import md5
				2801	try:
				2802	sig = md5()
				2803	if self.start:
				2804	sig.update(self.start.encode('latin-1'))
				2805	if self.prec:
				2806	sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
				2807	if self.tokens:
				2808	sig.update(" ".join(self.tokens).encode('latin-1'))
				2809	for f in self.pfuncs:
				2810	if f[3]:
				2811	sig.update(f[3].encode('latin-1'))
				2812	except (TypeError,ValueError):
				2813	pass
				2814	return sig.digest()
				2815
				2816	# -----------------------------------------------------------------------------
				2817	# validate_file()
				2818	#
				2819	# This method checks to see if there are duplicated p_rulename() functions
				2820	# in the parser module file. Without this function, it is really easy for
				2821	# users to make mistakes by cutting and pasting code fragments (and it's a real
				2822	# bugger to try and figure out why the resulting parser doesn't work). Therefore,
				2823	# we just do a little regular expression pattern matching of def statements
				2824	# to try and detect duplicates.
				2825	# -----------------------------------------------------------------------------
				2826
				2827	def validate_files(self):
				2828	# Match def p_funcname(
				2829	fre = re.compile(r'\sdef\s+(p_[a-zA-Z_0-9])\(')
				2830
				2831	for filename in self.files.keys():
				2832	base,ext = os.path.splitext(filename)
				2833	if ext != '.py': return 1 # No idea. Assume it's okay.
				2834
				2835	try:
				2836	f = open(filename)
				2837	lines = f.readlines()
				2838	f.close()
				2839	except IOError:
				2840	continue
				2841
				2842	counthash = { }
				2843	for linen,l in enumerate(lines):
				2844	linen += 1
				2845	m = fre.match(l)
				2846	if m:
				2847	name = m.group(1)
				2848	prev = counthash.get(name)
				2849	if not prev:
				2850	counthash[name] = linen
				2851	else:
				2852	self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
				2853
				2854	# Get the start symbol
				2855	def get_start(self):
				2856	self.start = self.pdict.get('start')
				2857
				2858	# Validate the start symbol
				2859	def validate_start(self):
				2860	if self.start is not None:
				2861	if not isinstance(self.start,str):
				2862	self.log.error("'start' must be a string")
				2863
				2864	# Look for error handler
				2865	def get_error_func(self):
				2866	self.error_func = self.pdict.get('p_error')
				2867
				2868	# Validate the error function
				2869	def validate_error_func(self):
				2870	if self.error_func:
				2871	if isinstance(self.error_func,types.FunctionType):
				2872	ismethod = 0
				2873	elif isinstance(self.error_func, types.MethodType):
				2874	ismethod = 1
				2875	else:
				2876	self.log.error("'p_error' defined, but is not a function or method")
				2877	self.error = 1
				2878	return
				2879
				2880	eline = func_code(self.error_func).co_firstlineno
				2881	efile = func_code(self.error_func).co_filename
				2882	self.files[efile] = 1
				2883
				2884	if (func_code(self.error_func).co_argcount != 1+ismethod):
				2885	self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
				2886	self.error = 1
				2887
				2888	# Get the tokens map
				2889	def get_tokens(self):
				2890	tokens = self.pdict.get("tokens",None)
				2891	if not tokens:
				2892	self.log.error("No token list is defined")
				2893	self.error = 1
				2894	return
				2895
				2896	if not isinstance(tokens,(list, tuple)):
				2897	self.log.error("tokens must be a list or tuple")
				2898	self.error = 1
				2899	return
				2900
				2901	if not tokens:
				2902	self.log.error("tokens is empty")
				2903	self.error = 1
				2904	return
				2905
				2906	self.tokens = tokens
				2907
				2908	# Validate the tokens
				2909	def validate_tokens(self):
				2910	# Validate the tokens.
				2911	if 'error' in self.tokens:
				2912	self.log.error("Illegal token name 'error'. Is a reserved word")
				2913	self.error = 1
				2914	return
				2915
				2916	terminals = {}
				2917	for n in self.tokens:
				2918	if n in terminals:
				2919	self.log.warning("Token '%s' multiply defined", n)
				2920	terminals[n] = 1
				2921
				2922	# Get the precedence map (if any)
				2923	def get_precedence(self):
				2924	self.prec = self.pdict.get("precedence",None)
				2925
				2926	# Validate and parse the precedence map
				2927	def validate_precedence(self):
				2928	preclist = []
				2929	if self.prec:
				2930	if not isinstance(self.prec,(list,tuple)):
				2931	self.log.error("precedence must be a list or tuple")
				2932	self.error = 1
				2933	return
				2934	for level,p in enumerate(self.prec):
				2935	if not isinstance(p,(list,tuple)):
				2936	self.log.error("Bad precedence table")
				2937	self.error = 1
				2938	return
				2939
				2940	if len(p) < 2:
				2941	self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
				2942	self.error = 1
				2943	return
				2944	assoc = p[0]
				2945	if not isinstance(assoc,str):
				2946	self.log.error("precedence associativity must be a string")
				2947	self.error = 1
				2948	return
				2949	for term in p[1:]:
				2950	if not isinstance(term,str):
				2951	self.log.error("precedence items must be strings")
				2952	self.error = 1
				2953	return
				2954	preclist.append((term,assoc,level+1))
				2955	self.preclist = preclist
				2956
				2957	# Get all p_functions from the grammar
				2958	def get_pfunctions(self):
				2959	p_functions = []
				2960	for name, item in self.pdict.items():
				2961	if name[:2] != 'p_': continue
				2962	if name == 'p_error': continue
				2963	if isinstance(item,(types.FunctionType,types.MethodType)):
				2964	line = func_code(item).co_firstlineno
				2965	file = func_code(item).co_filename
				2966	p_functions.append((line,file,name,item.__doc__))
				2967
				2968	# Sort all of the actions by line number
				2969	p_functions.sort()
				2970	self.pfuncs = p_functions
				2971
				2972
				2973	# Validate all of the p_functions
				2974	def validate_pfunctions(self):
				2975	grammar = []
				2976	# Check for non-empty symbols
				2977	if len(self.pfuncs) == 0:
				2978	self.log.error("no rules of the form p_rulename are defined")
				2979	self.error = 1
				2980	return
				2981
				2982	for line, file, name, doc in self.pfuncs:
				2983	func = self.pdict[name]
				2984	if isinstance(func, types.MethodType):
				2985	reqargs = 2
				2986	else:
				2987	reqargs = 1
				2988	if func_code(func).co_argcount > reqargs:
				2989	self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
				2990	self.error = 1
				2991	elif func_code(func).co_argcount < reqargs:
				2992	self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
				2993	self.error = 1
				2994	elif not func.__doc__:
				2995	self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
				2996	else:
				2997	try:
				2998	parsed_g = parse_grammar(doc,file,line)
				2999	for g in parsed_g:
				3000	grammar.append((name, g))
				3001	except SyntaxError:
				3002	e = sys.exc_info()[1]
				3003	self.log.error(str(e))
				3004	self.error = 1
				3005
				3006	# Looks like a valid grammar rule
				3007	# Mark the file in which defined.
				3008	self.files[file] = 1
				3009
				3010	# Secondary validation step that looks for p_ definitions that are not functions
				3011	# or functions that look like they might be grammar rules.
				3012
				3013	for n,v in self.pdict.items():
				3014	if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
				3015	if n[0:2] == 't_': continue
				3016	if n[0:2] == 'p_' and n != 'p_error':
				3017	self.log.warning("'%s' not defined as a function", n)
				3018	if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
				3019	(isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
				3020	try:
				3021	doc = v.__doc__.split(" ")
				3022	if doc[1] == ':':
				3023	self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
				3024	func_code(v).co_filename, func_code(v).co_firstlineno,n)
				3025	except Exception:
				3026	pass
				3027
				3028	self.grammar = grammar
				3029
				3030	# -----------------------------------------------------------------------------
				3031	# yacc(module)
				3032	#
				3033	# Build a parser
				3034	# -----------------------------------------------------------------------------
				3035
				3036	def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
				3037	check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
				3038	debuglog=None, errorlog = None, picklefile=None):
				3039
				3040	global parse # Reference to the parsing method of the last built parser
				3041
				3042	# If pickling is enabled, table files are not created
				3043
				3044	if picklefile:
				3045	write_tables = 0
				3046
				3047	if errorlog is None:
				3048	errorlog = PlyLogger(sys.stderr)
				3049
				3050	# Get the module dictionary used for the parser
				3051	if module:
				3052	_items = [(k,getattr(module,k)) for k in dir(module)]
				3053	pdict = dict(_items)
				3054	else:
				3055	pdict = get_caller_module_dict(2)
				3056
				3057	# Collect parser information from the dictionary
				3058	pinfo = ParserReflect(pdict,log=errorlog)
				3059	pinfo.get_all()
				3060
				3061	if pinfo.error:
				3062	raise YaccError("Unable to build parser")
				3063
				3064	# Check signature against table files (if any)
				3065	signature = pinfo.signature()
				3066
				3067	# Read the tables
				3068	try:
				3069	lr = LRTable()
				3070	if picklefile:
				3071	read_signature = lr.read_pickle(picklefile)
				3072	else:
				3073	read_signature = lr.read_table(tabmodule)
				3074	if optimize or (read_signature == signature):
				3075	try:
				3076	lr.bind_callables(pinfo.pdict)
				3077	parser = LRParser(lr,pinfo.error_func)
				3078	parse = parser.parse
				3079	return parser
				3080	except Exception:
				3081	e = sys.exc_info()[1]
				3082	errorlog.warning("There was a problem loading the table file: %s", repr(e))
				3083	except VersionError:
				3084	e = sys.exc_info()
				3085	errorlog.warning(str(e))
				3086	except Exception:
				3087	pass
				3088
				3089	if debuglog is None:
				3090	if debug:
				3091	debuglog = PlyLogger(open(debugfile,"w"))
				3092	else:
				3093	debuglog = NullLogger()
				3094
				3095	debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
				3096
				3097
				3098	errors = 0
				3099
				3100	# Validate the parser information
				3101	if pinfo.validate_all():
				3102	raise YaccError("Unable to build parser")
				3103
				3104	if not pinfo.error_func:
				3105	errorlog.warning("no p_error() function is defined")
				3106
				3107	# Create a grammar object
				3108	grammar = Grammar(pinfo.tokens)
				3109
				3110	# Set precedence level for terminals
				3111	for term, assoc, level in pinfo.preclist:
				3112	try:
				3113	grammar.set_precedence(term,assoc,level)
				3114	except GrammarError:
				3115	e = sys.exc_info()[1]
				3116	errorlog.warning("%s",str(e))
				3117
				3118	# Add productions to the grammar
				3119	for funcname, gram in pinfo.grammar:
				3120	file, line, prodname, syms = gram
				3121	try:
				3122	grammar.add_production(prodname,syms,funcname,file,line)
				3123	except GrammarError:
				3124	e = sys.exc_info()[1]
				3125	errorlog.error("%s",str(e))
				3126	errors = 1
				3127
				3128	# Set the grammar start symbols
				3129	try:
				3130	if start is None:
				3131	grammar.set_start(pinfo.start)
				3132	else:
				3133	grammar.set_start(start)
				3134	except GrammarError:
				3135	e = sys.exc_info()[1]
				3136	errorlog.error(str(e))
				3137	errors = 1
				3138
				3139	if errors:
				3140	raise YaccError("Unable to build parser")
				3141
				3142	# Verify the grammar structure
				3143	undefined_symbols = grammar.undefined_symbols()
				3144	for sym, prod in undefined_symbols:
				3145	errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
				3146	errors = 1
				3147
				3148	unused_terminals = grammar.unused_terminals()
				3149	if unused_terminals:
				3150	debuglog.info("")
				3151	debuglog.info("Unused terminals:")
				3152	debuglog.info("")
				3153	for term in unused_terminals:
				3154	errorlog.warning("Token '%s' defined, but not used", term)
				3155	debuglog.info(" %s", term)
				3156
				3157	# Print out all productions to the debug log
				3158	if debug:
				3159	debuglog.info("")
				3160	debuglog.info("Grammar")
				3161	debuglog.info("")
				3162	for n,p in enumerate(grammar.Productions):
				3163	debuglog.info("Rule %-5d %s", n, p)
				3164
				3165	# Find unused non-terminals
				3166	unused_rules = grammar.unused_rules()
				3167	for prod in unused_rules:
				3168	errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
				3169
				3170	if len(unused_terminals) == 1:
				3171	errorlog.warning("There is 1 unused token")
				3172	if len(unused_terminals) > 1:
				3173	errorlog.warning("There are %d unused tokens", len(unused_terminals))
				3174
				3175	if len(unused_rules) == 1:
				3176	errorlog.warning("There is 1 unused rule")
				3177	if len(unused_rules) > 1:
				3178	errorlog.warning("There are %d unused rules", len(unused_rules))
				3179
				3180	if debug:
				3181	debuglog.info("")
				3182	debuglog.info("Terminals, with rules where they appear")
				3183	debuglog.info("")
				3184	terms = list(grammar.Terminals)
				3185	terms.sort()
				3186	for term in terms:
				3187	debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
				3188
				3189	debuglog.info("")
				3190	debuglog.info("Nonterminals, with rules where they appear")
				3191	debuglog.info("")
				3192	nonterms = list(grammar.Nonterminals)
				3193	nonterms.sort()
				3194	for nonterm in nonterms:
				3195	debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
				3196	debuglog.info("")
				3197
				3198	if check_recursion:
				3199	unreachable = grammar.find_unreachable()
				3200	for u in unreachable:
				3201	errorlog.warning("Symbol '%s' is unreachable",u)
				3202
				3203	infinite = grammar.infinite_cycles()
				3204	for inf in infinite:
				3205	errorlog.error("Infinite recursion detected for symbol '%s'", inf)
				3206	errors = 1
				3207
				3208	unused_prec = grammar.unused_precedence()
				3209	for term, assoc in unused_prec:
				3210	errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
				3211	errors = 1
				3212
				3213	if errors:
				3214	raise YaccError("Unable to build parser")
				3215
				3216	# Run the LRGeneratedTable on the grammar
				3217	if debug:
				3218	errorlog.debug("Generating %s tables", method)
				3219
				3220	lr = LRGeneratedTable(grammar,method,debuglog)
				3221
				3222	if debug:
				3223	num_sr = len(lr.sr_conflicts)
				3224
				3225	# Report shift/reduce and reduce/reduce conflicts
				3226	if num_sr == 1:
				3227	errorlog.warning("1 shift/reduce conflict")
				3228	elif num_sr > 1:
				3229	errorlog.warning("%d shift/reduce conflicts", num_sr)
				3230
				3231	num_rr = len(lr.rr_conflicts)
				3232	if num_rr == 1:
				3233	errorlog.warning("1 reduce/reduce conflict")
				3234	elif num_rr > 1:
				3235	errorlog.warning("%d reduce/reduce conflicts", num_rr)
				3236
				3237	# Write out conflicts to the output file
				3238	if debug and (lr.sr_conflicts or lr.rr_conflicts):
				3239	debuglog.warning("")
				3240	debuglog.warning("Conflicts:")
				3241	debuglog.warning("")
				3242
				3243	for state, tok, resolution in lr.sr_conflicts:
				3244	debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
				3245
				3246	already_reported = {}
				3247	for state, rule, rejected in lr.rr_conflicts:
				3248	if (state,id(rule),id(rejected)) in already_reported:
				3249	continue
				3250	debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
				3251	debuglog.warning("rejected rule (%s) in state %d", rejected,state)
				3252	errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
				3253	errorlog.warning("rejected rule (%s) in state %d", rejected, state)
				3254	already_reported[state,id(rule),id(rejected)] = 1
				3255
				3256	warned_never = []
				3257	for state, rule, rejected in lr.rr_conflicts:
				3258	if not rejected.reduced and (rejected not in warned_never):
				3259	debuglog.warning("Rule (%s) is never reduced", rejected)
				3260	errorlog.warning("Rule (%s) is never reduced", rejected)
				3261	warned_never.append(rejected)
				3262
				3263	# Write the table file if requested
				3264	if write_tables:
				3265	lr.write_table(tabmodule,outputdir,signature)
				3266
				3267	# Write a pickled version of the tables
				3268	if picklefile:
				3269	lr.pickle_table(picklefile,signature)
				3270
				3271	# Build the parser
				3272	lr.bind_callables(pinfo.pdict)
				3273	parser = LRParser(lr,pinfo.error_func)
				3274
				3275	parse = parser.parse
				3276	return parser