Blame - poky/bitbake/lib/ply/yacc.py - openbmc/openbmc

blob: 381b50cf0ba73066539cdf8e3543f62fcdf0c79b [file] [log] [blame]

Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	1	# -----------------------------------------------------------------------------
				2	# ply: yacc.py
				3	#
				4	# Copyright (C) 2001-2009,
				5	# David M. Beazley (Dabeaz LLC)
				6	# All rights reserved.
				7	#
				8	# Redistribution and use in source and binary forms, with or without
				9	# modification, are permitted provided that the following conditions are
				10	# met:
				11	#
				12	# * Redistributions of source code must retain the above copyright notice,
				13	# this list of conditions and the following disclaimer.
				14	# * Redistributions in binary form must reproduce the above copyright notice,
				15	# this list of conditions and the following disclaimer in the documentation
				16	# and/or other materials provided with the distribution.
				17	# * Neither the name of the David Beazley or Dabeaz LLC may be used to
				18	# endorse or promote products derived from this software without
				19	# specific prior written permission.
				20	#
				21	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				22	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				23	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				24	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				25	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				26	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				27	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				28	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				29	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				30	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				31	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				32	# -----------------------------------------------------------------------------
				33	#
				34	# This implements an LR parser that is constructed from grammar rules defined
				35	# as Python functions. The grammer is specified by supplying the BNF inside
				36	# Python documentation strings. The inspiration for this technique was borrowed
				37	# from John Aycock's Spark parsing system. PLY might be viewed as cross between
				38	# Spark and the GNU bison utility.
				39	#
				40	# The current implementation is only somewhat object-oriented. The
				41	# LR parser itself is defined in terms of an object (which allows multiple
				42	# parsers to co-exist). However, most of the variables used during table
				43	# construction are defined in terms of global variables. Users shouldn't
				44	# notice unless they are trying to define multiple parsers at the same
				45	# time using threads (in which case they should have their head examined).
				46	#
				47	# This implementation supports both SLR and LALR(1) parsing. LALR(1)
				48	# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
				49	# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
				50	# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
				51	# by the more efficient DeRemer and Pennello algorithm.
				52	#
				53	# :::::::: WARNING :::::::
				54	#
				55	# Construction of LR parsing tables is fairly complicated and expensive.
				56	# To make this module run fast, a LOT of work has been put into
				57	# optimization---often at the expensive of readability and what might
				58	# consider to be good Python "coding style." Modify the code at your
				59	# own risk!
				60	# ----------------------------------------------------------------------------
				61
				62	__version__ = "3.3"
				63	__tabversion__ = "3.2" # Table version
				64
				65	#-----------------------------------------------------------------------------
				66	# === User configurable parameters ===
				67	#
				68	# Change these to modify the default behavior of yacc (if you wish)
				69	#-----------------------------------------------------------------------------
				70
				71	yaccdebug = 0 # Debugging mode. If set, yacc generates a
				72	# a 'parser.out' file in the current directory
				73
				74	debug_file = 'parser.out' # Default name of the debugging file
				75	tab_module = 'parsetab' # Default name of the table module
				76	default_lr = 'LALR' # Default LR table generation method
				77
				78	error_count = 3 # Number of symbols that must be shifted to leave recovery mode
				79
				80	yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
				81	# implementations of certain functions.
				82
				83	resultlimit = 40 # Size limit of results when running in debug mode.
				84
				85	pickle_protocol = 0 # Protocol to use when writing pickle files
				86
				87	import re, types, sys, os.path
				88
				89	# Compatibility function for python 2.6/3.0
				90	if sys.version_info[0] < 3:
				91	def func_code(f):
				92	return f.func_code
				93	else:
				94	def func_code(f):
				95	return f.__code__
				96
				97	# Compatibility
				98	try:
				99	MAXINT = sys.maxint
				100	except AttributeError:
				101	MAXINT = sys.maxsize
				102
				103	# Python 2.x/3.0 compatibility.
				104	def load_ply_lex():
				105	if sys.version_info[0] < 3:
				106	import lex
				107	else:
				108	import ply.lex as lex
				109	return lex
				110
				111	# This object is a stand-in for a logging object created by the
				112	# logging module. PLY will use this by default to create things
				113	# such as the parser.out file. If a user wants more detailed
				114	# information, they can create their own logging object and pass
				115	# it into PLY.
				116
				117	class PlyLogger(object):
				118	def __init__(self,f):
				119	self.f = f
				120	def debug(self,msg,args,*kwargs):
				121	self.f.write((msg % args) + "\n")
				122	info = debug
				123
				124	def warning(self,msg,args,*kwargs):
				125	self.f.write("WARNING: "+ (msg % args) + "\n")
				126
				127	def error(self,msg,args,*kwargs):
				128	self.f.write("ERROR: " + (msg % args) + "\n")
				129
				130	critical = debug
				131
				132	# Null logger is used when no output is generated. Does nothing.
				133	class NullLogger(object):
				134	def __getattribute__(self,name):
				135	return self
				136	def __call__(self,args,*kwargs):
				137	return self
				138
				139	# Exception raised for yacc-related errors
				140	class YaccError(Exception): pass
				141
				142	# Format the result message that the parser produces when running in debug mode.
				143	def format_result(r):
				144	repr_str = repr(r)
				145	if '\n' in repr_str: repr_str = repr(repr_str)
				146	if len(repr_str) > resultlimit:
				147	repr_str = repr_str[:resultlimit]+" ..."
				148	result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
				149	return result
				150
				151
				152	# Format stack entries when the parser is running in debug mode
				153	def format_stack_entry(r):
				154	repr_str = repr(r)
				155	if '\n' in repr_str: repr_str = repr(repr_str)
				156	if len(repr_str) < 16:
				157	return repr_str
				158	else:
				159	return "<%s @ 0x%x>" % (type(r).__name__,id(r))
				160
				161	#-----------------------------------------------------------------------------
				162	# === LR Parsing Engine ===
				163	#
				164	# The following classes are used for the LR parser itself. These are not
				165	# used during table construction and are independent of the actual LR
				166	# table generation algorithm
				167	#-----------------------------------------------------------------------------
				168
				169	# This class is used to hold non-terminal grammar symbols during parsing.
				170	# It normally has the following attributes set:
				171	# .type = Grammar symbol type
				172	# .value = Symbol value
				173	# .lineno = Starting line number
				174	# .endlineno = Ending line number (optional, set automatically)
				175	# .lexpos = Starting lex position
				176	# .endlexpos = Ending lex position (optional, set automatically)
				177
				178	class YaccSymbol:
				179	def __str__(self): return self.type
				180	def __repr__(self): return str(self)
				181
				182	# This class is a wrapper around the objects actually passed to each
				183	# grammar rule. Index lookup and assignment actually assign the
				184	# .value attribute of the underlying YaccSymbol object.
				185	# The lineno() method returns the line number of a given
				186	# item (or 0 if not defined). The linespan() method returns
				187	# a tuple of (startline,endline) representing the range of lines
				188	# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
				189	# representing the range of positional information for a symbol.
				190
				191	class YaccProduction:
				192	def __init__(self,s,stack=None):
				193	self.slice = s
				194	self.stack = stack
				195	self.lexer = None
				196	self.parser= None
				197	def __getitem__(self,n):
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	198	if isinstance(n,slice):
				199	return [self[i] for i in range(*(n.indices(len(self.slice))))]
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	200	if n >= 0: return self.slice[n].value
				201	else: return self.stack[n].value
				202
				203	def __setitem__(self,n,v):
				204	self.slice[n].value = v
				205
				206	def __getslice__(self,i,j):
				207	return [s.value for s in self.slice[i:j]]
				208
				209	def __len__(self):
				210	return len(self.slice)
				211
				212	def lineno(self,n):
				213	return getattr(self.slice[n],"lineno",0)
				214
				215	def set_lineno(self,n,lineno):
				216	self.slice[n].lineno = lineno
				217
				218	def linespan(self,n):
				219	startline = getattr(self.slice[n],"lineno",0)
				220	endline = getattr(self.slice[n],"endlineno",startline)
				221	return startline,endline
				222
				223	def lexpos(self,n):
				224	return getattr(self.slice[n],"lexpos",0)
				225
				226	def lexspan(self,n):
				227	startpos = getattr(self.slice[n],"lexpos",0)
				228	endpos = getattr(self.slice[n],"endlexpos",startpos)
				229	return startpos,endpos
				230
				231	def error(self):
				232	raise SyntaxError
				233
				234
				235	# -----------------------------------------------------------------------------
				236	# == LRParser ==
				237	#
				238	# The LR Parsing engine.
				239	# -----------------------------------------------------------------------------
				240
				241	class LRParser:
				242	def __init__(self,lrtab,errorf):
				243	self.productions = lrtab.lr_productions
				244	self.action = lrtab.lr_action
				245	self.goto = lrtab.lr_goto
				246	self.errorfunc = errorf
				247
				248	def errok(self):
				249	self.errorok = 1
				250
				251	def restart(self):
				252	del self.statestack[:]
				253	del self.symstack[:]
				254	sym = YaccSymbol()
				255	sym.type = '$end'
				256	self.symstack.append(sym)
				257	self.statestack.append(0)
				258
				259	def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
				260	if debug or yaccdevel:
				261	if isinstance(debug,int):
				262	debug = PlyLogger(sys.stderr)
				263	return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
				264	elif tracking:
				265	return self.parseopt(input,lexer,debug,tracking,tokenfunc)
				266	else:
				267	return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
				268
				269
				270	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				271	# parsedebug().
				272	#
				273	# This is the debugging enabled version of parse(). All changes made to the
				274	# parsing engine should be made here. For the non-debugging version,
				275	# copy this code to a method parseopt() and delete all of the sections
				276	# enclosed in:
				277	#
				278	# #--! DEBUG
				279	# statements
				280	# #--! DEBUG
				281	#
				282	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				283
				284	def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
				285	lookahead = None # Current lookahead symbol
				286	lookaheadstack = [ ] # Stack of lookahead symbols
				287	actions = self.action # Local reference to action table (to avoid lookup on self.)
				288	goto = self.goto # Local reference to goto table (to avoid lookup on self.)
				289	prod = self.productions # Local reference to production list (to avoid lookup on self.)
				290	pslice = YaccProduction(None) # Production object passed to grammar rules
				291	errorcount = 0 # Used during error recovery
				292
				293	# --! DEBUG
				294	debug.info("PLY: PARSE DEBUG START")
				295	# --! DEBUG
				296
				297	# If no lexer was given, we will try to use the lex module
				298	if not lexer:
				299	lex = load_ply_lex()
				300	lexer = lex.lexer
				301
				302	# Set up the lexer and parser objects on pslice
				303	pslice.lexer = lexer
				304	pslice.parser = self
				305
				306	# If input was supplied, pass to lexer
				307	if input is not None:
				308	lexer.input(input)
				309
				310	if tokenfunc is None:
				311	# Tokenize function
				312	get_token = lexer.token
				313	else:
				314	get_token = tokenfunc
				315
				316	# Set up the state and symbol stacks
				317
				318	statestack = [ ] # Stack of parsing states
				319	self.statestack = statestack
				320	symstack = [ ] # Stack of grammar symbols
				321	self.symstack = symstack
				322
				323	pslice.stack = symstack # Put in the production
				324	errtoken = None # Err token
				325
				326	# The start state is assumed to be (0,$end)
				327
				328	statestack.append(0)
				329	sym = YaccSymbol()
				330	sym.type = "$end"
				331	symstack.append(sym)
				332	state = 0
				333	while 1:
				334	# Get the next symbol on the input. If a lookahead symbol
				335	# is already set, we just use that. Otherwise, we'll pull
				336	# the next token off of the lookaheadstack or from the lexer
				337
				338	# --! DEBUG
				339	debug.debug('')
				340	debug.debug('State : %s', state)
				341	# --! DEBUG
				342
				343	if not lookahead:
				344	if not lookaheadstack:
				345	lookahead = get_token() # Get the next token
				346	else:
				347	lookahead = lookaheadstack.pop()
				348	if not lookahead:
				349	lookahead = YaccSymbol()
				350	lookahead.type = "$end"
				351
				352	# --! DEBUG
				353	debug.debug('Stack : %s',
				354	("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
				355	# --! DEBUG
				356
				357	# Check the action table
				358	ltype = lookahead.type
				359	t = actions[state].get(ltype)
				360
				361	if t is not None:
				362	if t > 0:
				363	# shift a symbol on the stack
				364	statestack.append(t)
				365	state = t
				366
				367	# --! DEBUG
				368	debug.debug("Action : Shift and goto state %s", t)
				369	# --! DEBUG
				370
				371	symstack.append(lookahead)
				372	lookahead = None
				373
				374	# Decrease error count on successful shift
				375	if errorcount: errorcount -=1
				376	continue
				377
				378	if t < 0:
				379	# reduce a symbol on the stack, emit a production
				380	p = prod[-t]
				381	pname = p.name
				382	plen = p.len
				383
				384	# Get production function
				385	sym = YaccSymbol()
				386	sym.type = pname # Production name
				387	sym.value = None
				388
				389	# --! DEBUG
				390	if plen:
				391	debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
				392	else:
				393	debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
				394
				395	# --! DEBUG
				396
				397	if plen:
				398	targ = symstack[-plen-1:]
				399	targ[0] = sym
				400
				401	# --! TRACKING
				402	if tracking:
				403	t1 = targ[1]
				404	sym.lineno = t1.lineno
				405	sym.lexpos = t1.lexpos
				406	t1 = targ[-1]
				407	sym.endlineno = getattr(t1,"endlineno",t1.lineno)
				408	sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
				409
				410	# --! TRACKING
				411
				412	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				413	# The code enclosed in this section is duplicated
				414	# below as a performance optimization. Make sure
				415	# changes get made in both locations.
				416
				417	pslice.slice = targ
				418
				419	try:
				420	# Call the grammar rule with our special slice object
				421	del symstack[-plen:]
				422	del statestack[-plen:]
				423	p.callable(pslice)
				424	# --! DEBUG
				425	debug.info("Result : %s", format_result(pslice[0]))
				426	# --! DEBUG
				427	symstack.append(sym)
				428	state = goto[statestack[-1]][pname]
				429	statestack.append(state)
				430	except SyntaxError:
				431	# If an error was set. Enter error recovery state
				432	lookaheadstack.append(lookahead)
				433	symstack.pop()
				434	statestack.pop()
				435	state = statestack[-1]
				436	sym.type = 'error'
				437	lookahead = sym
				438	errorcount = error_count
				439	self.errorok = 0
				440	continue
				441	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				442
				443	else:
				444
				445	# --! TRACKING
				446	if tracking:
				447	sym.lineno = lexer.lineno
				448	sym.lexpos = lexer.lexpos
				449	# --! TRACKING
				450
				451	targ = [ sym ]
				452
				453	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				454	# The code enclosed in this section is duplicated
				455	# above as a performance optimization. Make sure
				456	# changes get made in both locations.
				457
				458	pslice.slice = targ
				459
				460	try:
				461	# Call the grammar rule with our special slice object
				462	p.callable(pslice)
				463	# --! DEBUG
				464	debug.info("Result : %s", format_result(pslice[0]))
				465	# --! DEBUG
				466	symstack.append(sym)
				467	state = goto[statestack[-1]][pname]
				468	statestack.append(state)
				469	except SyntaxError:
				470	# If an error was set. Enter error recovery state
				471	lookaheadstack.append(lookahead)
				472	symstack.pop()
				473	statestack.pop()
				474	state = statestack[-1]
				475	sym.type = 'error'
				476	lookahead = sym
				477	errorcount = error_count
				478	self.errorok = 0
				479	continue
				480	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				481
				482	if t == 0:
				483	n = symstack[-1]
				484	result = getattr(n,"value",None)
				485	# --! DEBUG
				486	debug.info("Done : Returning %s", format_result(result))
				487	debug.info("PLY: PARSE DEBUG END")
				488	# --! DEBUG
				489	return result
				490
Andrew Geissler	82c905d	2020-04-13 13:39:40 -0500	[diff] [blame]	491	if t is None:
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	492
				493	# --! DEBUG
				494	debug.error('Error : %s',
				495	("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
				496	# --! DEBUG
				497
				498	# We have some kind of parsing error here. To handle
				499	# this, we are going to push the current token onto
				500	# the tokenstack and replace it with an 'error' token.
				501	# If there are any synchronization rules, they may
				502	# catch it.
				503	#
				504	# In addition to pushing the error token, we call call
				505	# the user defined p_error() function if this is the
				506	# first syntax error. This function is only called if
				507	# errorcount == 0.
				508	if errorcount == 0 or self.errorok:
				509	errorcount = error_count
				510	self.errorok = 0
				511	errtoken = lookahead
				512	if errtoken.type == "$end":
				513	errtoken = None # End of file!
				514	if self.errorfunc:
				515	global errok,token,restart
				516	errok = self.errok # Set some special functions available in error recovery
				517	token = get_token
				518	restart = self.restart
				519	if errtoken and not hasattr(errtoken,'lexer'):
				520	errtoken.lexer = lexer
				521	tok = self.errorfunc(errtoken)
				522	del errok, token, restart # Delete special functions
				523
				524	if self.errorok:
				525	# User must have done some kind of panic
				526	# mode recovery on their own. The
				527	# returned token is the next lookahead
				528	lookahead = tok
				529	errtoken = None
				530	continue
				531	else:
				532	if errtoken:
				533	if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
				534	else: lineno = 0
				535	if lineno:
				536	sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
				537	else:
				538	sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
				539	else:
				540	sys.stderr.write("yacc: Parse error in input. EOF\n")
				541	return
				542
				543	else:
				544	errorcount = error_count
				545
				546	# case 1: the statestack only has 1 entry on it. If we're in this state, the
				547	# entire parse has been rolled back and we're completely hosed. The token is
				548	# discarded and we just keep going.
				549
				550	if len(statestack) <= 1 and lookahead.type != "$end":
				551	lookahead = None
				552	errtoken = None
				553	state = 0
				554	# Nuke the pushback stack
				555	del lookaheadstack[:]
				556	continue
				557
				558	# case 2: the statestack has a couple of entries on it, but we're
				559	# at the end of the file. nuke the top entry and generate an error token
				560
				561	# Start nuking entries on the stack
				562	if lookahead.type == "$end":
				563	# Whoa. We're really hosed here. Bail out
				564	return
				565
				566	if lookahead.type != 'error':
				567	sym = symstack[-1]
				568	if sym.type == 'error':
				569	# Hmmm. Error is on top of stack, we'll just nuke input
				570	# symbol and continue
				571	lookahead = None
				572	continue
				573	t = YaccSymbol()
				574	t.type = 'error'
				575	if hasattr(lookahead,"lineno"):
				576	t.lineno = lookahead.lineno
				577	t.value = lookahead
				578	lookaheadstack.append(lookahead)
				579	lookahead = t
				580	else:
				581	symstack.pop()
				582	statestack.pop()
				583	state = statestack[-1] # Potential bug fix
				584
				585	continue
				586
				587	# Call an error function here
				588	raise RuntimeError("yacc: internal parser error!!!\n")
				589
				590	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				591	# parseopt().
				592	#
				593	# Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
				594	# Edit the debug version above, then copy any modifications to the method
				595	# below while removing #--! DEBUG sections.
				596	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				597
				598
				599	def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
				600	lookahead = None # Current lookahead symbol
				601	lookaheadstack = [ ] # Stack of lookahead symbols
				602	actions = self.action # Local reference to action table (to avoid lookup on self.)
				603	goto = self.goto # Local reference to goto table (to avoid lookup on self.)
				604	prod = self.productions # Local reference to production list (to avoid lookup on self.)
				605	pslice = YaccProduction(None) # Production object passed to grammar rules
				606	errorcount = 0 # Used during error recovery
				607
				608	# If no lexer was given, we will try to use the lex module
				609	if not lexer:
				610	lex = load_ply_lex()
				611	lexer = lex.lexer
				612
				613	# Set up the lexer and parser objects on pslice
				614	pslice.lexer = lexer
				615	pslice.parser = self
				616
				617	# If input was supplied, pass to lexer
				618	if input is not None:
				619	lexer.input(input)
				620
				621	if tokenfunc is None:
				622	# Tokenize function
				623	get_token = lexer.token
				624	else:
				625	get_token = tokenfunc
				626
				627	# Set up the state and symbol stacks
				628
				629	statestack = [ ] # Stack of parsing states
				630	self.statestack = statestack
				631	symstack = [ ] # Stack of grammar symbols
				632	self.symstack = symstack
				633
				634	pslice.stack = symstack # Put in the production
				635	errtoken = None # Err token
				636
				637	# The start state is assumed to be (0,$end)
				638
				639	statestack.append(0)
				640	sym = YaccSymbol()
				641	sym.type = '$end'
				642	symstack.append(sym)
				643	state = 0
				644	while 1:
				645	# Get the next symbol on the input. If a lookahead symbol
				646	# is already set, we just use that. Otherwise, we'll pull
				647	# the next token off of the lookaheadstack or from the lexer
				648
				649	if not lookahead:
				650	if not lookaheadstack:
				651	lookahead = get_token() # Get the next token
				652	else:
				653	lookahead = lookaheadstack.pop()
				654	if not lookahead:
				655	lookahead = YaccSymbol()
				656	lookahead.type = '$end'
				657
				658	# Check the action table
				659	ltype = lookahead.type
				660	t = actions[state].get(ltype)
				661
				662	if t is not None:
				663	if t > 0:
				664	# shift a symbol on the stack
				665	statestack.append(t)
				666	state = t
				667
				668	symstack.append(lookahead)
				669	lookahead = None
				670
				671	# Decrease error count on successful shift
				672	if errorcount: errorcount -=1
				673	continue
				674
				675	if t < 0:
				676	# reduce a symbol on the stack, emit a production
				677	p = prod[-t]
				678	pname = p.name
				679	plen = p.len
				680
				681	# Get production function
				682	sym = YaccSymbol()
				683	sym.type = pname # Production name
				684	sym.value = None
				685
				686	if plen:
				687	targ = symstack[-plen-1:]
				688	targ[0] = sym
				689
				690	# --! TRACKING
				691	if tracking:
				692	t1 = targ[1]
				693	sym.lineno = t1.lineno
				694	sym.lexpos = t1.lexpos
				695	t1 = targ[-1]
				696	sym.endlineno = getattr(t1,"endlineno",t1.lineno)
				697	sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
				698
				699	# --! TRACKING
				700
				701	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				702	# The code enclosed in this section is duplicated
				703	# below as a performance optimization. Make sure
				704	# changes get made in both locations.
				705
				706	pslice.slice = targ
				707
				708	try:
				709	# Call the grammar rule with our special slice object
				710	del symstack[-plen:]
				711	del statestack[-plen:]
				712	p.callable(pslice)
				713	symstack.append(sym)
				714	state = goto[statestack[-1]][pname]
				715	statestack.append(state)
				716	except SyntaxError:
				717	# If an error was set. Enter error recovery state
				718	lookaheadstack.append(lookahead)
				719	symstack.pop()
				720	statestack.pop()
				721	state = statestack[-1]
				722	sym.type = 'error'
				723	lookahead = sym
				724	errorcount = error_count
				725	self.errorok = 0
				726	continue
				727	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				728
				729	else:
				730
				731	# --! TRACKING
				732	if tracking:
				733	sym.lineno = lexer.lineno
				734	sym.lexpos = lexer.lexpos
				735	# --! TRACKING
				736
				737	targ = [ sym ]
				738
				739	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				740	# The code enclosed in this section is duplicated
				741	# above as a performance optimization. Make sure
				742	# changes get made in both locations.
				743
				744	pslice.slice = targ
				745
				746	try:
				747	# Call the grammar rule with our special slice object
				748	p.callable(pslice)
				749	symstack.append(sym)
				750	state = goto[statestack[-1]][pname]
				751	statestack.append(state)
				752	except SyntaxError:
				753	# If an error was set. Enter error recovery state
				754	lookaheadstack.append(lookahead)
				755	symstack.pop()
				756	statestack.pop()
				757	state = statestack[-1]
				758	sym.type = 'error'
				759	lookahead = sym
				760	errorcount = error_count
				761	self.errorok = 0
				762	continue
				763	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				764
				765	if t == 0:
				766	n = symstack[-1]
				767	return getattr(n,"value",None)
				768
Andrew Geissler	82c905d	2020-04-13 13:39:40 -0500	[diff] [blame]	769	if t is None:
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	770
				771	# We have some kind of parsing error here. To handle
				772	# this, we are going to push the current token onto
				773	# the tokenstack and replace it with an 'error' token.
				774	# If there are any synchronization rules, they may
				775	# catch it.
				776	#
				777	# In addition to pushing the error token, we call call
				778	# the user defined p_error() function if this is the
				779	# first syntax error. This function is only called if
				780	# errorcount == 0.
				781	if errorcount == 0 or self.errorok:
				782	errorcount = error_count
				783	self.errorok = 0
				784	errtoken = lookahead
				785	if errtoken.type == '$end':
				786	errtoken = None # End of file!
				787	if self.errorfunc:
				788	global errok,token,restart
				789	errok = self.errok # Set some special functions available in error recovery
				790	token = get_token
				791	restart = self.restart
				792	if errtoken and not hasattr(errtoken,'lexer'):
				793	errtoken.lexer = lexer
				794	tok = self.errorfunc(errtoken)
				795	del errok, token, restart # Delete special functions
				796
				797	if self.errorok:
				798	# User must have done some kind of panic
				799	# mode recovery on their own. The
				800	# returned token is the next lookahead
				801	lookahead = tok
				802	errtoken = None
				803	continue
				804	else:
				805	if errtoken:
				806	if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
				807	else: lineno = 0
				808	if lineno:
				809	sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
				810	else:
				811	sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
				812	else:
				813	sys.stderr.write("yacc: Parse error in input. EOF\n")
				814	return
				815
				816	else:
				817	errorcount = error_count
				818
				819	# case 1: the statestack only has 1 entry on it. If we're in this state, the
				820	# entire parse has been rolled back and we're completely hosed. The token is
				821	# discarded and we just keep going.
				822
				823	if len(statestack) <= 1 and lookahead.type != '$end':
				824	lookahead = None
				825	errtoken = None
				826	state = 0
				827	# Nuke the pushback stack
				828	del lookaheadstack[:]
				829	continue
				830
				831	# case 2: the statestack has a couple of entries on it, but we're
				832	# at the end of the file. nuke the top entry and generate an error token
				833
				834	# Start nuking entries on the stack
				835	if lookahead.type == '$end':
				836	# Whoa. We're really hosed here. Bail out
				837	return
				838
				839	if lookahead.type != 'error':
				840	sym = symstack[-1]
				841	if sym.type == 'error':
				842	# Hmmm. Error is on top of stack, we'll just nuke input
				843	# symbol and continue
				844	lookahead = None
				845	continue
				846	t = YaccSymbol()
				847	t.type = 'error'
				848	if hasattr(lookahead,"lineno"):
				849	t.lineno = lookahead.lineno
				850	t.value = lookahead
				851	lookaheadstack.append(lookahead)
				852	lookahead = t
				853	else:
				854	symstack.pop()
				855	statestack.pop()
				856	state = statestack[-1] # Potential bug fix
				857
				858	continue
				859
				860	# Call an error function here
				861	raise RuntimeError("yacc: internal parser error!!!\n")
				862
				863	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				864	# parseopt_notrack().
				865	#
				866	# Optimized version of parseopt() with line number tracking removed.
				867	# DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
				868	# code in the #--! TRACKING sections
				869	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				870
				871	def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
				872	lookahead = None # Current lookahead symbol
				873	lookaheadstack = [ ] # Stack of lookahead symbols
				874	actions = self.action # Local reference to action table (to avoid lookup on self.)
				875	goto = self.goto # Local reference to goto table (to avoid lookup on self.)
				876	prod = self.productions # Local reference to production list (to avoid lookup on self.)
				877	pslice = YaccProduction(None) # Production object passed to grammar rules
				878	errorcount = 0 # Used during error recovery
				879
				880	# If no lexer was given, we will try to use the lex module
				881	if not lexer:
				882	lex = load_ply_lex()
				883	lexer = lex.lexer
				884
				885	# Set up the lexer and parser objects on pslice
				886	pslice.lexer = lexer
				887	pslice.parser = self
				888
				889	# If input was supplied, pass to lexer
				890	if input is not None:
				891	lexer.input(input)
				892
				893	if tokenfunc is None:
				894	# Tokenize function
				895	get_token = lexer.token
				896	else:
				897	get_token = tokenfunc
				898
				899	# Set up the state and symbol stacks
				900
				901	statestack = [ ] # Stack of parsing states
				902	self.statestack = statestack
				903	symstack = [ ] # Stack of grammar symbols
				904	self.symstack = symstack
				905
				906	pslice.stack = symstack # Put in the production
				907	errtoken = None # Err token
				908
				909	# The start state is assumed to be (0,$end)
				910
				911	statestack.append(0)
				912	sym = YaccSymbol()
				913	sym.type = '$end'
				914	symstack.append(sym)
				915	state = 0
				916	while 1:
				917	# Get the next symbol on the input. If a lookahead symbol
				918	# is already set, we just use that. Otherwise, we'll pull
				919	# the next token off of the lookaheadstack or from the lexer
				920
				921	if not lookahead:
				922	if not lookaheadstack:
				923	lookahead = get_token() # Get the next token
				924	else:
				925	lookahead = lookaheadstack.pop()
				926	if not lookahead:
				927	lookahead = YaccSymbol()
				928	lookahead.type = '$end'
				929
				930	# Check the action table
				931	ltype = lookahead.type
				932	t = actions[state].get(ltype)
				933
				934	if t is not None:
				935	if t > 0:
				936	# shift a symbol on the stack
				937	statestack.append(t)
				938	state = t
				939
				940	symstack.append(lookahead)
				941	lookahead = None
				942
				943	# Decrease error count on successful shift
				944	if errorcount: errorcount -=1
				945	continue
				946
				947	if t < 0:
				948	# reduce a symbol on the stack, emit a production
				949	p = prod[-t]
				950	pname = p.name
				951	plen = p.len
				952
				953	# Get production function
				954	sym = YaccSymbol()
				955	sym.type = pname # Production name
				956	sym.value = None
				957
				958	if plen:
				959	targ = symstack[-plen-1:]
				960	targ[0] = sym
				961
				962	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				963	# The code enclosed in this section is duplicated
				964	# below as a performance optimization. Make sure
				965	# changes get made in both locations.
				966
				967	pslice.slice = targ
				968
				969	try:
				970	# Call the grammar rule with our special slice object
				971	del symstack[-plen:]
				972	del statestack[-plen:]
				973	p.callable(pslice)
				974	symstack.append(sym)
				975	state = goto[statestack[-1]][pname]
				976	statestack.append(state)
				977	except SyntaxError:
				978	# If an error was set. Enter error recovery state
				979	lookaheadstack.append(lookahead)
				980	symstack.pop()
				981	statestack.pop()
				982	state = statestack[-1]
				983	sym.type = 'error'
				984	lookahead = sym
				985	errorcount = error_count
				986	self.errorok = 0
				987	continue
				988	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				989
				990	else:
				991
				992	targ = [ sym ]
				993
				994	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				995	# The code enclosed in this section is duplicated
				996	# above as a performance optimization. Make sure
				997	# changes get made in both locations.
				998
				999	pslice.slice = targ
				1000
				1001	try:
				1002	# Call the grammar rule with our special slice object
				1003	p.callable(pslice)
				1004	symstack.append(sym)
				1005	state = goto[statestack[-1]][pname]
				1006	statestack.append(state)
				1007	except SyntaxError:
				1008	# If an error was set. Enter error recovery state
				1009	lookaheadstack.append(lookahead)
				1010	symstack.pop()
				1011	statestack.pop()
				1012	state = statestack[-1]
				1013	sym.type = 'error'
				1014	lookahead = sym
				1015	errorcount = error_count
				1016	self.errorok = 0
				1017	continue
				1018	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
				1019
				1020	if t == 0:
				1021	n = symstack[-1]
				1022	return getattr(n,"value",None)
				1023
Andrew Geissler	82c905d	2020-04-13 13:39:40 -0500	[diff] [blame]	1024	if t is None:
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	1025
				1026	# We have some kind of parsing error here. To handle
				1027	# this, we are going to push the current token onto
				1028	# the tokenstack and replace it with an 'error' token.
				1029	# If there are any synchronization rules, they may
				1030	# catch it.
				1031	#
				1032	# In addition to pushing the error token, we call call
				1033	# the user defined p_error() function if this is the
				1034	# first syntax error. This function is only called if
				1035	# errorcount == 0.
				1036	if errorcount == 0 or self.errorok:
				1037	errorcount = error_count
				1038	self.errorok = 0
				1039	errtoken = lookahead
				1040	if errtoken.type == '$end':
				1041	errtoken = None # End of file!
				1042	if self.errorfunc:
				1043	global errok,token,restart
				1044	errok = self.errok # Set some special functions available in error recovery
				1045	token = get_token
				1046	restart = self.restart
				1047	if errtoken and not hasattr(errtoken,'lexer'):
				1048	errtoken.lexer = lexer
				1049	tok = self.errorfunc(errtoken)
				1050	del errok, token, restart # Delete special functions
				1051
				1052	if self.errorok:
				1053	# User must have done some kind of panic
				1054	# mode recovery on their own. The
				1055	# returned token is the next lookahead
				1056	lookahead = tok
				1057	errtoken = None
				1058	continue
				1059	else:
				1060	if errtoken:
				1061	if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
				1062	else: lineno = 0
				1063	if lineno:
				1064	sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
				1065	else:
				1066	sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
				1067	else:
				1068	sys.stderr.write("yacc: Parse error in input. EOF\n")
				1069	return
				1070
				1071	else:
				1072	errorcount = error_count
				1073
				1074	# case 1: the statestack only has 1 entry on it. If we're in this state, the
				1075	# entire parse has been rolled back and we're completely hosed. The token is
				1076	# discarded and we just keep going.
				1077
				1078	if len(statestack) <= 1 and lookahead.type != '$end':
				1079	lookahead = None
				1080	errtoken = None
				1081	state = 0
				1082	# Nuke the pushback stack
				1083	del lookaheadstack[:]
				1084	continue
				1085
				1086	# case 2: the statestack has a couple of entries on it, but we're
				1087	# at the end of the file. nuke the top entry and generate an error token
				1088
				1089	# Start nuking entries on the stack
				1090	if lookahead.type == '$end':
				1091	# Whoa. We're really hosed here. Bail out
				1092	return
				1093
				1094	if lookahead.type != 'error':
				1095	sym = symstack[-1]
				1096	if sym.type == 'error':
				1097	# Hmmm. Error is on top of stack, we'll just nuke input
				1098	# symbol and continue
				1099	lookahead = None
				1100	continue
				1101	t = YaccSymbol()
				1102	t.type = 'error'
				1103	if hasattr(lookahead,"lineno"):
				1104	t.lineno = lookahead.lineno
				1105	t.value = lookahead
				1106	lookaheadstack.append(lookahead)
				1107	lookahead = t
				1108	else:
				1109	symstack.pop()
				1110	statestack.pop()
				1111	state = statestack[-1] # Potential bug fix
				1112
				1113	continue
				1114
				1115	# Call an error function here
				1116	raise RuntimeError("yacc: internal parser error!!!\n")
				1117
				1118	# -----------------------------------------------------------------------------
				1119	# === Grammar Representation ===
				1120	#
				1121	# The following functions, classes, and variables are used to represent and
				1122	# manipulate the rules that make up a grammar.
				1123	# -----------------------------------------------------------------------------
				1124
				1125	import re
				1126
				1127	# regex matching identifiers
				1128	_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
				1129
				1130	# -----------------------------------------------------------------------------
				1131	# class Production:
				1132	#
				1133	# This class stores the raw information about a single production or grammar rule.
				1134	# A grammar rule refers to a specification such as this:
				1135	#
				1136	# expr : expr PLUS term
				1137	#
				1138	# Here are the basic attributes defined on all productions
				1139	#
				1140	# name - Name of the production. For example 'expr'
				1141	# prod - A list of symbols on the right side ['expr','PLUS','term']
				1142	# prec - Production precedence level
				1143	# number - Production number.
				1144	# func - Function that executes on reduce
				1145	# file - File where production function is defined
				1146	# lineno - Line number where production function is defined
				1147	#
				1148	# The following attributes are defined or optional.
				1149	#
				1150	# len - Length of the production (number of symbols on right hand side)
				1151	# usyms - Set of unique symbols found in the production
				1152	# -----------------------------------------------------------------------------
				1153
				1154	class Production(object):
				1155	reduced = 0
				1156	def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
				1157	self.name = name
				1158	self.prod = tuple(prod)
				1159	self.number = number
				1160	self.func = func
				1161	self.callable = None
				1162	self.file = file
				1163	self.line = line
				1164	self.prec = precedence
				1165
				1166	# Internal settings used during table construction
				1167
				1168	self.len = len(self.prod) # Length of the production
				1169
				1170	# Create a list of unique production symbols used in the production
				1171	self.usyms = [ ]
				1172	for s in self.prod:
				1173	if s not in self.usyms:
				1174	self.usyms.append(s)
				1175
				1176	# List of all LR items for the production
				1177	self.lr_items = []
				1178	self.lr_next = None
				1179
				1180	# Create a string representation
				1181	if self.prod:
				1182	self.str = "%s -> %s" % (self.name," ".join(self.prod))
				1183	else:
				1184	self.str = "%s -> <empty>" % self.name
				1185
				1186	def __str__(self):
				1187	return self.str
				1188
				1189	def __repr__(self):
				1190	return "Production("+str(self)+")"
				1191
				1192	def __len__(self):
				1193	return len(self.prod)
				1194
				1195	def __nonzero__(self):
				1196	return 1
				1197
				1198	def __getitem__(self,index):
				1199	return self.prod[index]
				1200
				1201	# Return the nth lr_item from the production (or None if at the end)
				1202	def lr_item(self,n):
				1203	if n > len(self.prod): return None
				1204	p = LRItem(self,n)
				1205
				1206	# Precompute the list of productions immediately following. Hack. Remove later
				1207	try:
Andrew Geissler	c9f7865	2020-09-18 14:11:35 -0500	[diff] [blame]	1208	p.lr_after = self.Prodnames[p.prod[n+1]]
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	1209	except (IndexError,KeyError):
				1210	p.lr_after = []
				1211	try:
				1212	p.lr_before = p.prod[n-1]
				1213	except IndexError:
				1214	p.lr_before = None
				1215
				1216	return p
				1217
				1218	# Bind the production function name to a callable
				1219	def bind(self,pdict):
				1220	if self.func:
				1221	self.callable = pdict[self.func]
				1222
				1223	# This class serves as a minimal standin for Production objects when
				1224	# reading table data from files. It only contains information
				1225	# actually used by the LR parsing engine, plus some additional
				1226	# debugging information.
				1227	class MiniProduction(object):
				1228	def __init__(self,str,name,len,func,file,line):
				1229	self.name = name
				1230	self.len = len
				1231	self.func = func
				1232	self.callable = None
				1233	self.file = file
				1234	self.line = line
				1235	self.str = str
				1236	def __str__(self):
				1237	return self.str
				1238	def __repr__(self):
				1239	return "MiniProduction(%s)" % self.str
				1240
				1241	# Bind the production function name to a callable
				1242	def bind(self,pdict):
				1243	if self.func:
				1244	self.callable = pdict[self.func]
				1245
				1246
				1247	# -----------------------------------------------------------------------------
				1248	# class LRItem
				1249	#
				1250	# This class represents a specific stage of parsing a production rule. For
				1251	# example:
				1252	#
				1253	# expr : expr . PLUS term
				1254	#
				1255	# In the above, the "." represents the current location of the parse. Here
				1256	# basic attributes:
				1257	#
				1258	# name - Name of the production. For example 'expr'
				1259	# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
				1260	# number - Production number.
				1261	#
				1262	# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
				1263	# then lr_next refers to 'expr -> expr PLUS . term'
				1264	# lr_index - LR item index (location of the ".") in the prod list.
				1265	# lookaheads - LALR lookahead symbols for this item
				1266	# len - Length of the production (number of symbols on right hand side)
				1267	# lr_after - List of all productions that immediately follow
				1268	# lr_before - Grammar symbol immediately before
				1269	# -----------------------------------------------------------------------------
				1270
				1271	class LRItem(object):
				1272	def __init__(self,p,n):
				1273	self.name = p.name
				1274	self.prod = list(p.prod)
				1275	self.number = p.number
				1276	self.lr_index = n
				1277	self.lookaheads = { }
				1278	self.prod.insert(n,".")
				1279	self.prod = tuple(self.prod)
				1280	self.len = len(self.prod)
				1281	self.usyms = p.usyms
				1282
				1283	def __str__(self):
				1284	if self.prod:
				1285	s = "%s -> %s" % (self.name," ".join(self.prod))
				1286	else:
				1287	s = "%s -> <empty>" % self.name
				1288	return s
				1289
				1290	def __repr__(self):
				1291	return "LRItem("+str(self)+")"
				1292
				1293	# -----------------------------------------------------------------------------
				1294	# rightmost_terminal()
				1295	#
				1296	# Return the rightmost terminal from a list of symbols. Used in add_production()
				1297	# -----------------------------------------------------------------------------
				1298	def rightmost_terminal(symbols, terminals):
				1299	i = len(symbols) - 1
				1300	while i >= 0:
				1301	if symbols[i] in terminals:
				1302	return symbols[i]
				1303	i -= 1
				1304	return None
				1305
				1306	# -----------------------------------------------------------------------------
				1307	# === GRAMMAR CLASS ===
				1308	#
				1309	# The following class represents the contents of the specified grammar along
				1310	# with various computed properties such as first sets, follow sets, LR items, etc.
				1311	# This data is used for critical parts of the table generation process later.
				1312	# -----------------------------------------------------------------------------
				1313
				1314	class GrammarError(YaccError): pass
				1315
				1316	class Grammar(object):
				1317	def __init__(self,terminals):
				1318	self.Productions = [None] # A list of all of the productions. The first
				1319	# entry is always reserved for the purpose of
				1320	# building an augmented grammar
				1321
				1322	self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
				1323	# productions of that nonterminal.
				1324
				1325	self.Prodmap = { } # A dictionary that is only used to detect duplicate
				1326	# productions.
				1327
				1328	self.Terminals = { } # A dictionary mapping the names of terminal symbols to a
				1329	# list of the rules where they are used.
				1330
				1331	for term in terminals:
				1332	self.Terminals[term] = []
				1333
				1334	self.Terminals['error'] = []
				1335
				1336	self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list
				1337	# of rule numbers where they are used.
				1338
				1339	self.First = { } # A dictionary of precomputed FIRST(x) symbols
				1340
				1341	self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
				1342
				1343	self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the
				1344	# form ('right',level) or ('nonassoc', level) or ('left',level)
				1345
				1346	self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer.
				1347	# This is only used to provide error checking and to generate
				1348	# a warning about unused precedence rules.
				1349
				1350	self.Start = None # Starting symbol for the grammar
				1351
				1352
				1353	def __len__(self):
				1354	return len(self.Productions)
				1355
				1356	def __getitem__(self,index):
				1357	return self.Productions[index]
				1358
				1359	# -----------------------------------------------------------------------------
				1360	# set_precedence()
				1361	#
				1362	# Sets the precedence for a given terminal. assoc is the associativity such as
				1363	# 'left','right', or 'nonassoc'. level is a numeric level.
				1364	#
				1365	# -----------------------------------------------------------------------------
				1366
				1367	def set_precedence(self,term,assoc,level):
				1368	assert self.Productions == [None],"Must call set_precedence() before add_production()"
				1369	if term in self.Precedence:
				1370	raise GrammarError("Precedence already specified for terminal '%s'" % term)
				1371	if assoc not in ['left','right','nonassoc']:
				1372	raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
				1373	self.Precedence[term] = (assoc,level)
				1374
				1375	# -----------------------------------------------------------------------------
				1376	# add_production()
				1377	#
				1378	# Given an action function, this function assembles a production rule and
				1379	# computes its precedence level.
				1380	#
				1381	# The production rule is supplied as a list of symbols. For example,
				1382	# a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
				1383	# symbols ['expr','PLUS','term'].
				1384	#
				1385	# Precedence is determined by the precedence of the right-most non-terminal
				1386	# or the precedence of a terminal specified by %prec.
				1387	#
				1388	# A variety of error checks are performed to make sure production symbols
				1389	# are valid and that %prec is used correctly.
				1390	# -----------------------------------------------------------------------------
				1391
				1392	def add_production(self,prodname,syms,func=None,file='',line=0):
				1393
				1394	if prodname in self.Terminals:
				1395	raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
				1396	if prodname == 'error':
				1397	raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
				1398	if not _is_identifier.match(prodname):
				1399	raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
				1400
				1401	# Look for literal tokens
				1402	for n,s in enumerate(syms):
				1403	if s[0] in "'\"":
				1404	try:
				1405	c = eval(s)
				1406	if (len(c) > 1):
				1407	raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
				1408	if not c in self.Terminals:
				1409	self.Terminals[c] = []
				1410	syms[n] = c
				1411	continue
				1412	except SyntaxError:
				1413	pass
				1414	if not _is_identifier.match(s) and s != '%prec':
				1415	raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
				1416
				1417	# Determine the precedence level
				1418	if '%prec' in syms:
				1419	if syms[-1] == '%prec':
				1420	raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
				1421	if syms[-2] != '%prec':
				1422	raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
				1423	precname = syms[-1]
				1424	prodprec = self.Precedence.get(precname,None)
				1425	if not prodprec:
				1426	raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
				1427	else:
				1428	self.UsedPrecedence[precname] = 1
				1429	del syms[-2:] # Drop %prec from the rule
				1430	else:
				1431	# If no %prec, precedence is determined by the rightmost terminal symbol
				1432	precname = rightmost_terminal(syms,self.Terminals)
				1433	prodprec = self.Precedence.get(precname,('right',0))
				1434
				1435	# See if the rule is already in the rulemap
				1436	map = "%s -> %s" % (prodname,syms)
				1437	if map in self.Prodmap:
				1438	m = self.Prodmap[map]
				1439	raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
				1440	"Previous definition at %s:%d" % (m.file, m.line))
				1441
				1442	# From this point on, everything is valid. Create a new Production instance
				1443	pnumber = len(self.Productions)
				1444	if not prodname in self.Nonterminals:
				1445	self.Nonterminals[prodname] = [ ]
				1446
				1447	# Add the production number to Terminals and Nonterminals
				1448	for t in syms:
				1449	if t in self.Terminals:
				1450	self.Terminals[t].append(pnumber)
				1451	else:
				1452	if not t in self.Nonterminals:
				1453	self.Nonterminals[t] = [ ]
				1454	self.Nonterminals[t].append(pnumber)
				1455
				1456	# Create a production and add it to the list of productions
				1457	p = Production(pnumber,prodname,syms,prodprec,func,file,line)
				1458	self.Productions.append(p)
				1459	self.Prodmap[map] = p
				1460
				1461	# Add to the global productions list
				1462	try:
				1463	self.Prodnames[prodname].append(p)
				1464	except KeyError:
				1465	self.Prodnames[prodname] = [ p ]
				1466	return 0
				1467
				1468	# -----------------------------------------------------------------------------
				1469	# set_start()
				1470	#
				1471	# Sets the starting symbol and creates the augmented grammar. Production
				1472	# rule 0 is S' -> start where start is the start symbol.
				1473	# -----------------------------------------------------------------------------
				1474
				1475	def set_start(self,start=None):
				1476	if not start:
				1477	start = self.Productions[1].name
				1478	if start not in self.Nonterminals:
				1479	raise GrammarError("start symbol %s undefined" % start)
				1480	self.Productions[0] = Production(0,"S'",[start])
				1481	self.Nonterminals[start].append(0)
				1482	self.Start = start
				1483
				1484	# -----------------------------------------------------------------------------
				1485	# find_unreachable()
				1486	#
				1487	# Find all of the nonterminal symbols that can't be reached from the starting
				1488	# symbol. Returns a list of nonterminals that can't be reached.
				1489	# -----------------------------------------------------------------------------
				1490
				1491	def find_unreachable(self):
				1492
				1493	# Mark all symbols that are reachable from a symbol s
				1494	def mark_reachable_from(s):
				1495	if reachable[s]:
				1496	# We've already reached symbol s.
				1497	return
				1498	reachable[s] = 1
				1499	for p in self.Prodnames.get(s,[]):
				1500	for r in p.prod:
				1501	mark_reachable_from(r)
				1502
				1503	reachable = { }
				1504	for s in list(self.Terminals) + list(self.Nonterminals):
				1505	reachable[s] = 0
				1506
				1507	mark_reachable_from( self.Productions[0].prod[0] )
				1508
				1509	return [s for s in list(self.Nonterminals)
				1510	if not reachable[s]]
				1511
				1512	# -----------------------------------------------------------------------------
				1513	# infinite_cycles()
				1514	#
				1515	# This function looks at the various parsing rules and tries to detect
				1516	# infinite recursion cycles (grammar rules where there is no possible way
				1517	# to derive a string of only terminals).
				1518	# -----------------------------------------------------------------------------
				1519
				1520	def infinite_cycles(self):
				1521	terminates = {}
				1522
				1523	# Terminals:
				1524	for t in self.Terminals:
				1525	terminates[t] = 1
				1526
				1527	terminates['$end'] = 1
				1528
				1529	# Nonterminals:
				1530
				1531	# Initialize to false:
				1532	for n in self.Nonterminals:
				1533	terminates[n] = 0
				1534
				1535	# Then propagate termination until no change:
				1536	while 1:
				1537	some_change = 0
				1538	for (n,pl) in self.Prodnames.items():
				1539	# Nonterminal n terminates iff any of its productions terminates.
				1540	for p in pl:
				1541	# Production p terminates iff all of its rhs symbols terminate.
				1542	for s in p.prod:
				1543	if not terminates[s]:
				1544	# The symbol s does not terminate,
				1545	# so production p does not terminate.
				1546	p_terminates = 0
				1547	break
				1548	else:
				1549	# didn't break from the loop,
				1550	# so every symbol s terminates
				1551	# so production p terminates.
				1552	p_terminates = 1
				1553
				1554	if p_terminates:
				1555	# symbol n terminates!
				1556	if not terminates[n]:
				1557	terminates[n] = 1
				1558	some_change = 1
				1559	# Don't need to consider any more productions for this n.
				1560	break
				1561
				1562	if not some_change:
				1563	break
				1564
				1565	infinite = []
				1566	for (s,term) in terminates.items():
				1567	if not term:
				1568	if not s in self.Prodnames and not s in self.Terminals and s != 'error':
				1569	# s is used-but-not-defined, and we've already warned of that,
				1570	# so it would be overkill to say that it's also non-terminating.
				1571	pass
				1572	else:
				1573	infinite.append(s)
				1574
				1575	return infinite
				1576
				1577
				1578	# -----------------------------------------------------------------------------
				1579	# undefined_symbols()
				1580	#
				1581	# Find all symbols that were used the grammar, but not defined as tokens or
				1582	# grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
				1583	# and prod is the production where the symbol was used.
				1584	# -----------------------------------------------------------------------------
				1585	def undefined_symbols(self):
				1586	result = []
				1587	for p in self.Productions:
				1588	if not p: continue
				1589
				1590	for s in p.prod:
				1591	if not s in self.Prodnames and not s in self.Terminals and s != 'error':
				1592	result.append((s,p))
				1593	return result
				1594
				1595	# -----------------------------------------------------------------------------
				1596	# unused_terminals()
				1597	#
				1598	# Find all terminals that were defined, but not used by the grammar. Returns
				1599	# a list of all symbols.
				1600	# -----------------------------------------------------------------------------
				1601	def unused_terminals(self):
				1602	unused_tok = []
				1603	for s,v in self.Terminals.items():
				1604	if s != 'error' and not v:
				1605	unused_tok.append(s)
				1606
				1607	return unused_tok
				1608
				1609	# ------------------------------------------------------------------------------
				1610	# unused_rules()
				1611	#
				1612	# Find all grammar rules that were defined, but not used (maybe not reachable)
				1613	# Returns a list of productions.
				1614	# ------------------------------------------------------------------------------
				1615
				1616	def unused_rules(self):
				1617	unused_prod = []
				1618	for s,v in self.Nonterminals.items():
				1619	if not v:
				1620	p = self.Prodnames[s][0]
				1621	unused_prod.append(p)
				1622	return unused_prod
				1623
				1624	# -----------------------------------------------------------------------------
				1625	# unused_precedence()
				1626	#
				1627	# Returns a list of tuples (term,precedence) corresponding to precedence
				1628	# rules that were never used by the grammar. term is the name of the terminal
				1629	# on which precedence was applied and precedence is a string such as 'left' or
				1630	# 'right' corresponding to the type of precedence.
				1631	# -----------------------------------------------------------------------------
				1632
				1633	def unused_precedence(self):
				1634	unused = []
				1635	for termname in self.Precedence:
				1636	if not (termname in self.Terminals or termname in self.UsedPrecedence):
				1637	unused.append((termname,self.Precedence[termname][0]))
				1638
				1639	return unused
				1640
				1641	# -------------------------------------------------------------------------
				1642	# _first()
				1643	#
				1644	# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
				1645	#
				1646	# During execution of compute_first1, the result may be incomplete.
				1647	# Afterward (e.g., when called from compute_follow()), it will be complete.
				1648	# -------------------------------------------------------------------------
				1649	def _first(self,beta):
				1650
				1651	# We are computing First(x1,x2,x3,...,xn)
				1652	result = [ ]
				1653	for x in beta:
				1654	x_produces_empty = 0
				1655
				1656	# Add all the non-<empty> symbols of First[x] to the result.
				1657	for f in self.First[x]:
				1658	if f == '<empty>':
				1659	x_produces_empty = 1
				1660	else:
				1661	if f not in result: result.append(f)
				1662
				1663	if x_produces_empty:
				1664	# We have to consider the next x in beta,
				1665	# i.e. stay in the loop.
				1666	pass
				1667	else:
				1668	# We don't have to consider any further symbols in beta.
				1669	break
				1670	else:
				1671	# There was no 'break' from the loop,
				1672	# so x_produces_empty was true for all x in beta,
				1673	# so beta produces empty as well.
				1674	result.append('<empty>')
				1675
				1676	return result
				1677
				1678	# -------------------------------------------------------------------------
				1679	# compute_first()
				1680	#
				1681	# Compute the value of FIRST1(X) for all symbols
				1682	# -------------------------------------------------------------------------
				1683	def compute_first(self):
				1684	if self.First:
				1685	return self.First
				1686
				1687	# Terminals:
				1688	for t in self.Terminals:
				1689	self.First[t] = [t]
				1690
				1691	self.First['$end'] = ['$end']
				1692
				1693	# Nonterminals:
				1694
				1695	# Initialize to the empty set:
				1696	for n in self.Nonterminals:
				1697	self.First[n] = []
				1698
				1699	# Then propagate symbols until no change:
				1700	while 1:
				1701	some_change = 0
				1702	for n in self.Nonterminals:
				1703	for p in self.Prodnames[n]:
				1704	for f in self._first(p.prod):
				1705	if f not in self.First[n]:
				1706	self.First[n].append( f )
				1707	some_change = 1
				1708	if not some_change:
				1709	break
				1710
				1711	return self.First
				1712
				1713	# ---------------------------------------------------------------------
				1714	# compute_follow()
				1715	#
				1716	# Computes all of the follow sets for every non-terminal symbol. The
				1717	# follow set is the set of all symbols that might follow a given
				1718	# non-terminal. See the Dragon book, 2nd Ed. p. 189.
				1719	# ---------------------------------------------------------------------
				1720	def compute_follow(self,start=None):
				1721	# If already computed, return the result
				1722	if self.Follow:
				1723	return self.Follow
				1724
				1725	# If first sets not computed yet, do that first.
				1726	if not self.First:
				1727	self.compute_first()
				1728
				1729	# Add '$end' to the follow list of the start symbol
				1730	for k in self.Nonterminals:
				1731	self.Follow[k] = [ ]
				1732
				1733	if not start:
				1734	start = self.Productions[1].name
				1735
				1736	self.Follow[start] = [ '$end' ]
				1737
				1738	while 1:
				1739	didadd = 0
				1740	for p in self.Productions[1:]:
				1741	# Here is the production set
				1742	for i in range(len(p.prod)):
				1743	B = p.prod[i]
				1744	if B in self.Nonterminals:
				1745	# Okay. We got a non-terminal in a production
				1746	fst = self._first(p.prod[i+1:])
				1747	hasempty = 0
				1748	for f in fst:
				1749	if f != '<empty>' and f not in self.Follow[B]:
				1750	self.Follow[B].append(f)
				1751	didadd = 1
				1752	if f == '<empty>':
				1753	hasempty = 1
				1754	if hasempty or i == (len(p.prod)-1):
				1755	# Add elements of follow(a) to follow(b)
				1756	for f in self.Follow[p.name]:
				1757	if f not in self.Follow[B]:
				1758	self.Follow[B].append(f)
				1759	didadd = 1
				1760	if not didadd: break
				1761	return self.Follow
				1762
				1763
				1764	# -----------------------------------------------------------------------------
				1765	# build_lritems()
				1766	#
				1767	# This function walks the list of productions and builds a complete set of the
				1768	# LR items. The LR items are stored in two ways: First, they are uniquely
				1769	# numbered and placed in the list _lritems. Second, a linked list of LR items
				1770	# is built for each production. For example:
				1771	#
				1772	# E -> E PLUS E
				1773	#
				1774	# Creates the list
				1775	#
				1776	# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
				1777	# -----------------------------------------------------------------------------
				1778
				1779	def build_lritems(self):
				1780	for p in self.Productions:
				1781	lastlri = p
				1782	i = 0
				1783	lr_items = []
				1784	while 1:
				1785	if i > len(p):
				1786	lri = None
				1787	else:
				1788	lri = LRItem(p,i)
				1789	# Precompute the list of productions immediately following
				1790	try:
				1791	lri.lr_after = self.Prodnames[lri.prod[i+1]]
				1792	except (IndexError,KeyError):
				1793	lri.lr_after = []
				1794	try:
				1795	lri.lr_before = lri.prod[i-1]
				1796	except IndexError:
				1797	lri.lr_before = None
				1798
				1799	lastlri.lr_next = lri
				1800	if not lri: break
				1801	lr_items.append(lri)
				1802	lastlri = lri
				1803	i += 1
				1804	p.lr_items = lr_items
				1805
				1806	# -----------------------------------------------------------------------------
				1807	# == Class LRTable ==
				1808	#
				1809	# This basic class represents a basic table of LR parsing information.
				1810	# Methods for generating the tables are not defined here. They are defined
				1811	# in the derived class LRGeneratedTable.
				1812	# -----------------------------------------------------------------------------
				1813
				1814	class VersionError(YaccError): pass
				1815
				1816	class LRTable(object):
				1817	def __init__(self):
				1818	self.lr_action = None
				1819	self.lr_goto = None
				1820	self.lr_productions = None
				1821	self.lr_method = None
				1822
				1823	def read_table(self,module):
				1824	if isinstance(module,types.ModuleType):
				1825	parsetab = module
				1826	else:
				1827	if sys.version_info[0] < 3:
				1828	exec("import %s as parsetab" % module)
				1829	else:
				1830	env = { }
				1831	exec("import %s as parsetab" % module, env, env)
				1832	parsetab = env['parsetab']
				1833
				1834	if parsetab._tabversion != __tabversion__:
				1835	raise VersionError("yacc table file version is out of date")
				1836
				1837	self.lr_action = parsetab._lr_action
				1838	self.lr_goto = parsetab._lr_goto
				1839
				1840	self.lr_productions = []
				1841	for p in parsetab._lr_productions:
				1842	self.lr_productions.append(MiniProduction(*p))
				1843
				1844	self.lr_method = parsetab._lr_method
				1845	return parsetab._lr_signature
				1846
				1847	def read_pickle(self,filename):
				1848	try:
				1849	import cPickle as pickle
				1850	except ImportError:
				1851	import pickle
				1852
				1853	in_f = open(filename,"rb")
				1854
				1855	tabversion = pickle.load(in_f)
				1856	if tabversion != __tabversion__:
				1857	raise VersionError("yacc table file version is out of date")
				1858	self.lr_method = pickle.load(in_f)
				1859	signature = pickle.load(in_f)
				1860	self.lr_action = pickle.load(in_f)
				1861	self.lr_goto = pickle.load(in_f)
				1862	productions = pickle.load(in_f)
				1863
				1864	self.lr_productions = []
				1865	for p in productions:
				1866	self.lr_productions.append(MiniProduction(*p))
				1867
				1868	in_f.close()
				1869	return signature
				1870
				1871	# Bind all production function names to callable objects in pdict
				1872	def bind_callables(self,pdict):
				1873	for p in self.lr_productions:
				1874	p.bind(pdict)
				1875
				1876	# -----------------------------------------------------------------------------
				1877	# === LR Generator ===
				1878	#
				1879	# The following classes and functions are used to generate LR parsing tables on
				1880	# a grammar.
				1881	# -----------------------------------------------------------------------------
				1882
				1883	# -----------------------------------------------------------------------------
				1884	# digraph()
				1885	# traverse()
				1886	#
				1887	# The following two functions are used to compute set valued functions
				1888	# of the form:
				1889	#
				1890	# F(x) = F'(x) U U{F(y) \| x R y}
				1891	#
				1892	# This is used to compute the values of Read() sets as well as FOLLOW sets
				1893	# in LALR(1) generation.
				1894	#
				1895	# Inputs: X - An input set
				1896	# R - A relation
				1897	# FP - Set-valued function
				1898	# ------------------------------------------------------------------------------
				1899
				1900	def digraph(X,R,FP):
				1901	N = { }
				1902	for x in X:
				1903	N[x] = 0
				1904	stack = []
				1905	F = { }
				1906	for x in X:
				1907	if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
				1908	return F
				1909
				1910	def traverse(x,N,stack,F,X,R,FP):
				1911	stack.append(x)
				1912	d = len(stack)
				1913	N[x] = d
				1914	F[x] = FP(x) # F(X) <- F'(x)
				1915
				1916	rel = R(x) # Get y's related to x
				1917	for y in rel:
				1918	if N[y] == 0:
				1919	traverse(y,N,stack,F,X,R,FP)
				1920	N[x] = min(N[x],N[y])
				1921	for a in F.get(y,[]):
				1922	if a not in F[x]: F[x].append(a)
				1923	if N[x] == d:
				1924	N[stack[-1]] = MAXINT
				1925	F[stack[-1]] = F[x]
				1926	element = stack.pop()
				1927	while element != x:
				1928	N[stack[-1]] = MAXINT
				1929	F[stack[-1]] = F[x]
				1930	element = stack.pop()
				1931
				1932	class LALRError(YaccError): pass
				1933
				1934	# -----------------------------------------------------------------------------
				1935	# == LRGeneratedTable ==
				1936	#
				1937	# This class implements the LR table generation algorithm. There are no
				1938	# public methods except for write()
				1939	# -----------------------------------------------------------------------------
				1940
				1941	class LRGeneratedTable(LRTable):
				1942	def __init__(self,grammar,method='LALR',log=None):
				1943	if method not in ['SLR','LALR']:
				1944	raise LALRError("Unsupported method %s" % method)
				1945
				1946	self.grammar = grammar
				1947	self.lr_method = method
				1948
				1949	# Set up the logger
				1950	if not log:
				1951	log = NullLogger()
				1952	self.log = log
				1953
				1954	# Internal attributes
				1955	self.lr_action = {} # Action table
				1956	self.lr_goto = {} # Goto table
				1957	self.lr_productions = grammar.Productions # Copy of grammar Production array
				1958	self.lr_goto_cache = {} # Cache of computed gotos
				1959	self.lr0_cidhash = {} # Cache of closures
				1960
				1961	self._add_count = 0 # Internal counter used to detect cycles
				1962
				1963	# Diagonistic information filled in by the table generator
				1964	self.sr_conflict = 0
				1965	self.rr_conflict = 0
				1966	self.conflicts = [] # List of conflicts
				1967
				1968	self.sr_conflicts = []
				1969	self.rr_conflicts = []
				1970
				1971	# Build the tables
				1972	self.grammar.build_lritems()
				1973	self.grammar.compute_first()
				1974	self.grammar.compute_follow()
				1975	self.lr_parse_table()
				1976
				1977	# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
				1978
				1979	def lr0_closure(self,I):
				1980	self._add_count += 1
				1981
				1982	# Add everything in I to J
				1983	J = I[:]
				1984	didadd = 1
				1985	while didadd:
				1986	didadd = 0
				1987	for j in J:
				1988	for x in j.lr_after:
				1989	if getattr(x,"lr0_added",0) == self._add_count: continue
				1990	# Add B --> .G to J
				1991	J.append(x.lr_next)
				1992	x.lr0_added = self._add_count
				1993	didadd = 1
				1994
				1995	return J
				1996
				1997	# Compute the LR(0) goto function goto(I,X) where I is a set
				1998	# of LR(0) items and X is a grammar symbol. This function is written
				1999	# in a way that guarantees uniqueness of the generated goto sets
				2000	# (i.e. the same goto set will never be returned as two different Python
				2001	# objects). With uniqueness, we can later do fast set comparisons using
				2002	# id(obj) instead of element-wise comparison.
				2003
				2004	def lr0_goto(self,I,x):
				2005	# First we look for a previously cached entry
				2006	g = self.lr_goto_cache.get((id(I),x),None)
				2007	if g: return g
				2008
				2009	# Now we generate the goto set in a way that guarantees uniqueness
				2010	# of the result
				2011
				2012	s = self.lr_goto_cache.get(x,None)
				2013	if not s:
				2014	s = { }
				2015	self.lr_goto_cache[x] = s
				2016
				2017	gs = [ ]
				2018	for p in I:
				2019	n = p.lr_next
				2020	if n and n.lr_before == x:
				2021	s1 = s.get(id(n),None)
				2022	if not s1:
				2023	s1 = { }
				2024	s[id(n)] = s1
				2025	gs.append(n)
				2026	s = s1
				2027	g = s.get('$end',None)
				2028	if not g:
				2029	if gs:
				2030	g = self.lr0_closure(gs)
				2031	s['$end'] = g
				2032	else:
				2033	s['$end'] = gs
				2034	self.lr_goto_cache[(id(I),x)] = g
				2035	return g
				2036
				2037	# Compute the LR(0) sets of item function
				2038	def lr0_items(self):
				2039
				2040	C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
				2041	i = 0
				2042	for I in C:
				2043	self.lr0_cidhash[id(I)] = i
				2044	i += 1
				2045
				2046	# Loop over the items in C and each grammar symbols
				2047	i = 0
				2048	while i < len(C):
				2049	I = C[i]
				2050	i += 1
				2051
				2052	# Collect all of the symbols that could possibly be in the goto(I,X) sets
				2053	asyms = { }
				2054	for ii in I:
				2055	for s in ii.usyms:
				2056	asyms[s] = None
				2057
				2058	for x in asyms:
				2059	g = self.lr0_goto(I,x)
				2060	if not g: continue
				2061	if id(g) in self.lr0_cidhash: continue
				2062	self.lr0_cidhash[id(g)] = len(C)
				2063	C.append(g)
				2064
				2065	return C
				2066
				2067	# -----------------------------------------------------------------------------
				2068	# ==== LALR(1) Parsing ====
				2069	#
				2070	# LALR(1) parsing is almost exactly the same as SLR except that instead of
				2071	# relying upon Follow() sets when performing reductions, a more selective
				2072	# lookahead set that incorporates the state of the LR(0) machine is utilized.
				2073	# Thus, we mainly just have to focus on calculating the lookahead sets.
				2074	#
				2075	# The method used here is due to DeRemer and Pennelo (1982).
				2076	#
				2077	# DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
				2078	# Lookahead Sets", ACM Transactions on Programming Languages and Systems,
				2079	# Vol. 4, No. 4, Oct. 1982, pp. 615-649
				2080	#
				2081	# Further details can also be found in:
				2082	#
				2083	# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
				2084	# McGraw-Hill Book Company, (1985).
				2085	#
				2086	# -----------------------------------------------------------------------------
				2087
				2088	# -----------------------------------------------------------------------------
				2089	# compute_nullable_nonterminals()
				2090	#
				2091	# Creates a dictionary containing all of the non-terminals that might produce
				2092	# an empty production.
				2093	# -----------------------------------------------------------------------------
				2094
				2095	def compute_nullable_nonterminals(self):
				2096	nullable = {}
				2097	num_nullable = 0
				2098	while 1:
				2099	for p in self.grammar.Productions[1:]:
				2100	if p.len == 0:
				2101	nullable[p.name] = 1
				2102	continue
				2103	for t in p.prod:
				2104	if not t in nullable: break
				2105	else:
				2106	nullable[p.name] = 1
				2107	if len(nullable) == num_nullable: break
				2108	num_nullable = len(nullable)
				2109	return nullable
				2110
				2111	# -----------------------------------------------------------------------------
				2112	# find_nonterminal_trans(C)
				2113	#
				2114	# Given a set of LR(0) items, this functions finds all of the non-terminal
				2115	# transitions. These are transitions in which a dot appears immediately before
				2116	# a non-terminal. Returns a list of tuples of the form (state,N) where state
				2117	# is the state number and N is the nonterminal symbol.
				2118	#
				2119	# The input C is the set of LR(0) items.
				2120	# -----------------------------------------------------------------------------
				2121
				2122	def find_nonterminal_transitions(self,C):
				2123	trans = []
				2124	for state in range(len(C)):
				2125	for p in C[state]:
				2126	if p.lr_index < p.len - 1:
				2127	t = (state,p.prod[p.lr_index+1])
				2128	if t[1] in self.grammar.Nonterminals:
				2129	if t not in trans: trans.append(t)
				2130	state = state + 1
				2131	return trans
				2132
				2133	# -----------------------------------------------------------------------------
				2134	# dr_relation()
				2135	#
				2136	# Computes the DR(p,A) relationships for non-terminal transitions. The input
				2137	# is a tuple (state,N) where state is a number and N is a nonterminal symbol.
				2138	#
				2139	# Returns a list of terminals.
				2140	# -----------------------------------------------------------------------------
				2141
				2142	def dr_relation(self,C,trans,nullable):
				2143	dr_set = { }
				2144	state,N = trans
				2145	terms = []
				2146
				2147	g = self.lr0_goto(C[state],N)
				2148	for p in g:
				2149	if p.lr_index < p.len - 1:
				2150	a = p.prod[p.lr_index+1]
				2151	if a in self.grammar.Terminals:
				2152	if a not in terms: terms.append(a)
				2153
				2154	# This extra bit is to handle the start state
				2155	if state == 0 and N == self.grammar.Productions[0].prod[0]:
				2156	terms.append('$end')
				2157
				2158	return terms
				2159
				2160	# -----------------------------------------------------------------------------
				2161	# reads_relation()
				2162	#
				2163	# Computes the READS() relation (p,A) READS (t,C).
				2164	# -----------------------------------------------------------------------------
				2165
				2166	def reads_relation(self,C, trans, empty):
				2167	# Look for empty transitions
				2168	rel = []
				2169	state, N = trans
				2170
				2171	g = self.lr0_goto(C[state],N)
				2172	j = self.lr0_cidhash.get(id(g),-1)
				2173	for p in g:
				2174	if p.lr_index < p.len - 1:
				2175	a = p.prod[p.lr_index + 1]
				2176	if a in empty:
				2177	rel.append((j,a))
				2178
				2179	return rel
				2180
				2181	# -----------------------------------------------------------------------------
				2182	# compute_lookback_includes()
				2183	#
				2184	# Determines the lookback and includes relations
				2185	#
				2186	# LOOKBACK:
				2187	#
				2188	# This relation is determined by running the LR(0) state machine forward.
				2189	# For example, starting with a production "N : . A B C", we run it forward
				2190	# to obtain "N : A B C ." We then build a relationship between this final
				2191	# state and the starting state. These relationships are stored in a dictionary
				2192	# lookdict.
				2193	#
				2194	# INCLUDES:
				2195	#
				2196	# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
				2197	#
				2198	# This relation is used to determine non-terminal transitions that occur
				2199	# inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
				2200	# if the following holds:
				2201	#
				2202	# B -> LAT, where T -> epsilon and p' -L-> p
				2203	#
				2204	# L is essentially a prefix (which may be empty), T is a suffix that must be
				2205	# able to derive an empty string. State p' must lead to state p with the string L.
				2206	#
				2207	# -----------------------------------------------------------------------------
				2208
				2209	def compute_lookback_includes(self,C,trans,nullable):
				2210
				2211	lookdict = {} # Dictionary of lookback relations
				2212	includedict = {} # Dictionary of include relations
				2213
				2214	# Make a dictionary of non-terminal transitions
				2215	dtrans = {}
				2216	for t in trans:
				2217	dtrans[t] = 1
				2218
				2219	# Loop over all transitions and compute lookbacks and includes
				2220	for state,N in trans:
				2221	lookb = []
				2222	includes = []
				2223	for p in C[state]:
				2224	if p.name != N: continue
				2225
				2226	# Okay, we have a name match. We now follow the production all the way
				2227	# through the state machine until we get the . on the right hand side
				2228
				2229	lr_index = p.lr_index
				2230	j = state
				2231	while lr_index < p.len - 1:
				2232	lr_index = lr_index + 1
				2233	t = p.prod[lr_index]
				2234
				2235	# Check to see if this symbol and state are a non-terminal transition
				2236	if (j,t) in dtrans:
				2237	# Yes. Okay, there is some chance that this is an includes relation
				2238	# the only way to know for certain is whether the rest of the
				2239	# production derives empty
				2240
				2241	li = lr_index + 1
				2242	while li < p.len:
				2243	if p.prod[li] in self.grammar.Terminals: break # No forget it
				2244	if not p.prod[li] in nullable: break
				2245	li = li + 1
				2246	else:
				2247	# Appears to be a relation between (j,t) and (state,N)
				2248	includes.append((j,t))
				2249
				2250	g = self.lr0_goto(C[j],t) # Go to next set
				2251	j = self.lr0_cidhash.get(id(g),-1) # Go to next state
				2252
				2253	# When we get here, j is the final state, now we have to locate the production
				2254	for r in C[j]:
				2255	if r.name != p.name: continue
				2256	if r.len != p.len: continue
				2257	i = 0
				2258	# This look is comparing a production ". A B C" with "A B C ."
				2259	while i < r.lr_index:
				2260	if r.prod[i] != p.prod[i+1]: break
				2261	i = i + 1
				2262	else:
				2263	lookb.append((j,r))
				2264	for i in includes:
				2265	if not i in includedict: includedict[i] = []
				2266	includedict[i].append((state,N))
				2267	lookdict[(state,N)] = lookb
				2268
				2269	return lookdict,includedict
				2270
				2271	# -----------------------------------------------------------------------------
				2272	# compute_read_sets()
				2273	#
				2274	# Given a set of LR(0) items, this function computes the read sets.
				2275	#
				2276	# Inputs: C = Set of LR(0) items
				2277	# ntrans = Set of nonterminal transitions
				2278	# nullable = Set of empty transitions
				2279	#
				2280	# Returns a set containing the read sets
				2281	# -----------------------------------------------------------------------------
				2282
				2283	def compute_read_sets(self,C, ntrans, nullable):
				2284	FP = lambda x: self.dr_relation(C,x,nullable)
				2285	R = lambda x: self.reads_relation(C,x,nullable)
				2286	F = digraph(ntrans,R,FP)
				2287	return F
				2288
				2289	# -----------------------------------------------------------------------------
				2290	# compute_follow_sets()
				2291	#
				2292	# Given a set of LR(0) items, a set of non-terminal transitions, a readset,
				2293	# and an include set, this function computes the follow sets
				2294	#
				2295	# Follow(p,A) = Read(p,A) U U {Follow(p',B) \| (p,A) INCLUDES (p',B)}
				2296	#
				2297	# Inputs:
				2298	# ntrans = Set of nonterminal transitions
				2299	# readsets = Readset (previously computed)
				2300	# inclsets = Include sets (previously computed)
				2301	#
				2302	# Returns a set containing the follow sets
				2303	# -----------------------------------------------------------------------------
				2304
				2305	def compute_follow_sets(self,ntrans,readsets,inclsets):
				2306	FP = lambda x: readsets[x]
				2307	R = lambda x: inclsets.get(x,[])
				2308	F = digraph(ntrans,R,FP)
				2309	return F
				2310
				2311	# -----------------------------------------------------------------------------
				2312	# add_lookaheads()
				2313	#
				2314	# Attaches the lookahead symbols to grammar rules.
				2315	#
				2316	# Inputs: lookbacks - Set of lookback relations
				2317	# followset - Computed follow set
				2318	#
				2319	# This function directly attaches the lookaheads to productions contained
				2320	# in the lookbacks set
				2321	# -----------------------------------------------------------------------------
				2322
				2323	def add_lookaheads(self,lookbacks,followset):
				2324	for trans,lb in lookbacks.items():
				2325	# Loop over productions in lookback
				2326	for state,p in lb:
				2327	if not state in p.lookaheads:
				2328	p.lookaheads[state] = []
				2329	f = followset.get(trans,[])
				2330	for a in f:
				2331	if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
				2332
				2333	# -----------------------------------------------------------------------------
				2334	# add_lalr_lookaheads()
				2335	#
				2336	# This function does all of the work of adding lookahead information for use
				2337	# with LALR parsing
				2338	# -----------------------------------------------------------------------------
				2339
				2340	def add_lalr_lookaheads(self,C):
				2341	# Determine all of the nullable nonterminals
				2342	nullable = self.compute_nullable_nonterminals()
				2343
				2344	# Find all non-terminal transitions
				2345	trans = self.find_nonterminal_transitions(C)
				2346
				2347	# Compute read sets
				2348	readsets = self.compute_read_sets(C,trans,nullable)
				2349
				2350	# Compute lookback/includes relations
				2351	lookd, included = self.compute_lookback_includes(C,trans,nullable)
				2352
				2353	# Compute LALR FOLLOW sets
				2354	followsets = self.compute_follow_sets(trans,readsets,included)
				2355
				2356	# Add all of the lookaheads
				2357	self.add_lookaheads(lookd,followsets)
				2358
				2359	# -----------------------------------------------------------------------------
				2360	# lr_parse_table()
				2361	#
				2362	# This function constructs the parse tables for SLR or LALR
				2363	# -----------------------------------------------------------------------------
				2364	def lr_parse_table(self):
				2365	Productions = self.grammar.Productions
				2366	Precedence = self.grammar.Precedence
				2367	goto = self.lr_goto # Goto array
				2368	action = self.lr_action # Action array
				2369	log = self.log # Logger for output
				2370
				2371	actionp = { } # Action production array (temporary)
				2372
				2373	log.info("Parsing method: %s", self.lr_method)
				2374
				2375	# Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
				2376	# This determines the number of states
				2377
				2378	C = self.lr0_items()
				2379
				2380	if self.lr_method == 'LALR':
				2381	self.add_lalr_lookaheads(C)
				2382
				2383	# Build the parser table, state by state
				2384	st = 0
				2385	for I in C:
				2386	# Loop over each production in I
				2387	actlist = [ ] # List of actions
				2388	st_action = { }
				2389	st_actionp = { }
				2390	st_goto = { }
				2391	log.info("")
				2392	log.info("state %d", st)
				2393	log.info("")
				2394	for p in I:
				2395	log.info(" (%d) %s", p.number, str(p))
				2396	log.info("")
				2397
				2398	for p in I:
				2399	if p.len == p.lr_index + 1:
				2400	if p.name == "S'":
				2401	# Start symbol. Accept!
				2402	st_action["$end"] = 0
				2403	st_actionp["$end"] = p
				2404	else:
				2405	# We are at the end of a production. Reduce!
				2406	if self.lr_method == 'LALR':
				2407	laheads = p.lookaheads[st]
				2408	else:
				2409	laheads = self.grammar.Follow[p.name]
				2410	for a in laheads:
				2411	actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
				2412	r = st_action.get(a,None)
				2413	if r is not None:
				2414	# Whoa. Have a shift/reduce or reduce/reduce conflict
				2415	if r > 0:
				2416	# Need to decide on shift or reduce here
				2417	# By default we favor shifting. Need to add
				2418	# some precedence rules here.
				2419	sprec,slevel = Productions[st_actionp[a].number].prec
				2420	rprec,rlevel = Precedence.get(a,('right',0))
				2421	if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
				2422	# We really need to reduce here.
				2423	st_action[a] = -p.number
				2424	st_actionp[a] = p
				2425	if not slevel and not rlevel:
				2426	log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
				2427	self.sr_conflicts.append((st,a,'reduce'))
				2428	Productions[p.number].reduced += 1
				2429	elif (slevel == rlevel) and (rprec == 'nonassoc'):
				2430	st_action[a] = None
				2431	else:
				2432	# Hmmm. Guess we'll keep the shift
				2433	if not rlevel:
				2434	log.info(" ! shift/reduce conflict for %s resolved as shift",a)
				2435	self.sr_conflicts.append((st,a,'shift'))
				2436	elif r < 0:
				2437	# Reduce/reduce conflict. In this case, we favor the rule
				2438	# that was defined first in the grammar file
				2439	oldp = Productions[-r]
				2440	pp = Productions[p.number]
				2441	if oldp.line > pp.line:
				2442	st_action[a] = -p.number
				2443	st_actionp[a] = p
				2444	chosenp,rejectp = pp,oldp
				2445	Productions[p.number].reduced += 1
				2446	Productions[oldp.number].reduced -= 1
				2447	else:
				2448	chosenp,rejectp = oldp,pp
				2449	self.rr_conflicts.append((st,chosenp,rejectp))
				2450	log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
				2451	else:
				2452	raise LALRError("Unknown conflict in state %d" % st)
				2453	else:
				2454	st_action[a] = -p.number
				2455	st_actionp[a] = p
				2456	Productions[p.number].reduced += 1
				2457	else:
				2458	i = p.lr_index
				2459	a = p.prod[i+1] # Get symbol right after the "."
				2460	if a in self.grammar.Terminals:
				2461	g = self.lr0_goto(I,a)
				2462	j = self.lr0_cidhash.get(id(g),-1)
				2463	if j >= 0:
				2464	# We are in a shift state
				2465	actlist.append((a,p,"shift and go to state %d" % j))
				2466	r = st_action.get(a,None)
				2467	if r is not None:
				2468	# Whoa have a shift/reduce or shift/shift conflict
				2469	if r > 0:
				2470	if r != j:
				2471	raise LALRError("Shift/shift conflict in state %d" % st)
				2472	elif r < 0:
				2473	# Do a precedence check.
				2474	# - if precedence of reduce rule is higher, we reduce.
				2475	# - if precedence of reduce is same and left assoc, we reduce.
				2476	# - otherwise we shift
				2477	rprec,rlevel = Productions[st_actionp[a].number].prec
				2478	sprec,slevel = Precedence.get(a,('right',0))
				2479	if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
				2480	# We decide to shift here... highest precedence to shift
				2481	Productions[st_actionp[a].number].reduced -= 1
				2482	st_action[a] = j
				2483	st_actionp[a] = p
				2484	if not rlevel:
				2485	log.info(" ! shift/reduce conflict for %s resolved as shift",a)
				2486	self.sr_conflicts.append((st,a,'shift'))
				2487	elif (slevel == rlevel) and (rprec == 'nonassoc'):
				2488	st_action[a] = None
				2489	else:
				2490	# Hmmm. Guess we'll keep the reduce
				2491	if not slevel and not rlevel:
				2492	log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
				2493	self.sr_conflicts.append((st,a,'reduce'))
				2494
				2495	else:
				2496	raise LALRError("Unknown conflict in state %d" % st)
				2497	else:
				2498	st_action[a] = j
				2499	st_actionp[a] = p
				2500
				2501	# Print the actions associated with each terminal
				2502	_actprint = { }
				2503	for a,p,m in actlist:
				2504	if a in st_action:
				2505	if p is st_actionp[a]:
				2506	log.info(" %-15s %s",a,m)
				2507	_actprint[(a,m)] = 1
				2508	log.info("")
				2509	# Print the actions that were not used. (debugging)
				2510	not_used = 0
				2511	for a,p,m in actlist:
				2512	if a in st_action:
				2513	if p is not st_actionp[a]:
				2514	if not (a,m) in _actprint:
				2515	log.debug(" ! %-15s [ %s ]",a,m)
				2516	not_used = 1
				2517	_actprint[(a,m)] = 1
				2518	if not_used:
				2519	log.debug("")
				2520
				2521	# Construct the goto table for this state
				2522
				2523	nkeys = { }
				2524	for ii in I:
				2525	for s in ii.usyms:
				2526	if s in self.grammar.Nonterminals:
				2527	nkeys[s] = None
				2528	for n in nkeys:
				2529	g = self.lr0_goto(I,n)
				2530	j = self.lr0_cidhash.get(id(g),-1)
				2531	if j >= 0:
				2532	st_goto[n] = j
				2533	log.info(" %-30s shift and go to state %d",n,j)
				2534
				2535	action[st] = st_action
				2536	actionp[st] = st_actionp
				2537	goto[st] = st_goto
				2538	st += 1
				2539
				2540
				2541	# -----------------------------------------------------------------------------
				2542	# write()
				2543	#
				2544	# This function writes the LR parsing tables to a file
				2545	# -----------------------------------------------------------------------------
				2546
				2547	def write_table(self,modulename,outputdir='',signature=""):
				2548	basemodulename = modulename.split(".")[-1]
				2549	filename = os.path.join(outputdir,basemodulename) + ".py"
				2550	try:
				2551	f = open(filename,"w")
				2552
				2553	f.write("""
				2554	# %s
				2555	# This file is automatically generated. Do not edit.
				2556	_tabversion = %r
				2557
				2558	_lr_method = %r
				2559
				2560	_lr_signature = %r
				2561	""" % (filename, __tabversion__, self.lr_method, signature))
				2562
				2563	# Change smaller to 0 to go back to original tables
				2564	smaller = 1
				2565
				2566	# Factor out names to try and make smaller
				2567	if smaller:
				2568	items = { }
				2569
				2570	for s,nd in self.lr_action.items():
				2571	for name,v in nd.items():
				2572	i = items.get(name)
				2573	if not i:
				2574	i = ([],[])
				2575	items[name] = i
				2576	i[0].append(s)
				2577	i[1].append(v)
				2578
				2579	f.write("\n_lr_action_items = {")
				2580	for k,v in items.items():
				2581	f.write("%r:([" % k)
				2582	for i in v[0]:
				2583	f.write("%r," % i)
				2584	f.write("],[")
				2585	for i in v[1]:
				2586	f.write("%r," % i)
				2587
				2588	f.write("]),")
				2589	f.write("}\n")
				2590
				2591	f.write("""
				2592	_lr_action = { }
				2593	for _k, _v in _lr_action_items.items():
				2594	for _x,_y in zip(_v[0],_v[1]):
				2595	if not _x in _lr_action: _lr_action[_x] = { }
				2596	_lr_action[_x][_k] = _y
				2597	del _lr_action_items
				2598	""")
				2599
				2600	else:
				2601	f.write("\n_lr_action = { ");
				2602	for k,v in self.lr_action.items():
				2603	f.write("(%r,%r):%r," % (k[0],k[1],v))
				2604	f.write("}\n");
				2605
				2606	if smaller:
				2607	# Factor out names to try and make smaller
				2608	items = { }
				2609
				2610	for s,nd in self.lr_goto.items():
				2611	for name,v in nd.items():
				2612	i = items.get(name)
				2613	if not i:
				2614	i = ([],[])
				2615	items[name] = i
				2616	i[0].append(s)
				2617	i[1].append(v)
				2618
				2619	f.write("\n_lr_goto_items = {")
				2620	for k,v in items.items():
				2621	f.write("%r:([" % k)
				2622	for i in v[0]:
				2623	f.write("%r," % i)
				2624	f.write("],[")
				2625	for i in v[1]:
				2626	f.write("%r," % i)
				2627
				2628	f.write("]),")
				2629	f.write("}\n")
				2630
				2631	f.write("""
				2632	_lr_goto = { }
				2633	for _k, _v in _lr_goto_items.items():
				2634	for _x,_y in zip(_v[0],_v[1]):
				2635	if not _x in _lr_goto: _lr_goto[_x] = { }
				2636	_lr_goto[_x][_k] = _y
				2637	del _lr_goto_items
				2638	""")
				2639	else:
				2640	f.write("\n_lr_goto = { ");
				2641	for k,v in self.lr_goto.items():
				2642	f.write("(%r,%r):%r," % (k[0],k[1],v))
				2643	f.write("}\n");
				2644
				2645	# Write production table
				2646	f.write("_lr_productions = [\n")
				2647	for p in self.lr_productions:
				2648	if p.func:
				2649	f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
				2650	else:
				2651	f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
				2652	f.write("]\n")
				2653	f.close()
				2654
				2655	except IOError:
				2656	e = sys.exc_info()[1]
				2657	sys.stderr.write("Unable to create '%s'\n" % filename)
				2658	sys.stderr.write(str(e)+"\n")
				2659	return
				2660
				2661
				2662	# -----------------------------------------------------------------------------
				2663	# pickle_table()
				2664	#
				2665	# This function pickles the LR parsing tables to a supplied file object
				2666	# -----------------------------------------------------------------------------
				2667
				2668	def pickle_table(self,filename,signature=""):
				2669	try:
				2670	import cPickle as pickle
				2671	except ImportError:
				2672	import pickle
				2673	outf = open(filename,"wb")
				2674	pickle.dump(__tabversion__,outf,pickle_protocol)
				2675	pickle.dump(self.lr_method,outf,pickle_protocol)
				2676	pickle.dump(signature,outf,pickle_protocol)
				2677	pickle.dump(self.lr_action,outf,pickle_protocol)
				2678	pickle.dump(self.lr_goto,outf,pickle_protocol)
				2679
				2680	outp = []
				2681	for p in self.lr_productions:
				2682	if p.func:
				2683	outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
				2684	else:
				2685	outp.append((str(p),p.name,p.len,None,None,None))
				2686	pickle.dump(outp,outf,pickle_protocol)
				2687	outf.close()
				2688
				2689	# -----------------------------------------------------------------------------
				2690	# === INTROSPECTION ===
				2691	#
				2692	# The following functions and classes are used to implement the PLY
				2693	# introspection features followed by the yacc() function itself.
				2694	# -----------------------------------------------------------------------------
				2695
				2696	# -----------------------------------------------------------------------------
				2697	# get_caller_module_dict()
				2698	#
				2699	# This function returns a dictionary containing all of the symbols defined within
				2700	# a caller further down the call stack. This is used to get the environment
				2701	# associated with the yacc() call if none was provided.
				2702	# -----------------------------------------------------------------------------
				2703
				2704	def get_caller_module_dict(levels):
				2705	try:
				2706	raise RuntimeError
				2707	except RuntimeError:
				2708	e,b,t = sys.exc_info()
				2709	f = t.tb_frame
				2710	while levels > 0:
				2711	f = f.f_back
				2712	levels -= 1
				2713	ldict = f.f_globals.copy()
				2714	if f.f_globals != f.f_locals:
				2715	ldict.update(f.f_locals)
				2716
				2717	return ldict
				2718
				2719	# -----------------------------------------------------------------------------
				2720	# parse_grammar()
				2721	#
				2722	# This takes a raw grammar rule string and parses it into production data
				2723	# -----------------------------------------------------------------------------
				2724	def parse_grammar(doc,file,line):
				2725	grammar = []
				2726	# Split the doc string into lines
				2727	pstrings = doc.splitlines()
				2728	lastp = None
				2729	dline = line
				2730	for ps in pstrings:
				2731	dline += 1
				2732	p = ps.split()
				2733	if not p: continue
				2734	try:
				2735	if p[0] == '\|':
				2736	# This is a continuation of a previous rule
				2737	if not lastp:
				2738	raise SyntaxError("%s:%d: Misplaced '\|'" % (file,dline))
				2739	prodname = lastp
				2740	syms = p[1:]
				2741	else:
				2742	prodname = p[0]
				2743	lastp = prodname
				2744	syms = p[2:]
				2745	assign = p[1]
				2746	if assign != ':' and assign != '::=':
				2747	raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
				2748
				2749	grammar.append((file,dline,prodname,syms))
				2750	except SyntaxError:
				2751	raise
				2752	except Exception:
				2753	raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
				2754
				2755	return grammar
				2756
				2757	# -----------------------------------------------------------------------------
				2758	# ParserReflect()
				2759	#
				2760	# This class represents information extracted for building a parser including
				2761	# start symbol, error function, tokens, precedence list, action functions,
				2762	# etc.
				2763	# -----------------------------------------------------------------------------
				2764	class ParserReflect(object):
				2765	def __init__(self,pdict,log=None):
				2766	self.pdict = pdict
				2767	self.start = None
				2768	self.error_func = None
				2769	self.tokens = None
				2770	self.files = {}
				2771	self.grammar = []
				2772	self.error = 0
				2773
				2774	if log is None:
				2775	self.log = PlyLogger(sys.stderr)
				2776	else:
				2777	self.log = log
				2778
				2779	# Get all of the basic information
				2780	def get_all(self):
				2781	self.get_start()
				2782	self.get_error_func()
				2783	self.get_tokens()
				2784	self.get_precedence()
				2785	self.get_pfunctions()
				2786
				2787	# Validate all of the information
				2788	def validate_all(self):
				2789	self.validate_start()
				2790	self.validate_error_func()
				2791	self.validate_tokens()
				2792	self.validate_precedence()
				2793	self.validate_pfunctions()
				2794	self.validate_files()
				2795	return self.error
				2796
				2797	# Compute a signature over the grammar
				2798	def signature(self):
				2799	try:
Andrew Geissler	9aee500	2022-03-30 16:27:02 +0000	[diff] [blame]	2800	import hashlib
Patrick Williams	2390b1b	2022-11-03 13:47:49 -0500	[diff] [blame^]	2801	except ImportError:
				2802	raise RuntimeError("Unable to import hashlib")
				2803	try:
Andrew Geissler	9aee500	2022-03-30 16:27:02 +0000	[diff] [blame]	2804	sig = hashlib.new('MD5', usedforsecurity=False)
Patrick Williams	2390b1b	2022-11-03 13:47:49 -0500	[diff] [blame^]	2805	except TypeError:
				2806	# Some configurations don't appear to support two arguments
				2807	sig = hashlib.new('MD5')
				2808	try:
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	2809	if self.start:
				2810	sig.update(self.start.encode('latin-1'))
				2811	if self.prec:
				2812	sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
				2813	if self.tokens:
				2814	sig.update(" ".join(self.tokens).encode('latin-1'))
				2815	for f in self.pfuncs:
				2816	if f[3]:
				2817	sig.update(f[3].encode('latin-1'))
				2818	except (TypeError,ValueError):
				2819	pass
				2820	return sig.digest()
				2821
				2822	# -----------------------------------------------------------------------------
				2823	# validate_file()
				2824	#
				2825	# This method checks to see if there are duplicated p_rulename() functions
				2826	# in the parser module file. Without this function, it is really easy for
				2827	# users to make mistakes by cutting and pasting code fragments (and it's a real
				2828	# bugger to try and figure out why the resulting parser doesn't work). Therefore,
				2829	# we just do a little regular expression pattern matching of def statements
				2830	# to try and detect duplicates.
				2831	# -----------------------------------------------------------------------------
				2832
				2833	def validate_files(self):
				2834	# Match def p_funcname(
				2835	fre = re.compile(r'\sdef\s+(p_[a-zA-Z_0-9])\(')
				2836
				2837	for filename in self.files.keys():
				2838	base,ext = os.path.splitext(filename)
				2839	if ext != '.py': return 1 # No idea. Assume it's okay.
				2840
				2841	try:
				2842	f = open(filename)
				2843	lines = f.readlines()
				2844	f.close()
				2845	except IOError:
				2846	continue
				2847
				2848	counthash = { }
				2849	for linen,l in enumerate(lines):
				2850	linen += 1
				2851	m = fre.match(l)
				2852	if m:
				2853	name = m.group(1)
				2854	prev = counthash.get(name)
				2855	if not prev:
				2856	counthash[name] = linen
				2857	else:
				2858	self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
				2859
				2860	# Get the start symbol
				2861	def get_start(self):
				2862	self.start = self.pdict.get('start')
				2863
				2864	# Validate the start symbol
				2865	def validate_start(self):
				2866	if self.start is not None:
				2867	if not isinstance(self.start,str):
				2868	self.log.error("'start' must be a string")
				2869
				2870	# Look for error handler
				2871	def get_error_func(self):
				2872	self.error_func = self.pdict.get('p_error')
				2873
				2874	# Validate the error function
				2875	def validate_error_func(self):
				2876	if self.error_func:
				2877	if isinstance(self.error_func,types.FunctionType):
				2878	ismethod = 0
				2879	elif isinstance(self.error_func, types.MethodType):
				2880	ismethod = 1
				2881	else:
				2882	self.log.error("'p_error' defined, but is not a function or method")
				2883	self.error = 1
				2884	return
				2885
				2886	eline = func_code(self.error_func).co_firstlineno
				2887	efile = func_code(self.error_func).co_filename
				2888	self.files[efile] = 1
				2889
				2890	if (func_code(self.error_func).co_argcount != 1+ismethod):
				2891	self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
				2892	self.error = 1
				2893
				2894	# Get the tokens map
				2895	def get_tokens(self):
				2896	tokens = self.pdict.get("tokens",None)
				2897	if not tokens:
				2898	self.log.error("No token list is defined")
				2899	self.error = 1
				2900	return
				2901
				2902	if not isinstance(tokens,(list, tuple)):
				2903	self.log.error("tokens must be a list or tuple")
				2904	self.error = 1
				2905	return
				2906
				2907	if not tokens:
				2908	self.log.error("tokens is empty")
				2909	self.error = 1
				2910	return
				2911
				2912	self.tokens = tokens
				2913
				2914	# Validate the tokens
				2915	def validate_tokens(self):
				2916	# Validate the tokens.
				2917	if 'error' in self.tokens:
				2918	self.log.error("Illegal token name 'error'. Is a reserved word")
				2919	self.error = 1
				2920	return
				2921
				2922	terminals = {}
				2923	for n in self.tokens:
				2924	if n in terminals:
				2925	self.log.warning("Token '%s' multiply defined", n)
				2926	terminals[n] = 1
				2927
				2928	# Get the precedence map (if any)
				2929	def get_precedence(self):
				2930	self.prec = self.pdict.get("precedence",None)
				2931
				2932	# Validate and parse the precedence map
				2933	def validate_precedence(self):
				2934	preclist = []
				2935	if self.prec:
				2936	if not isinstance(self.prec,(list,tuple)):
				2937	self.log.error("precedence must be a list or tuple")
				2938	self.error = 1
				2939	return
				2940	for level,p in enumerate(self.prec):
				2941	if not isinstance(p,(list,tuple)):
				2942	self.log.error("Bad precedence table")
				2943	self.error = 1
				2944	return
				2945
				2946	if len(p) < 2:
				2947	self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
				2948	self.error = 1
				2949	return
				2950	assoc = p[0]
				2951	if not isinstance(assoc,str):
				2952	self.log.error("precedence associativity must be a string")
				2953	self.error = 1
				2954	return
				2955	for term in p[1:]:
				2956	if not isinstance(term,str):
				2957	self.log.error("precedence items must be strings")
				2958	self.error = 1
				2959	return
				2960	preclist.append((term,assoc,level+1))
				2961	self.preclist = preclist
				2962
				2963	# Get all p_functions from the grammar
				2964	def get_pfunctions(self):
				2965	p_functions = []
				2966	for name, item in self.pdict.items():
				2967	if name[:2] != 'p_': continue
				2968	if name == 'p_error': continue
				2969	if isinstance(item,(types.FunctionType,types.MethodType)):
				2970	line = func_code(item).co_firstlineno
				2971	file = func_code(item).co_filename
				2972	p_functions.append((line,file,name,item.__doc__))
				2973
				2974	# Sort all of the actions by line number
				2975	p_functions.sort()
				2976	self.pfuncs = p_functions
				2977
				2978
				2979	# Validate all of the p_functions
				2980	def validate_pfunctions(self):
				2981	grammar = []
				2982	# Check for non-empty symbols
				2983	if len(self.pfuncs) == 0:
				2984	self.log.error("no rules of the form p_rulename are defined")
				2985	self.error = 1
				2986	return
				2987
				2988	for line, file, name, doc in self.pfuncs:
				2989	func = self.pdict[name]
				2990	if isinstance(func, types.MethodType):
				2991	reqargs = 2
				2992	else:
				2993	reqargs = 1
				2994	if func_code(func).co_argcount > reqargs:
				2995	self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
				2996	self.error = 1
				2997	elif func_code(func).co_argcount < reqargs:
				2998	self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
				2999	self.error = 1
				3000	elif not func.__doc__:
				3001	self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
				3002	else:
				3003	try:
				3004	parsed_g = parse_grammar(doc,file,line)
				3005	for g in parsed_g:
				3006	grammar.append((name, g))
				3007	except SyntaxError:
				3008	e = sys.exc_info()[1]
				3009	self.log.error(str(e))
				3010	self.error = 1
				3011
				3012	# Looks like a valid grammar rule
				3013	# Mark the file in which defined.
				3014	self.files[file] = 1
				3015
				3016	# Secondary validation step that looks for p_ definitions that are not functions
				3017	# or functions that look like they might be grammar rules.
				3018
				3019	for n,v in self.pdict.items():
				3020	if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
				3021	if n[0:2] == 't_': continue
				3022	if n[0:2] == 'p_' and n != 'p_error':
				3023	self.log.warning("'%s' not defined as a function", n)
				3024	if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
				3025	(isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
				3026	try:
				3027	doc = v.__doc__.split(" ")
				3028	if doc[1] == ':':
				3029	self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
				3030	func_code(v).co_filename, func_code(v).co_firstlineno,n)
				3031	except Exception:
				3032	pass
				3033
				3034	self.grammar = grammar
				3035
				3036	# -----------------------------------------------------------------------------
				3037	# yacc(module)
				3038	#
				3039	# Build a parser
				3040	# -----------------------------------------------------------------------------
				3041
				3042	def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
				3043	check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
				3044	debuglog=None, errorlog = None, picklefile=None):
				3045
				3046	global parse # Reference to the parsing method of the last built parser
				3047
				3048	# If pickling is enabled, table files are not created
				3049
				3050	if picklefile:
				3051	write_tables = 0
				3052
				3053	if errorlog is None:
				3054	errorlog = PlyLogger(sys.stderr)
				3055
				3056	# Get the module dictionary used for the parser
				3057	if module:
				3058	_items = [(k,getattr(module,k)) for k in dir(module)]
				3059	pdict = dict(_items)
				3060	else:
				3061	pdict = get_caller_module_dict(2)
				3062
				3063	# Collect parser information from the dictionary
				3064	pinfo = ParserReflect(pdict,log=errorlog)
				3065	pinfo.get_all()
				3066
				3067	if pinfo.error:
				3068	raise YaccError("Unable to build parser")
				3069
				3070	# Check signature against table files (if any)
				3071	signature = pinfo.signature()
				3072
				3073	# Read the tables
				3074	try:
				3075	lr = LRTable()
				3076	if picklefile:
				3077	read_signature = lr.read_pickle(picklefile)
				3078	else:
				3079	read_signature = lr.read_table(tabmodule)
				3080	if optimize or (read_signature == signature):
				3081	try:
				3082	lr.bind_callables(pinfo.pdict)
				3083	parser = LRParser(lr,pinfo.error_func)
				3084	parse = parser.parse
				3085	return parser
				3086	except Exception:
				3087	e = sys.exc_info()[1]
				3088	errorlog.warning("There was a problem loading the table file: %s", repr(e))
				3089	except VersionError:
				3090	e = sys.exc_info()
				3091	errorlog.warning(str(e))
				3092	except Exception:
				3093	pass
				3094
				3095	if debuglog is None:
				3096	if debug:
				3097	debuglog = PlyLogger(open(debugfile,"w"))
				3098	else:
				3099	debuglog = NullLogger()
				3100
				3101	debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
				3102
				3103
				3104	errors = 0
				3105
				3106	# Validate the parser information
				3107	if pinfo.validate_all():
				3108	raise YaccError("Unable to build parser")
				3109
				3110	if not pinfo.error_func:
				3111	errorlog.warning("no p_error() function is defined")
				3112
				3113	# Create a grammar object
				3114	grammar = Grammar(pinfo.tokens)
				3115
				3116	# Set precedence level for terminals
				3117	for term, assoc, level in pinfo.preclist:
				3118	try:
				3119	grammar.set_precedence(term,assoc,level)
				3120	except GrammarError:
				3121	e = sys.exc_info()[1]
				3122	errorlog.warning("%s",str(e))
				3123
				3124	# Add productions to the grammar
				3125	for funcname, gram in pinfo.grammar:
				3126	file, line, prodname, syms = gram
				3127	try:
				3128	grammar.add_production(prodname,syms,funcname,file,line)
				3129	except GrammarError:
				3130	e = sys.exc_info()[1]
				3131	errorlog.error("%s",str(e))
				3132	errors = 1
				3133
				3134	# Set the grammar start symbols
				3135	try:
				3136	if start is None:
				3137	grammar.set_start(pinfo.start)
				3138	else:
				3139	grammar.set_start(start)
				3140	except GrammarError:
				3141	e = sys.exc_info()[1]
				3142	errorlog.error(str(e))
				3143	errors = 1
				3144
				3145	if errors:
				3146	raise YaccError("Unable to build parser")
				3147
				3148	# Verify the grammar structure
				3149	undefined_symbols = grammar.undefined_symbols()
				3150	for sym, prod in undefined_symbols:
				3151	errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
				3152	errors = 1
				3153
				3154	unused_terminals = grammar.unused_terminals()
				3155	if unused_terminals:
				3156	debuglog.info("")
				3157	debuglog.info("Unused terminals:")
				3158	debuglog.info("")
				3159	for term in unused_terminals:
				3160	errorlog.warning("Token '%s' defined, but not used", term)
				3161	debuglog.info(" %s", term)
				3162
				3163	# Print out all productions to the debug log
				3164	if debug:
				3165	debuglog.info("")
				3166	debuglog.info("Grammar")
				3167	debuglog.info("")
				3168	for n,p in enumerate(grammar.Productions):
				3169	debuglog.info("Rule %-5d %s", n, p)
				3170
				3171	# Find unused non-terminals
				3172	unused_rules = grammar.unused_rules()
				3173	for prod in unused_rules:
				3174	errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
				3175
				3176	if len(unused_terminals) == 1:
				3177	errorlog.warning("There is 1 unused token")
				3178	if len(unused_terminals) > 1:
				3179	errorlog.warning("There are %d unused tokens", len(unused_terminals))
				3180
				3181	if len(unused_rules) == 1:
				3182	errorlog.warning("There is 1 unused rule")
				3183	if len(unused_rules) > 1:
				3184	errorlog.warning("There are %d unused rules", len(unused_rules))
				3185
				3186	if debug:
				3187	debuglog.info("")
				3188	debuglog.info("Terminals, with rules where they appear")
				3189	debuglog.info("")
				3190	terms = list(grammar.Terminals)
				3191	terms.sort()
				3192	for term in terms:
				3193	debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
				3194
				3195	debuglog.info("")
				3196	debuglog.info("Nonterminals, with rules where they appear")
				3197	debuglog.info("")
				3198	nonterms = list(grammar.Nonterminals)
				3199	nonterms.sort()
				3200	for nonterm in nonterms:
				3201	debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
				3202	debuglog.info("")
				3203
				3204	if check_recursion:
				3205	unreachable = grammar.find_unreachable()
				3206	for u in unreachable:
				3207	errorlog.warning("Symbol '%s' is unreachable",u)
				3208
				3209	infinite = grammar.infinite_cycles()
				3210	for inf in infinite:
				3211	errorlog.error("Infinite recursion detected for symbol '%s'", inf)
				3212	errors = 1
				3213
				3214	unused_prec = grammar.unused_precedence()
				3215	for term, assoc in unused_prec:
				3216	errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
				3217	errors = 1
				3218
				3219	if errors:
				3220	raise YaccError("Unable to build parser")
				3221
				3222	# Run the LRGeneratedTable on the grammar
				3223	if debug:
				3224	errorlog.debug("Generating %s tables", method)
				3225
				3226	lr = LRGeneratedTable(grammar,method,debuglog)
				3227
				3228	if debug:
				3229	num_sr = len(lr.sr_conflicts)
				3230
				3231	# Report shift/reduce and reduce/reduce conflicts
				3232	if num_sr == 1:
				3233	errorlog.warning("1 shift/reduce conflict")
				3234	elif num_sr > 1:
				3235	errorlog.warning("%d shift/reduce conflicts", num_sr)
				3236
				3237	num_rr = len(lr.rr_conflicts)
				3238	if num_rr == 1:
				3239	errorlog.warning("1 reduce/reduce conflict")
				3240	elif num_rr > 1:
				3241	errorlog.warning("%d reduce/reduce conflicts", num_rr)
				3242
				3243	# Write out conflicts to the output file
				3244	if debug and (lr.sr_conflicts or lr.rr_conflicts):
				3245	debuglog.warning("")
				3246	debuglog.warning("Conflicts:")
				3247	debuglog.warning("")
				3248
				3249	for state, tok, resolution in lr.sr_conflicts:
				3250	debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
				3251
				3252	already_reported = {}
				3253	for state, rule, rejected in lr.rr_conflicts:
				3254	if (state,id(rule),id(rejected)) in already_reported:
				3255	continue
				3256	debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
				3257	debuglog.warning("rejected rule (%s) in state %d", rejected,state)
				3258	errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
				3259	errorlog.warning("rejected rule (%s) in state %d", rejected, state)
				3260	already_reported[state,id(rule),id(rejected)] = 1
				3261
				3262	warned_never = []
				3263	for state, rule, rejected in lr.rr_conflicts:
				3264	if not rejected.reduced and (rejected not in warned_never):
				3265	debuglog.warning("Rule (%s) is never reduced", rejected)
				3266	errorlog.warning("Rule (%s) is never reduced", rejected)
				3267	warned_never.append(rejected)
				3268
				3269	# Write the table file if requested
				3270	if write_tables:
				3271	lr.write_table(tabmodule,outputdir,signature)
				3272
				3273	# Write a pickled version of the tables
				3274	if picklefile:
				3275	lr.pickle_table(picklefile,signature)
				3276
				3277	# Build the parser
				3278	lr.bind_callables(pinfo.pdict)
				3279	parser = LRParser(lr,pinfo.error_func)
				3280
				3281	parse = parser.parse
				3282	return parser