@@ -1,4 +1,4 @@
-#-----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
 # ply: lex.py
 #
 # Author: David M. Beazley (dave@dabeaz.com)
@@ -19,83 +19,30 @@
 #
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 #
 # See the file LICENSE for a complete copy of the LGPL.
-#-----------------------------------------------------------------------------
-from __future__ import print_function
+# -----------------------------------------------------------------------------
 
 __version__ = "2.2"
 
-
-try:
-    from builtins import bytes
-    PY3 = True
-except ImportError:
-    # python2
-    bytes = str
-    PY3 = False
-
-
-import operator
-import os.path
-import re
-import sys
-import types
-import collections
-import functools
-
-if PY3:
-    _meth_func = "__func__"
-    _meth_self = "__self__"
-
-    _func_closure = "__closure__"
-    _func_code = "__code__"
-    _func_defaults = "__defaults__"
-    _func_globals = "__globals__"
-else:
-    _meth_func = "im_func"
-    _meth_self = "im_self"
-
-    _func_closure = "func_closure"
-    _func_code = "func_code"
-    _func_defaults = "func_defaults"
-    _func_globals = "func_globals"
-
-# define compatible function to support PY2 & PY3
-get_mth_func = operator.attrgetter(_meth_func)
-get_mth_self = operator.attrgetter(_meth_self)
-get_func_closure = operator.attrgetter(_func_closure)
-get_func_code = operator.attrgetter(_func_code)
-get_func_defaults = operator.attrgetter(_func_defaults)
-get_func_globals = operator.attrgetter(_func_globals)
-
+import re, sys, types, os.path
 
 # Regular expression used to match valid token names
-_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
-
-# Available instance types. This is used when lexers are defined by a class.
-# It's a little funky because I want to preserve backwards compatibility
-# with Python 2.0 where types.ObjectType is undefined.
-
-_INSTANCETYPE = getattr(types, 'InstanceType', object)
+_is_identifier = re.compile(r"^[a-zA-Z0-9_]+$")
 
+_INSTANCETYPE = object
 
 # Exception thrown when invalid token encountered and no default error
 # handler is defined.
-
-
 class LexError(Exception):
-
     def __init__(self, message, s):
         self.args = (message,)
         self.text = s
 
-# Token class
-
+# Token class
 class LexToken(object):
-
     def __str__(self):
         return "LexToken(%s,%r,%d,%d)" % (self.type, self.value, self.lineno, self.lexpos)
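Reviewer note on the hunk above: dropping the compatibility shim is safe because functions and bound methods have carried the Python 3 attribute names (`__code__`, `__defaults__`, `__globals__`, `__closure__`, `__func__`, `__self__`) since Python 2.6, so the `operator.attrgetter` indirection never selected anything version-specific. A minimal sketch of the direct access the patch switches to (names here are illustrative, not part of the patch):

    def t_NUMBER(t):
        r"\d+"
        return t

    # Direct attribute access works on Python 2.6+ and 3.x alike:
    code = t_NUMBER.__code__
    print(code.co_filename, code.co_firstlineno)  # where the rule was defined
    print(t_NUMBER.__defaults__, t_NUMBER.__globals__ is globals())

    class Rules(object):
        def t_ID(self, t):
            r"[A-Za-z_]\w*"
            return t

    bound = Rules().t_ID
    print(bound.__func__ is Rules.__dict__["t_ID"])   # True
    print(isinstance(bound.__self__, Rules))          # True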
@@ -105,6 +52,7 @@ class LexToken(object):
     def skip(self, n):
         self.lexer.skip(n)
 
+
 # -----------------------------------------------------------------------------
 # Lexer class
 #
@@ -116,32 +64,31 @@ class LexToken(object):
 
 class Lexer:
-
     def __init__(self):
-        self.lexre = None             # Master regular expression. This is a list of
+        self.lexre = None  # Master regular expression. This is a list of
         # tuples (re,findex) where re is a compiled
         # regular expression and findex is a list
         # mapping regex group numbers to rules
-        self.lexretext = None         # Current regular expression strings
-        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
-        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
-        self.lexstate = "INITIAL"     # Current lexer state
-        self.lexstatestack = []       # Stack of lexer states
-        self.lexstateinfo = None      # State information
-        self.lexstateignore = {}      # Dictionary of ignored characters for each state
-        self.lexstateerrorf = {}      # Dictionary of error functions for each state
-        self.lexreflags = 0           # Optional re compile flags
-        self.lexdata = None           # Actual input data (as a string)
-        self.lexpos = 0               # Current position in input text
-        self.lexlen = 0               # Length of the input text
-        self.lexerrorf = None         # Error rule (if any)
-        self.lextokens = None         # List of valid tokens
-        self.lexignore = ""           # Ignored characters
-        self.lexliterals = ""         # Literal characters that can be passed through
-        self.lexmodule = None         # Module
-        self.lineno = 1               # Current line number
-        self.lexdebug = 0             # Debugging mode
-        self.lexoptimize = 0          # Optimized mode
+        self.lexretext = None  # Current regular expression strings
+        self.lexstatere = {}  # Dictionary mapping lexer states to master regexs
+        self.lexstateretext = {}  # Dictionary mapping lexer states to regex strings
+        self.lexstate = "INITIAL"  # Current lexer state
+        self.lexstatestack = []  # Stack of lexer states
+        self.lexstateinfo = None  # State information
+        self.lexstateignore = {}  # Dictionary of ignored characters for each state
+        self.lexstateerrorf = {}  # Dictionary of error functions for each state
+        self.lexreflags = 0  # Optional re compile flags
+        self.lexdata = None  # Actual input data (as a string)
+        self.lexpos = 0  # Current position in input text
+        self.lexlen = 0  # Length of the input text
+        self.lexerrorf = None  # Error rule (if any)
+        self.lextokens = None  # List of valid tokens
+        self.lexignore = ""  # Ignored characters
+        self.lexliterals = ""  # Literal characters that can be passed through
+        self.lexmodule = None  # Module
+        self.lineno = 1  # Current line number
+        self.lexdebug = 0  # Debugging mode
+        self.lexoptimize = 0  # Optimized mode
 
     def clone(self, object=None):
         c = Lexer()
@@ -194,11 +141,12 @@ class Lexer:
     # writetab() - Write lexer information to a table file
     # ------------------------------------------------------------
    # <tm> 25 June 2008 added 'outputdir'
-    def writetab(self, tabfile, outputdir=''):
+    def writetab(self, tabfile, outputdir=""):
         tf = open(os.path.join(outputdir, tabfile) + ".py", "w")
         tf.write(
-            "# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" %
-            (tabfile, __version__))
+            "# %s.py. This file automatically created by PLY (version %s). Don't edit!\n"
+            % (tabfile, __version__)
+        )
         tf.write("_lextokens = %s\n" % repr(self.lextokens))
         tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
         tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
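Context: `writetab` is the producer half of optimized mode; the loader in the next hunk re-imports the generated `lextab.py` module and recompiles the saved patterns. Roughly what the generated header looks like (a hypothetical sketch with illustrative values; real files also carry the per-state regex and error-function tables):

    # lextab.py. This file automatically created by PLY (version 2.2). Don't edit!
    _lextokens = {'NUMBER': 1, 'PLUS': 1}
    _lexreflags = 0
    _lexliterals = ''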
@@ -240,16 +188,15 @@ class Lexer:
             txtitem = []
             for i in range(len(lre)):
                 titem.append(
-                    (re.compile(
-                        lre[i][0], lextab._lexreflags), _names_to_funcs(
-                        lre[i][1], fdict)))
+                    (re.compile(lre[i][0], lextab._lexreflags), _names_to_funcs(lre[i][1], fdict))
+                )
                 txtitem.append(lre[i][0])
             self.lexstatere[key] = titem
             self.lexstateretext[key] = txtitem
         self.lexstateerrorf = {}
         for key, ef in lextab._lexstateerrorf.items():
             self.lexstateerrorf[key] = fdict[ef]
-        self.begin('INITIAL')
+        self.begin("INITIAL")
 
     # ------------------------------------------------------------
     # input() - Push a new string into the lexer
@@ -313,8 +260,7 @@ class Lexer:
         lexdata = self.lexdata
 
         while lexpos < lexlen:
-            # This code provides some short-circuit code for whitespace, tabs, and
-            # other ignored characters
+            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
             if lexdata[lexpos] in lexignore:
                 lexpos += 1
                 continue
@@ -348,7 +294,7 @@ class Lexer:
                     break
 
                 # if func not callable, it means it's an ignored token
-                if not isinstance(func, collections.abc.Callable):
+                if not hasattr(func, "__call__"):
                     break
 
                 # If token is processed by a function, call it
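Reviewer note: `isinstance(func, collections.abc.Callable)` needs the `collections` import this patch deletes, and `collections.abc` only exists on Python 3.3+. `hasattr(func, "__call__")` is the portable spelling; the built-in `callable()` would also work everywhere except Python 3.0-3.1, where it was removed. A quick sketch of the equivalence (illustrative objects only):

    def f():
        return 1

    class CallableObj(object):
        def __call__(self):
            return 2

    for obj in (f, CallableObj(), "not callable", 42):
        # Both checks agree for ordinary objects:
        a = hasattr(obj, "__call__")
        b = callable(obj)
        print(type(obj).__name__, a, b, a == b)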
@@ -356,7 +302,7 @@ class Lexer:
 
                 # Every function must return a token, if nothing, we just move to next token
                 if not newtok:
-                    lexpos = self.lexpos        # This is here in case user has updated lexpos.
+                    lexpos = self.lexpos  # This is here in case user has updated lexpos.
 
                     # Added for pyglet/tools/wrapper/cparser.py by Alex
                     # Holkner on 20/Jan/2007
@@ -369,9 +315,16 @@ class Lexer:
                 # pyglet/tools/wrapper/cparser.py by Alex Holkner on
                 # 20/Jan/2007
                 if newtok.type not in self.lextokens and len(newtok.type) > 1:
-                    raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
-                        get_func_code(func).co_filename, get_func_code(func).co_firstlineno,
-                        func.__name__, newtok.type), lexdata[lexpos:])
+                    raise LexError(
+                        "%s:%d: Rule '%s' returned an unknown token type '%s'"
+                        % (
+                            func.__code__.co_filename,
+                            func.__code__.co_firstlineno,
+                            func.__name__,
+                            newtok.type,
+                        ),
+                        lexdata[lexpos:],
+                    )
 
                 return newtok
             else:
@@ -399,9 +352,9 @@ class Lexer:
                 if lexpos == self.lexpos:
                     # Error method didn't change text position at all. This is an error.
                     raise LexError(
-                        "Scanning error. Illegal character '%s'" %
-                        (lexdata[lexpos]), lexdata[
-                            lexpos:])
+                        "Scanning error. Illegal character '%s'" % (lexdata[lexpos]),
+                        lexdata[lexpos:],
+                    )
                 lexpos = self.lexpos
                 if not newtok:
                     continue
@@ -409,15 +362,16 @@ class Lexer:
 
             self.lexpos = lexpos
             raise LexError(
-                "Illegal character '%s' at index %d" %
-                (lexdata[lexpos], lexpos), lexdata[
-                    lexpos:])
+                "Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos),
+                lexdata[lexpos:],
+            )
 
         self.lexpos = lexpos + 1
         if self.lexdata is None:
             raise RuntimeError("No input string given with input()")
         return None
 
+
 # -----------------------------------------------------------------------------
 # _validate_file()
 #
@@ -429,19 +383,20 @@ class Lexer:
 
 def _validate_file(filename):
     import os.path
+
     base, ext = os.path.splitext(filename)
-    if ext != '.py':
-        return 1          # No idea what the file is. Return OK
+    if ext != ".py":
+        return 1  # No idea what the file is. Return OK
 
     try:
         f = open(filename)
         lines = f.readlines()
         f.close()
     except IOError:
-        return 1          # Oh well
+        return 1  # Oh well
 
-    fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
-    sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
+    fre = re.compile(r"\s*def\s+(t_[a-zA-Z_0-9]*)\(")
+    sre = re.compile(r"\s*(t_[a-zA-Z_0-9]*)\s*=")
     counthash = {}
     linen = 1
     noerror = 1
@@ -455,11 +410,15 @@ def _validate_file(filename):
             if not prev:
                 counthash[name] = linen
             else:
-                print("%s:%d: Rule %s redefined. Previously defined on line %d" % (filename, linen, name, prev))
+                print(
+                    "%s:%d: Rule %s redefined. Previously defined on line %d"
+                    % (filename, linen, name, prev)
+                )
                 noerror = 0
         linen += 1
     return noerror
 
+
 # -----------------------------------------------------------------------------
 # _funcs_to_names()
 #
@@ -477,6 +436,7 @@ def _funcs_to_names(funclist):
             result.append(f)
     return result
 
+
 # -----------------------------------------------------------------------------
 # _names_to_funcs()
 #
@@ -494,6 +454,7 @@ def _names_to_funcs(namelist, fdict):
             result.append(n)
     return result
 
+
 # -----------------------------------------------------------------------------
 # _form_master_re()
 #
@@ -534,6 +495,7 @@ def _form_master_re(relist, reflags, ldict):
         rlist, rre = _form_master_re(relist[m:], reflags, ldict)
         return llist + rlist, lre + rre
 
+
 # -----------------------------------------------------------------------------
 # def _statetoken(s,names)
 #
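Context for the `_form_master_re` code ending above: ply joins every rule pattern into one alternation of named groups and uses the group that matched to find the rule; the recursion exists because the helper splits the rule list in half and retries whenever `re.compile` rejects an oversized pattern. A self-contained sketch of the underlying technique (simplified, hypothetical rule names):

    import re

    rules = [
        ("NUMBER", r"\d+"),
        ("ID", r"[A-Za-z_]\w*"),
        ("PLUS", r"\+"),
    ]

    master = re.compile("|".join("(?P<%s>%s)" % (name, pat) for name, pat in rules))

    def tokenize(text):
        pos = 0
        while pos < len(text):
            m = master.match(text, pos)
            if not m:
                raise SyntaxError("illegal character %r" % text[pos])
            yield m.lastgroup, m.group()  # the named group tells us which rule fired
            pos = m.end()

    print(list(tokenize("abc+42")))  # [('ID', 'abc'), ('PLUS', '+'), ('NUMBER', '42')]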
@@ -548,19 +510,20 @@ def _statetoken(s, names):
     nonstate = 1
     parts = s.split("_")
     for i in range(1, len(parts)):
-        if parts[i] not in names and parts[i] != 'ANY':
+        if parts[i] not in names and parts[i] != "ANY":
             break
     if i > 1:
         states = tuple(parts[1:i])
     else:
-        states = ('INITIAL',)
+        states = ("INITIAL",)
 
-    if 'ANY' in states:
+    if "ANY" in states:
         states = tuple(names.keys())
 
     tokenname = "_".join(parts[i:])
     return (states, tokenname)
 
+
 # -----------------------------------------------------------------------------
 # lex(module)
 #
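For reference, `_statetoken` implements the `t_<state>_<name>` naming convention: every `_`-separated prefix component that names a declared state (or `ANY`) is consumed, and the remainder is the token name. Roughly, assuming the same `names` dict of declared states:

    def statetoken(s, names):
        # s is a rule name like "t_foo_bar_NUMBER"; names maps state -> type
        parts = s.split("_")
        for i in range(1, len(parts)):
            if parts[i] not in names and parts[i] != "ANY":
                break
        states = tuple(parts[1:i]) if i > 1 else ("INITIAL",)
        if "ANY" in states:
            states = tuple(names.keys())
        return states, "_".join(parts[i:])

    names = {"INITIAL": "inclusive", "foo": "exclusive", "bar": "inclusive"}
    print(statetoken("t_NUMBER", names))          # (('INITIAL',), 'NUMBER')
    print(statetoken("t_foo_bar_NUMBER", names))  # (('foo', 'bar'), 'NUMBER')
    print(statetoken("t_ANY_error", names))       # (all declared states, 'error')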
@@ -568,13 +531,20 @@ def _statetoken(s, names):
 # -----------------------------------------------------------------------------
 # cls added for pyglet/tools/wrapper/cparser.py by Alex Holkner on 22/Jan/2007
 # <tm> 25 June 2008 added 'outputdir'
-
-
-def lex(module=None, object=None, debug=0, optimize=0,
-        lextab="lextab", reflags=0, nowarn=0, outputdir='', cls=Lexer):
+def lex(
+    module=None,
+    object=None,
+    debug=0,
+    optimize=0,
+    lextab="lextab",
+    reflags=0,
+    nowarn=0,
+    outputdir="",
+    cls=Lexer,
+):
     global lexer
     ldict = None
-    stateinfo = {'INITIAL': 'inclusive'}
+    stateinfo = {"INITIAL": "inclusive"}
     error = 0
     files = {}
     lexobj = cls()
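For context, the public entry point is unchanged apart from formatting; a typical calling module still looks like this (a minimal, hypothetical example, not part of the patch):

    import lex  # packaged distributions import it as ply.lex

    tokens = ("NUMBER", "PLUS")

    t_PLUS = r"\+"
    t_ignore = " \t"

    def t_NUMBER(t):
        r"\d+"
        t.value = int(t.value)
        return t

    def t_error(t):
        print("Illegal character %r" % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()  # or e.g. lex.lex(module=some_module, reflags=0)
    lexer.input("1 + 2")
    for tok in iter(lexer.token, None):
        print(tok.type, tok.value)

With no `module` or `object` argument, `lex()` falls back to the caller's globals via the frame walk in the next hunk.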
@@ -610,8 +580,8 @@ def lex(module=None, object=None, debug=0, optimize=0,
     except RuntimeError:
         e, b, t = sys.exc_info()
         f = t.tb_frame
-        f = f.f_back             # Walk out to our calling function
-        ldict = f.f_globals      # Grab its globals dictionary
+        f = f.f_back  # Walk out to our calling function
+        ldict = f.f_globals  # Grab its globals dictionary
 
     if optimize and lextab:
         try:
@@ -625,7 +595,7 @@ def lex(module=None, object=None, debug=0, optimize=0,
             pass
 
     # Get the tokens, states, and literals variables (if any)
-    if (module and isinstance(module, _INSTANCETYPE)):
+    if module and isinstance(module, _INSTANCETYPE):
         tokens = getattr(module, "tokens", None)
         states = getattr(module, "states", None)
         literals = getattr(module, "literals", "")
@@ -658,8 +628,7 @@ def lex(module=None, object=None, debug=0, optimize=0,
     try:
         for c in literals:
-            if not (isinstance(c, bytes) or isinstance(
-                    c, str)) or len(c) > 1:
+            if not (isinstance(c, bytes) or isinstance(c, str)) or len(c) > 1:
                 print("lex: Invalid literal %s. Must be a single character" % repr(c))
                 error = 1
                 continue
@@ -678,7 +647,10 @@ def lex(module=None, object=None, debug=0, optimize=0,
     else:
         for s in states:
             if not isinstance(s, tuple) or len(s) != 2:
-                print("lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s))
+                print(
+                    "lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')"
+                    % repr(s)
+                )
                 error = 1
                 continue
             name, statetype = s
@@ -686,7 +658,7 @@ def lex(module=None, object=None, debug=0, optimize=0,
                 print("lex: state name %s must be a string" % repr(name))
                 error = 1
                 continue
-            if not (statetype == 'inclusive' or statetype == 'exclusive'):
+            if not (statetype == "inclusive" or statetype == "exclusive"):
                 print("lex: state type for state %s must be 'inclusive' or 'exclusive'" % name)
                 error = 1
                 continue
@@ -697,20 +669,20 @@ def lex(module=None, object=None, debug=0, optimize=0,
             stateinfo[name] = statetype
 
     # Get a list of symbols with the t_ or s_ prefix
-    tsymbols = [f for f in ldict.keys() if f[:2] == 't_']
+    tsymbols = [f for f in ldict.keys() if f[:2] == "t_"]
 
     # Now build up a list of functions and a list of strings
 
-    funcsym = {}      # Symbols defined as functions
-    strsym = {}       # Symbols defined as strings
-    toknames = {}     # Mapping of symbols to token names
+    funcsym = {}  # Symbols defined as functions
+    strsym = {}  # Symbols defined as strings
+    toknames = {}  # Mapping of symbols to token names
 
     for s in stateinfo.keys():
         funcsym[s] = []
         strsym[s] = []
 
-    ignore = {}       # Ignore strings by state
-    errorf = {}       # Error functions by state
+    ignore = {}  # Ignore strings by state
+    errorf = {}  # Error functions by state
 
     if len(tsymbols) == 0:
         raise SyntaxError("lex: no rules of the form t_rulename are defined.")
@@ -720,10 +692,10 @@ def lex(module=None, object=None, debug=0, optimize=0,
         states, tokname = _statetoken(f, stateinfo)
         toknames[f] = tokname
 
-        if isinstance(t, collections.abc.Callable):
+        if hasattr(t, "__call__"):
             for s in states:
                 funcsym[s].append((f, t))
-        elif (isinstance(t, bytes) or isinstance(t, str)):
+        elif isinstance(t, bytes) or isinstance(t, str):
             for s in states:
                 strsym[s].append((f, t))
         else:
@@ -732,21 +704,11 @@ def lex(module=None, object=None, debug=0, optimize=0,
 
     # Sort the functions by line number
     for f in funcsym.values():
-        if os.sys.version_info.major >= 3:
-            f.sort(key=lambda x: get_func_code(x[1]).co_firstlineno)
-        else:
-            f.sort(key=lambda x, y: cmp(get_func_code(x[1]).co_firstlineno,
-                                        get_func_code(y[1]).co_firstlineno))
+        f.sort(key=lambda x: x[1].__code__.co_firstlineno)
 
     # Sort the strings by regular expression length
     for s in strsym.values():
-        if os.sys.version_info.major >= 3:
-            s.sort(key=functools.cmp_to_key(lambda x, y:
-                                            (len(x[1]) < len(y[1])) -
-                                            (len(x[1]) > len(y[1]))))
-        else:
-            s.sort(key=lambda x, y: (len(x[1]) < len(y[1])) -
-                   (len(x[1]) > len(y[1])))
+        s.sort(key=lambda x: len(x[1]), reverse=True)
 
     regexs = {}
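Reviewer note on this hunk: the version branches die with the compat layer, but watch the semantics of the string sort. The deleted comparator returns a negative value when `x` is the longer pattern, i.e. it sorts string rules by *decreasing* regex length so that longer literals such as `==` are tried before `=` in the master regex. A bare `key=lambda x: len(x[1])` sorts ascending and silently reverses that priority, hence the `reverse=True` in the replacement line. A quick check that the two spellings agree:

    from functools import cmp_to_key

    rules = [("t_EQ", r"=="), ("t_ASSIGN", r"="), ("t_LE", r"<=")]

    # Old comparator: negative when x is longer, i.e. longest first
    cmp_style = sorted(rules, key=cmp_to_key(
        lambda x, y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))))

    key_style = sorted(rules, key=lambda x: len(x[1]), reverse=True)

    assert [r[1] for r in cmp_style] == [r[1] for r in key_style]  # ['==', '<=', '=']
    print(key_style)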
@@ -756,38 +718,37 @@ def lex(module=None, object=None, debug=0, optimize=0,
 
         # Add rules defined by functions first
         for fname, f in funcsym[state]:
-            line = get_func_code(f).co_firstlineno
-            file_ = get_func_code(f).co_filename
-            files[file_] = None
+            line = f.__code__.co_firstlineno
+            file = f.__code__.co_filename
+            files[file] = None
             tokname = toknames[fname]
 
             ismethod = isinstance(f, types.MethodType)
 
             if not optimize:
-                nargs = get_func_code(f).co_argcount
+                nargs = f.__code__.co_argcount
                 if ismethod:
                     reqargs = 2
                 else:
                     reqargs = 1
                 if nargs > reqargs:
-                    print("%s:%d: Rule '%s' has too many arguments."
-                          % (file_, line, f.__name__))
+                    print("%s:%d: Rule '%s' has too many arguments." % (file, line, f.__name__))
                     error = 1
                     continue
 
                 if nargs < reqargs:
-                    print("%s:%d: Rule '%s' requires an argument."
-                          % (file_, line, f.__name__))
+                    print("%s:%d: Rule '%s' requires an argument." % (file, line, f.__name__))
                     error = 1
                     continue
 
-                if tokname == 'ignore':
-                    print("%s:%d: Rule '%s' must be defined as a string."
-                          % (file_, line, f.__name__))
+                if tokname == "ignore":
+                    print(
+                        "%s:%d: Rule '%s' must be defined as a string." % (file, line, f.__name__)
+                    )
                     error = 1
                     continue
 
-            if tokname == 'error':
+            if tokname == "error":
                 errorf[state] = f
                 continue
|
|
|
|
@@ -796,42 +757,50 @@ def lex(module=None, object=None, debug=0, optimize=0,
|
|
try:
|
|
try:
|
|
c = re.compile("(?P<%s>%s)" % (f.__name__, f.__doc__), re.VERBOSE | reflags)
|
|
c = re.compile("(?P<%s>%s)" % (f.__name__, f.__doc__), re.VERBOSE | reflags)
|
|
if c.match(""):
|
|
if c.match(""):
|
|
- print("%s:%d: Regular expression for rule '%s' "
|
|
|
|
- "matches empty string."
|
|
|
|
- % (file_, line, f.__name__))
|
|
|
|
|
|
+ print(
|
|
|
|
+ "%s:%d: Regular expression for rule '%s' matches empty string."
|
|
|
|
+ % (file, line, f.__name__)
|
|
|
|
+ )
|
|
error = 1
|
|
error = 1
|
|
continue
|
|
continue
|
|
except re.error as e:
|
|
except re.error as e:
|
|
- print("%s:%d: Invalid regular expression for rule '%s'. %s"
|
|
|
|
- % (file_, line, f.__name__, e))
|
|
|
|
- if '#' in f.__doc__:
|
|
|
|
- print("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'."
|
|
|
|
- % (file_, line, f.__name__))
|
|
|
|
|
|
+ print(
|
|
|
|
+ "%s:%d: Invalid regular expression for rule '%s'. %s"
|
|
|
|
+ % (file, line, f.__name__, e)
|
|
|
|
+ )
|
|
|
|
+ if "#" in f.__doc__:
|
|
|
|
+ print(
|
|
|
|
+ "%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'."
|
|
|
|
+ % (file, line, f.__name__)
|
|
|
|
+ )
|
|
error = 1
|
|
error = 1
|
|
continue
|
|
continue
|
|
|
|
|
|
if debug:
|
|
if debug:
|
|
- print("lex: Adding rule %s -> '%s' (state '%s')"
|
|
|
|
- % (f.__name__, f.__doc__, state))
|
|
|
|
|
|
+ print(
|
|
|
|
+ "lex: Adding rule %s -> '%s' (state '%s')"
|
|
|
|
+ % (f.__name__, f.__doc__, state)
|
|
|
|
+ )
|
|
|
|
|
|
# Okay. The regular expression seemed okay. Let's append it to the master regular
|
|
# Okay. The regular expression seemed okay. Let's append it to the master regular
|
|
# expression we're building
|
|
# expression we're building
|
|
|
|
|
|
regex_list.append("(?P<%s>%s)" % (f.__name__, f.__doc__))
|
|
regex_list.append("(?P<%s>%s)" % (f.__name__, f.__doc__))
|
|
else:
|
|
else:
|
|
- print("%s:%d: No regular expression defined for rule '%s'"
|
|
|
|
- % (file_, line, f.__name__))
|
|
|
|
|
|
+ print(
|
|
|
|
+ "%s:%d: No regular expression defined for rule '%s'" % (file, line, f.__name__)
|
|
|
|
+ )
|
|
|
|
|
|
# Now add all of the simple rules
|
|
# Now add all of the simple rules
|
|
for name, r in strsym[state]:
|
|
for name, r in strsym[state]:
|
|
tokname = toknames[name]
|
|
tokname = toknames[name]
|
|
|
|
|
|
- if tokname == 'ignore':
|
|
|
|
|
|
+ if tokname == "ignore":
|
|
ignore[state] = r
|
|
ignore[state] = r
|
|
continue
|
|
continue
|
|
|
|
|
|
if not optimize:
|
|
if not optimize:
|
|
- if tokname == 'error':
|
|
|
|
|
|
+ if tokname == "error":
|
|
raise SyntaxError("lex: Rule '%s' must be defined as a function" % name)
|
|
raise SyntaxError("lex: Rule '%s' must be defined as a function" % name)
|
|
error = 1
|
|
error = 1
|
|
continue
|
|
continue
|
|
@@ -842,13 +811,13 @@ def lex(module=None, object=None, debug=0, optimize=0,
                     continue
                 try:
                     c = re.compile("(?P<%s>%s)" % (name, r), re.VERBOSE | reflags)
-                    if (c.match("")):
+                    if c.match(""):
                         print("lex: Regular expression for rule '%s' matches empty string." % name)
                         error = 1
                         continue
                 except re.error as e:
                     print("lex: Invalid regular expression for rule '%s'. %s" % (name, e))
-                    if '#' in r:
+                    if "#" in r:
                         print("lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name)
 
                     error = 1
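Aside: the `c.match("")` guard, unchanged here apart from dropping redundant parentheses, exists because a rule that can match the empty string would let `token()` loop forever without consuming input. A sketch of what it rejects (hypothetical rules):

    import re

    good = re.compile(r"(?P<t_NUMBER>\d+)")
    bad = re.compile(r"(?P<t_ID>\w*)")  # '*' permits an empty match

    print(good.match("") is None)  # True: rule is safe
    print(bad.match("") is None)   # False: would be rejected by lex()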
@@ -887,9 +856,9 @@ def lex(module=None, object=None, debug=0, optimize=0,
 
     # For inclusive states, we need to add the INITIAL state
     for state, type in stateinfo.items():
-        if state != "INITIAL" and type == 'inclusive':
-            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
-            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
+        if state != "INITIAL" and type == "inclusive":
+            lexobj.lexstatere[state].extend(lexobj.lexstatere["INITIAL"])
+            lexobj.lexstateretext[state].extend(lexobj.lexstateretext["INITIAL"])
 
     lexobj.lexstateinfo = stateinfo
     lexobj.lexre = lexobj.lexstatere["INITIAL"]
@@ -907,12 +876,12 @@ def lex(module=None, object=None, debug=0, optimize=0,
 
     # Check state information for ignore and error rules
     for s, stype in stateinfo.items():
-        if stype == 'exclusive':
+        if stype == "exclusive":
             if warn and s not in errorf:
                 print("lex: Warning. no error rule is defined for exclusive state '%s'" % s)
             if warn and s not in ignore and lexobj.lexignore:
                 print("lex: Warning. no ignore rule is defined for exclusive state '%s'" % s)
-        elif stype == 'inclusive':
+        elif stype == "inclusive":
             if s not in errorf:
                 errorf[s] = errorf.get("INITIAL", None)
             if s not in ignore:
@@ -929,6 +898,7 @@ def lex(module=None, object=None, debug=0, optimize=0,
 
     return lexobj
 
+
 # -----------------------------------------------------------------------------
 # runmain()
 #
@@ -971,11 +941,14 @@ def runmain(lexer=None, data=None):
 # when its docstring might need to be set in an alternative way
 # -----------------------------------------------------------------------------
 
+
 def TOKEN(r):
     def set_doc(f):
         f.__doc__ = r
         return f
+
     return set_doc
 
+
 # Alternative spelling of the TOKEN decorator
 Token = TOKEN
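The decorator's purpose, per the comment above it, is to attach a regex to a rule whose docstring cannot be written literally, e.g. one built from other strings; usage is unchanged by this patch:

    from lex import TOKEN  # packaged distributions: from ply.lex import TOKEN

    digit = r"[0-9]"
    number_re = digit + r"+"

    @TOKEN(number_re)
    def t_NUMBER(t):
        t.value = int(t.value)
        return t

    print(t_NUMBER.__doc__)  # '[0-9]+' -- picked up by lex() as the rule's pattern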