--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+import abc
+
+
class XDRReader:
    """Character-at-a-time reader with lookahead, lookbehind and position.

    Wraps a file-like object ``fp`` (only ``fp.read(n)`` is used) and tracks
    the 1-based line and 0-based column of the next character to be consumed.
    """

    def __init__(self, fp):
        self.fp = fp
        # Characters already pulled from ``fp`` by peek() but not yet consumed.
        self.lookahead = ""
        # Up to two most recently consumed characters, newest first.
        self.lookbehind = ""
        self.line = 1
        self.column = 0

    def _read(self):
        """Return the next raw character, draining the peek buffer first."""
        if not self.lookahead:
            return self.fp.read(1)
        head, self.lookahead = self.lookahead[:1], self.lookahead[1:]
        return head

    def peek(self, skip=0):
        """Return the character ``skip`` positions ahead without consuming it.

        Returns None when the underlying stream cannot supply that many
        characters.
        """
        wanted = skip + 1
        missing = wanted - len(self.lookahead)
        if missing > 0:
            self.lookahead += self.fp.read(missing)
            if len(self.lookahead) < wanted:
                return None
        return self.lookahead[skip:wanted]

    def last(self, skip=0):
        """Return a recently consumed character (0 = newest), or None.

        Only the two newest characters are retained.
        """
        if skip >= len(self.lookbehind):
            return None
        return self.lookbehind[skip]

    def next(self):
        """Consume one character.

        Returns a ``(char, line, column)`` tuple where line/column are the
        position the character was read from. A newline bumps the line and
        resets the column; anything else (including whatever ``fp.read(1)``
        yields once the stream is exhausted) advances the column.
        """
        at_line, at_column = self.line, self.column
        ch = self._read()
        if ch == "\n":
            self.line += 1
            self.column = 0
        else:
            self.column += 1
        # Prepend and clamp so index 0 is always the newest character.
        self.lookbehind = (ch + self.lookbehind)[:2]
        return ch, at_line, at_column
+
+
class XDRToken(abc.ABC):
    """Base class for lexer tokens.

    A token records the line and column it started at and the text it
    covers. Subclasses decide where a token may begin (``start``) and when
    it is complete (``end``) by inspecting the reader.
    """

    def __init__(self, line, column, value):
        self.line = line
        self.column = column
        self.value = value

    # __eq__ is defined on mutable instances, so tokens stay unhashable
    # (this is what Python does implicitly; spelled out for clarity).
    __hash__ = None

    def __eq__(self, other):
        """Tokens are equal when exact type, position and text all match."""
        if type(self) is not type(other):
            return NotImplemented
        return (
            self.line == other.line
            and self.column == other.column
            and self.value == other.value
        )

    @classmethod
    @abc.abstractmethod
    def start(cls, reader):
        """Return True if a token of this type begins at the reader position."""

    @classmethod
    @abc.abstractmethod
    def end(cls, reader):
        """Return True once the token being consumed is complete."""

    @classmethod
    def consume(cls, reader):
        """Read one token of this type from ``reader`` and return it.

        The first character is always taken; further characters are taken
        until ``end()`` reports completion. If the input runs out first the
        token ends there, unless ``end()`` itself raises for that situation.
        """
        first, line, col = reader.next()
        parts = [first]
        while not cls.end(reader):
            # Without this guard an end() that never fires at EOF would spin
            # forever, because reader.next() keeps yielding empty reads.
            if reader.peek() is None:
                break
            ch, _, _ = reader.next()
            parts.append(ch)
        return cls(line, col, "".join(parts))

    def __repr__(self):
        return "%s{line=%d,col=%d,value={{{%s}}}}" % (
            self.__class__.__name__,
            self.line,
            self.column,
            self.value,
        )
+
+
class XDRTokenComment(XDRToken):
    """A C-style block comment: ``/*`` up to and including the next ``*/``."""

    @classmethod
    def start(cls, reader):
        """Return True if the next two characters are ``/*``."""
        return reader.peek() == "/" and reader.peek(skip=1) == "*"

    @classmethod
    def end(cls, reader):
        """Return True if the last two consumed characters were ``*/``.

        Raises:
            Exception: if the input is exhausted without a closing ``*/``.
                The reported position is the reader's current one.
        """
        if reader.last(skip=1) == "*" and reader.last() == "/":
            return True

        if reader.peek() is None:
            raise Exception(
                "EOF before closing comment starting at %d:%d"
                % (reader.line, reader.column)
            )
        return False

    @classmethod
    def consume(cls, reader):
        """Read a whole comment and return it as a token.

        Termination is decided from the characters of this comment only.
        The reader's lookbehind may still hold text from before the
        comment, so ``*`` followed directly by ``/*`` would otherwise look
        like a terminator; the opener's own ``*`` must not close it either
        (``/*/``).

        Raises:
            Exception: if the input ends before ``*/``; the message carries
                the position where the comment started.
        """
        ch, line, col = reader.next()
        parts = [ch]
        while True:
            # "/**/" is the shortest complete comment, hence the length check.
            if len(parts) >= 4 and parts[-2] == "*" and parts[-1] == "/":
                break
            if reader.peek() is None:
                raise Exception(
                    "EOF before closing comment starting at %d:%d" % (line, col)
                )
            ch, _, _ = reader.next()
            parts.append(ch)
        return cls(line, col, "".join(parts))
+
+
class XDRTokenIdentifier(XDRToken):
    """A name: starts with a letter, continues with letters, digits or ``_``."""

    @classmethod
    def start(cls, reader):
        """Return True if the next character is alphabetic."""
        return reader.peek().isalpha()

    @classmethod
    def end(cls, reader):
        """Return True at end of input or before a non-identifier character."""
        upcoming = reader.peek()
        if upcoming is None:
            return True
        return not (upcoming.isalnum() or upcoming == "_")
+
+
class XDRTokenPunctuation(XDRToken):
    """A single punctuation character."""

    # Every character that forms a punctuation token on its own.
    _SYMBOLS = (";", "=", "{", "}", ",", "[", "]", "<", ">", "*", "(", ")", ":")

    @classmethod
    def start(cls, reader):
        """Return True if the next character is one of the known symbols."""
        return reader.peek() in cls._SYMBOLS

    @classmethod
    def end(cls, reader):
        """Punctuation is always exactly one character long."""
        return True
+
+
class XDRTokenConstant(XDRToken):
    """A numeric constant.

    Begins with a decimal digit, or with ``-`` immediately followed by one.
    Continues through decimal digits, ``.``, ``x`` and the hex letters
    ``a``-``f`` (either case); no further validation is done here.
    """

    @classmethod
    def start(cls, reader):
        """Return True if a constant begins at the reader position."""
        c1 = reader.peek()
        c2 = reader.peek(skip=1)
        return c1.isdecimal() or (c1 == "-" and c2 is not None and c2.isdecimal())

    @classmethod
    def end(cls, reader):
        """Return True at end of input or before a non-constant character."""
        c = reader.peek()
        # A constant may be the very last thing in the input.
        if c is None:
            return True
        return not (
            c.isdecimal()
            or c == "."
            or c.lower() in ("x", "a", "b", "c", "d", "e", "f")
        )
+
+
class XDRTokenCEscape(XDRToken):
    """A pass-through line: ``%`` in column 0 up to, not including, newline."""

    @classmethod
    def start(cls, reader):
        """Return True if the reader is at column 0 and the next char is ``%``."""
        return reader.column == 0 and reader.peek() == "%"

    @classmethod
    def end(cls, reader):
        """Return True before a newline or at end of input."""
        c = reader.peek()
        # The final line of the input may lack a trailing newline.
        return c is None or c == "\n"
+
+
class XDRTokenSpace(XDRToken):
    """A run of whitespace characters."""

    @classmethod
    def start(cls, reader):
        """Return True if the next character is whitespace."""
        upcoming = reader.peek()
        return upcoming.isspace()

    @classmethod
    def end(cls, reader):
        """Return True at end of input or before a non-whitespace character."""
        upcoming = reader.peek()
        if upcoming is None:
            return True
        return not upcoming.isspace()
+
+
class XDRLexer:
    """Turns a character stream into tokens, with one-token lookahead.

    Whitespace and comment tokens are recognised but never returned.
    """

    def __init__(self, fp):
        self.reader = XDRReader(fp)
        # Tokens fetched by peek() that next() has not handed out yet.
        self.lookahead = []

    def _token(self):
        """Scan and return the next significant token, or None at end of input.

        Raises:
            Exception: if the next character does not start any known token.
        """
        # Order matters: the first type whose start() accepts wins.
        token_types = (
            XDRTokenComment,
            XDRTokenIdentifier,
            XDRTokenCEscape,
            XDRTokenPunctuation,
            XDRTokenConstant,
            XDRTokenSpace,
        )
        skipped = (XDRTokenSpace, XDRTokenComment)
        while True:
            # Re-checked after every consumed token so that start() is never
            # asked to inspect an exhausted reader.
            if self.reader.peek() is None:
                return None

            for token_type in token_types:
                if token_type.start(self.reader):
                    token = token_type.consume(self.reader)
                    break
            else:
                # Nothing matched and nothing was consumed: looping again
                # would never terminate, so report the offending character.
                raise Exception(
                    "Unexpected character %r at %d:%d"
                    % (self.reader.peek(), self.reader.line, self.reader.column)
                )

            if type(token) not in skipped:
                return token

    def next(self):
        """Return the next token (consuming it), or None at end of input."""
        if self.lookahead:
            return self.lookahead.pop(0)
        return self._token()

    def peek(self):
        """Return the next token without consuming it, or None at end of input."""
        if not self.lookahead:
            token = self._token()
            if token is None:
                return None
            self.lookahead.append(token)
        return self.lookahead[0]
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+from pathlib import Path
+
+from rpcgen.lexer import (
+ XDRLexer,
+ XDRTokenIdentifier,
+ XDRTokenPunctuation,
+ XDRTokenConstant,
+)
+
+
def test_lexer():
    """Tokenize ``simple.x`` (next to this file) and check every token.

    The expected list holds only identifier, punctuation and constant
    tokens, each with its line, column and text.
    """
    # Short aliases keep the expectation table compact; arguments are
    # positional in (line, column, value) order.
    ident = XDRTokenIdentifier
    punct = XDRTokenPunctuation
    const = XDRTokenConstant

    source = Path(__file__).parent / "simple.x"
    with source.open("r") as fp:
        lexer = XDRLexer(fp)
        # lexer.next() yields None once the input is exhausted.
        tokens = list(iter(lexer.next, None))

    expected = [
        # const MAXUSERNAME = 32;
        ident(3, 0, "const"),
        ident(3, 6, "MAXUSERNAME"),
        punct(3, 18, "="),
        const(3, 20, "32"),
        punct(3, 22, ";"),
        # const MAXFILELEN = 65535;
        ident(4, 0, "const"),
        ident(4, 6, "MAXFILELEN"),
        punct(4, 17, "="),
        const(4, 19, "65535"),
        punct(4, 24, ";"),
        # const MAXNAMELEN = 255;
        ident(5, 0, "const"),
        ident(5, 6, "MAXNAMELEN"),
        punct(5, 17, "="),
        const(5, 19, "255"),
        punct(5, 22, ";"),
        # enum filekind { ... };
        ident(10, 0, "enum"),
        ident(10, 5, "filekind"),
        punct(10, 14, "{"),
        ident(11, 3, "TEXT"),
        punct(11, 8, "="),
        const(11, 10, "0"),
        punct(11, 11, ","),
        ident(12, 3, "DATA"),
        punct(12, 8, "="),
        const(12, 10, "1"),
        punct(12, 11, ","),
        ident(13, 3, "EXEC"),
        punct(13, 8, "="),
        const(13, 10, "2"),
        punct(14, 0, "}"),
        punct(14, 1, ";"),
        # union filetype switch (filekind kind) { ... };
        ident(19, 0, "union"),
        ident(19, 6, "filetype"),
        ident(19, 15, "switch"),
        punct(19, 22, "("),
        ident(19, 23, "filekind"),
        ident(19, 32, "kind"),
        punct(19, 36, ")"),
        punct(19, 38, "{"),
        ident(20, 0, "case"),
        ident(20, 5, "TEXT"),
        punct(20, 9, ":"),
        ident(21, 3, "void"),
        punct(21, 7, ";"),
        ident(22, 0, "case"),
        ident(22, 5, "DATA"),
        punct(22, 9, ":"),
        ident(23, 3, "string"),
        ident(23, 10, "creator"),
        punct(23, 17, "<"),
        ident(23, 18, "MAXNAMELEN"),
        punct(23, 28, ">"),
        punct(23, 29, ";"),
        ident(24, 0, "case"),
        ident(24, 5, "EXEC"),
        punct(24, 9, ":"),
        ident(25, 3, "string"),
        ident(25, 10, "interpretor"),
        punct(25, 21, "<"),
        ident(25, 22, "MAXNAMELEN"),
        punct(25, 32, ">"),
        punct(25, 33, ";"),
        punct(26, 0, "}"),
        punct(26, 1, ";"),
        # struct file { ... };
        ident(30, 0, "struct"),
        ident(30, 7, "file"),
        punct(30, 12, "{"),
        ident(31, 3, "string"),
        ident(31, 10, "filename"),
        punct(31, 18, "<"),
        ident(31, 19, "MAXNAMELEN"),
        punct(31, 29, ">"),
        punct(31, 30, ";"),
        ident(32, 3, "filetype"),
        ident(32, 12, "type"),
        punct(32, 16, ";"),
        ident(33, 3, "string"),
        ident(33, 10, "owner"),
        punct(33, 15, "<"),
        ident(33, 16, "MAXUSERNAME"),
        punct(33, 27, ">"),
        punct(33, 28, ";"),
        ident(34, 3, "opaque"),
        ident(34, 10, "data"),
        punct(34, 14, "<"),
        ident(34, 15, "MAXFILELEN"),
        punct(34, 25, ">"),
        punct(34, 26, ";"),
        punct(35, 0, "}"),
        punct(35, 1, ";"),
    ]
    assert tokens == expected