1
1
mirror of https://github.com/KenanZhu/AutoLibrary.git synced 2026-06-17 23:13:03 +08:00
Files
AutoLibrary/src/autoscript/ASTokenizer.py
T
2026-05-21 00:27:43 +08:00

585 lines
14 KiB
Python

# -*- coding: utf-8 -*-
"""
Copyright (c) 2026 KenanZhu.
All rights reserved.
This software is provided "as is", without any warranty of any kind.
You may use, modify, and distribute this file under the terms of the MIT License.
See the LICENSE file for details.
"""
import re
# Public names exported by a star-import of this module.
__all__ = [
    "ASTokenizer",
    "Stmt",
    "Script",
    "IfNode",
    "ElifNode",
    "SetNode",
    "OpNode",
    "PassNode",
    "UnrecogNode",
    "NodeVisitor",
    "LineStrategy",
]
# Token kind constants: the ``kind`` value reported for each recognised
# line form.
K_IF = "IF"
K_ELSE_IF = "ELSE IF"
K_ELSE = "ELSE"
K_ENDIF = "ENDIF"
K_SET = "SET"
K_ADD = "ADD"
K_SUB = "SUB"
K_PASS = "PASS"

# Op-type constants: stored in ``op_type`` for SET / .ADD. / .SUB. lines.
OP_SET = "set"
OP_ADD = "add"
OP_SUB = "sub"

# Compiled line patterns. Each one is anchored at both ends and matches
# case-insensitively, so it has to describe the whole line.
# IF(<condition>) with an optional trailing THEN; group 1 is the condition.
_RE_IF = re.compile(r"^IF\((.+)\)(?:\s+THEN\s*)?$", re.IGNORECASE)
# ELSE IF(<condition>) with an optional trailing THEN; group 1 is the condition.
_RE_ELSE_IF = re.compile(r"^ELSE\s+IF\((.+)\)(?:\s+THEN\s*)?$", re.IGNORECASE)
_RE_ELSE = re.compile(r"^ELSE\s*$", re.IGNORECASE)
# ENDIF, or END IF separated by any run of whitespace. The two-word form
# uses \s+ like ELSE IF above, so "END  IF" and "END<TAB>IF" close a block
# instead of being reported as unrecognised lines.
_RE_ENDIF = re.compile(r"^(ENDIF|END\s+IF)$", re.IGNORECASE)
# SET <name> = <expression>; groups are (name, expression).
_RE_SET = re.compile(r"^SET\s+(\w+)\s*=\s*(.+)$", re.IGNORECASE)
# <name> .ADD. <operand> / <name> .SUB. <operand>; the operand is either a
# (possibly negative, possibly decimal) number or a word.
_RE_ADD = re.compile(r"^(\w+)\s+\.ADD\.\s+(-?\d+(?:\.\d+)?|\w+)$", re.IGNORECASE)
_RE_SUB = re.compile(r"^(\w+)\s+\.SUB\.\s+(-?\d+(?:\.\d+)?|\w+)$", re.IGNORECASE)
_RE_PASS = re.compile(r"^\s*PASS\s*$", re.IGNORECASE)
class Script:
"""
Root AST node for an entire AutoScript.
Contains an ordered list of top-level statement nodes.
"""
def __init__(
self,
body: list = None
):
self.body = body or []
def accept(
self,
visitor
):
return visitor.visitScript(self)
class IfNode:
"""
IF conditional block with optional ELSE IF / ELSE branches.
Attributes:
condition (str): Raw condition expression.
body (list): Statements executed when condition is true.
elif_branches (list[ElifNode]): ELSE IF branches in order.
else_body (list): Statements executed for the ELSE branch.
closed (bool): Whether this IF has a matching ENDIF token.
"""
def __init__(
self,
condition: str = "",
body: list = None,
elif_branches: list = None,
else_body: list = None,
closed: bool = True
):
self.condition = condition
self.body = body or []
self.elif_branches = elif_branches or []
self.else_body = else_body or []
self.closed = closed
def accept(
self,
visitor
):
return visitor.visitIf(self)
class ElifNode:
"""
ELSE IF branch within an IfNode.
"""
def __init__(
self,
condition: str = "",
body: list = None
):
self.condition = condition
self.body = body or []
class SetNode:
    """
    Statement node for a SET assignment.
    Attributes:
        target (str): Name being assigned.
        value (str): Raw text of the assigned expression.
    """

    def __init__(self, target: str = "", value: str = ""):
        self.target, self.value = target, value

    def accept(self, visitor):
        """
        Hand this node to ``visitor.visitSet`` and return what it returns.
        """
        handler = visitor.visitSet
        return handler(self)
class OpNode:
    """
    Statement node for an .ADD. / .SUB. operation.
    Attributes:
        op_type (str): Which operation this node represents.
        target (str): Name the operation is applied to.
        value (str): Raw operand text.
    """

    def __init__(self, op_type: str = "", target: str = "", value: str = ""):
        self.op_type, self.target, self.value = op_type, target, value

    def accept(self, visitor):
        """
        Hand this node to ``visitor.visitOp`` and return what it returns.
        """
        handler = visitor.visitOp
        return handler(self)
class PassNode:
    """
    Statement node for PASS, the no-op statement.
    """

    def accept(self, visitor):
        """
        Hand this node to ``visitor.visitPass`` and return what it returns.
        """
        handler = visitor.visitPass
        return handler(self)
class UnrecogNode:
    """
    Node holding a line that matched no known syntax form, kept so
    that later stages can report it.
    Attributes:
        raw_line (str): Text of the unrecognised line.
    """

    def __init__(self, raw_line: str = ""):
        self.raw_line = raw_line

    def accept(self, visitor):
        """
        Hand this node to ``visitor.visitUnrecog`` and return what it returns.
        """
        handler = visitor.visitUnrecog
        return handler(self)
class NodeVisitor:
    """
    Base visitor for the AutoScript AST.
    Override the visit* methods in a subclass to add behaviour.
    The defaults walk container nodes depth-first and do nothing
    for leaf nodes.
    """

    def visitScript(self, _node: Script):
        """
        Visit every top-level statement of the script in order.
        """
        for statement in _node.body:
            statement.accept(self)

    def visitIf(self, _node: IfNode):
        """
        Visit the IF body, then each ELSE IF body in order, then the
        ELSE body.
        """
        def walk(statements):
            for statement in statements:
                statement.accept(self)

        walk(_node.body)
        for branch in _node.elif_branches:
            walk(branch.body)
        walk(_node.else_body)

    def visitSet(self, _node: SetNode):
        """
        Leaf node: nothing to do by default.
        """
        return None

    def visitOp(self, _node: OpNode):
        """
        Leaf node: nothing to do by default.
        """
        return None

    def visitPass(self, _node: PassNode):
        """
        Leaf node: nothing to do by default.
        """
        return None

    def visitUnrecog(self, _node: UnrecogNode):
        """
        Leaf node: nothing to do by default.
        """
        return None
class LineStrategy:
    """
    Pairs a compiled pattern with the callable that turns a successful
    match into token data.
    The tokenizer uses one instance per syntax form to classify a line.
    """

    def __init__(self, pattern, handler):
        self.pattern, self.handler = pattern, handler

    def match(self, line: str):
        """
        Return ``handler(match)`` when the pattern matches *line*,
        otherwise None.
        """
        found = self.pattern.match(line)
        return self.handler(found) if found else None
def _operandPair(m):
    """
    Return capture groups 1 and 2 of *m*, each stripped of surrounding
    whitespace.
    """
    return (m.group(1).strip(), m.group(2).strip())


# One strategy per recognised AutoScript syntax form. They are tried in
# list order, and the first one whose pattern matches classifies the line.
_LINE_STRATEGIES = [
    # Block keywords
    LineStrategy(_RE_IF, lambda m: (K_IF, m.group(1))),
    LineStrategy(_RE_ELSE_IF, lambda m: (K_ELSE_IF, m.group(1))),
    LineStrategy(_RE_ELSE, lambda m: (K_ELSE, None)),
    LineStrategy(_RE_ENDIF, lambda m: (K_ENDIF, None)),
    # Statements carrying a (target, value) pair
    LineStrategy(_RE_SET, lambda m: (K_SET, _operandPair(m))),
    LineStrategy(_RE_ADD, lambda m: (K_ADD, _operandPair(m))),
    LineStrategy(_RE_SUB, lambda m: (K_SUB, _operandPair(m))),
    # No-op
    LineStrategy(_RE_PASS, lambda m: (K_PASS, None)),
]
class Stmt:
"""
Flat statement container, backward-compatible with the original
tokenize() output and the orchestration dialog's _classifyLine.
"""
def __init__(
self,
kind: str | None = None,
condition: str | None = None,
target: str | None = None,
value: str | None = None,
op_type: str | None = None,
raw_line: str = ""
):
self.kind = kind
self.condition = condition
self.target = target
self.value = value
self.op_type = op_type
self.raw_line = raw_line
class ASTokenizer:
    """
    Tokenizer / parser for the AutoScript DSL.
    Main class-level entry points (engine-facing):
    - classifyLine(line) — single-line classifier.
    - tokenize(script) — flat Stmt list.
    - parse(script) — structured AST (Script root).
    Observer-enabled API (used by pre-check & orchestration):
    >>> obs = ScriptPrecheckObserver()
    >>> stmts = ASTokenizer.tokenizeWithObservers(script, [obs])
    """

    @classmethod
    def _notifyObservers(
        cls,
        observers: list,
        method: str,
        *args
    ):
        """
        Call the method named *method* with *args* on every observer,
        in list order.
        """
        for obs in observers:
            getattr(obs, method)(*args)

    @classmethod
    def _matchLine(
        cls,
        stripped: str
    ):
        """
        Classify one line against the registered strategies.
        Returns the result of the first strategy that matches, or
        (None, None) when none does.
        """
        for strategy in _LINE_STRATEGIES:
            result = strategy.match(stripped)
            if result:
                return result
        return (None, None)

    @classmethod
    def _buildStmt(
        cls,
        stripped: str,
        kind: str | None,
        data
    ) -> Stmt:
        """
        Build the flat Stmt for a classified line.
        *data* is the condition text for IF / ELSE IF and a
        (target, value) pair for SET / ADD / SUB; it is ignored for
        every other kind.
        """
        stmt = Stmt(kind=kind, raw_line=stripped)
        if kind == K_IF or kind == K_ELSE_IF:
            stmt.condition = data
        elif kind == K_SET:
            stmt.target, stmt.value = data
            stmt.op_type = OP_SET
        elif kind == K_ADD:
            stmt.target, stmt.value = data
            stmt.op_type = OP_ADD
        elif kind == K_SUB:
            stmt.target, stmt.value = data
            stmt.op_type = OP_SUB
        return stmt

    @classmethod
    def _stripComment(
        cls,
        line: str
    ) -> str:
        """
        Remove a trailing ``//`` comment from *line*.
        A ``//`` inside a single- or double-quoted section is left
        alone. Returns the text before the comment with trailing
        whitespace removed, or *line* unchanged when it has no comment.
        """
        in_single = False
        in_double = False
        i = 0
        while i < len(line):
            ch = line[i]
            if ch == "'" and not in_double:
                # Two single quotes in a row are skipped as a unit and
                # do not change the quote state.
                if i + 1 < len(line) and line[i + 1] == "'":
                    i += 2
                    continue
                in_single = not in_single
            elif ch == '"' and not in_single:
                in_double = not in_double
            elif ch == "/" and i + 1 < len(line) and line[i + 1] == "/" and not in_single and not in_double:
                return line[:i].rstrip()
            i += 1
        return line

    @classmethod
    def _scanLines(
        cls,
        script: str
    ):
        """
        Yield (line_no, code, kind, data) for each line of *script*
        that still has content after stripping whitespace and comments.
        line_no is the 1-based position of the line in the script,
        counting blank and comment-only lines as well.
        """
        for line_no, raw_line in enumerate(script.split("\n"), 1):
            code = cls._stripComment(raw_line.strip())
            if not code:
                continue
            kind, data = cls._matchLine(code)
            yield line_no, code, kind, data

    @classmethod
    def _tokenizeImpl(
        cls,
        script: str
    ) -> list:
        """
        Tokenize *script* into a flat list of Stmt, without observers.
        """
        return [
            cls._buildStmt(code, kind, data)
            for _line_no, code, kind, data in cls._scanLines(script)
        ]

    @classmethod
    def _leafNode(
        cls,
        tok
    ):
        """
        Build the AST node for a token that does not open, continue
        or close an IF block.
        Callers handle IF / ELSE IF / ELSE / ENDIF themselves before
        calling this; any kind not listed here becomes an UnrecogNode.
        """
        kind = tok.kind
        if kind == K_SET:
            return SetNode(target=tok.target, value=tok.value)
        if kind in (K_ADD, K_SUB):
            return OpNode(
                op_type=tok.op_type,
                target=tok.target,
                value=tok.value
            )
        if kind == K_PASS:
            return PassNode()
        return UnrecogNode(raw_line=tok.raw_line)

    @classmethod
    def _parseTokens(
        cls,
        tokens: list
    ) -> Script:
        """
        Build the AST for a flat token list.
        ELSE IF / ELSE / ENDIF tokens that appear outside any IF block
        are skipped and produce no node.
        """
        body = []
        i = 0
        while i < len(tokens):
            tok = tokens[i]
            if tok.kind == K_IF:
                node, consumed = cls._parseIfBlock(tokens, i)
                body.append(node)
                i += consumed
                continue
            if tok.kind not in (K_ELSE_IF, K_ELSE, K_ENDIF):
                body.append(cls._leafNode(tok))
            i += 1
        return Script(body=body)

    @classmethod
    def classifyLine(
        cls,
        stripped: str
    ):
        """
        Classify a single line.
        Returns (kind, data) for a recognised line, and None for an
        unrecognised line or a PASS line.
        """
        kind, data = cls._matchLine(stripped)
        if kind is None or kind == K_PASS:
            return None
        return (kind, data)

    @classmethod
    def tokenize(
        cls,
        script: str
    ) -> list:
        """
        Return the flat Stmt list for *script*.
        """
        return cls._tokenizeImpl(script)

    @classmethod
    def parse(
        cls,
        script: str
    ) -> Script:
        """
        Return the structured AST (Script root) for *script*.
        """
        return cls._parseTokens(cls._tokenizeImpl(script))

    @classmethod
    def tokenizeWithObservers(
        cls,
        script: str,
        observers: list
    ) -> list:
        """
        Tokenize and notify observers for each classified line.
        Fires onParseStart(script) once, then
        onTokenParsed(kind, data, line_no, code) for every line with
        content, then onParseComplete(statements) once. This is the
        single tokenization pipeline shared by pre-check and
        orchestration modules.
        """
        cls._notifyObservers(observers, "onParseStart", script)
        statements = []
        for line_no, code, kind, data in cls._scanLines(script):
            cls._notifyObservers(observers, "onTokenParsed", kind, data, line_no, code)
            statements.append(cls._buildStmt(code, kind, data))
        cls._notifyObservers(observers, "onParseComplete", statements)
        return statements

    @classmethod
    def parseWithObservers(
        cls,
        script: str,
        observers: list
    ) -> Script:
        """
        Parse and notify observers throughout the pipeline.
        Calls tokenizeWithObservers (which fires per-token events),
        then builds the AST and fires onASTReady(ast).
        """
        tokens = cls.tokenizeWithObservers(script, observers)
        ast = cls._parseTokens(tokens)
        cls._notifyObservers(observers, "onASTReady", ast)
        return ast

    @classmethod
    def _parseIfBlock(
        cls,
        tokens: list,
        start: int
    ):
        """
        Parse the IF block whose IF token is at ``tokens[start]``.
        Returns (node, consumed), where consumed is the number of
        tokens belonging to the block, counting the IF and, when
        present, its ENDIF. When the tokens run out before an ENDIF,
        the node is returned with ``closed`` set to False.
        """
        node = IfNode(condition=tokens[start].condition or "")
        # Statements are appended to whichever branch is currently open.
        current_target = node.body
        i = start + 1
        while i < len(tokens):
            tok = tokens[i]
            kind = tok.kind
            if kind == K_ENDIF:
                return (node, i - start + 1)
            if kind == K_IF:
                # Nested block: the recursive call reports how many
                # tokens it used, so skip past all of them.
                sub_node, consumed = cls._parseIfBlock(tokens, i)
                current_target.append(sub_node)
                i += consumed
                continue
            if kind == K_ELSE_IF:
                branch = ElifNode(condition=tok.condition or "")
                node.elif_branches.append(branch)
                current_target = branch.body
            elif kind == K_ELSE:
                # A repeated ELSE keeps filling the same else_body, so
                # statements collected under an earlier ELSE are not
                # thrown away.
                current_target = node.else_body
            else:
                current_target.append(cls._leafNode(tok))
            i += 1
        # Ran out of tokens without seeing the matching ENDIF.
        node.closed = False
        return (node, i - start)