# verantyx-logic-math / avh_math / input_normalize.py
# Hosting-page residue kept for provenance: kofdai,
# "Initial upload of Verantyx Logic Engine (v1.0)", commit 29b87da (verified).
import re
# Invisible characters removed from input: U+200B (zero width space),
# U+200C (zero width non-joiner), U+200D (zero width joiner) and
# U+FEFF (byte order mark / zero width no-break space).
_ZERO_WIDTH = re.compile(r"[\u200b\u200c\u200d\ufeff]")
# One or more spaces/tabs; newlines are deliberately not part of this class.
_WS = re.compile(r"[ \t]+")
# Three or more consecutive newlines, i.e. more than one blank line in a row.
_MULTI_NL = re.compile(r"\n{3,}")
def _normalize_symbols(s: str) -> str:
s = s.replace("β†’", "->").replace("β‡’", "->").replace("⟢", "->")
s = s.replace("↔", "<->").replace("⇔", "<->")
s = s.replace("∧", "&").replace("∨", "|")
s = s.replace("Β¬", "~")
s = s.replace("(", "(").replace("οΌ‰", ")")
s = s.replace("【", "[").replace("】", "]")
return s
def _normalize_modal_tokens(s: str) -> str:
# Canonicalize to β–‘/β—‡ so downstream can convert as needed.
s = s.replace("[]", "β–‘").replace("<>", "β—‡")
s = re.sub(r"\bbox\b", "β–‘", s, flags=re.IGNORECASE)
s = re.sub(r"\bdiamond\b", "β—‡", s, flags=re.IGNORECASE)
# Glue modal operator to operand where obvious.
s = re.sub(r"β–‘\s+(?=[A-Za-z(~\[])", "β–‘", s)
s = re.sub(r"β—‡\s+(?=[A-Za-z(~\[])", "β—‡", s)
s = re.sub(r"β–‘\s+β–‘\s*", "β–‘β–‘", s)
s = re.sub(r"β—‡\s+β—‡\s*", "β—‡β—‡", s)
return s
def normalize_input(text: str) -> str:
    """
    Normalize common symbol and spacing variants without destroying line structure.

    Steps, in order:
    - Treat a falsy ``text`` (e.g. ``None`` or an empty string) as empty and
      convert CRLF / CR line endings to LF.
    - Remove zero-width characters.
    - Normalize arrows/connectives (via ``_normalize_symbols``).
    - Normalize modal tokens (box/diamond/[]/<>) to the canonical box/diamond
      symbols produced by ``_normalize_modal_tokens``.
    - Collapse runs of spaces/tabs to one space and trim every line.
    - Compress three or more consecutive newlines to exactly two.
    - Strip leading/trailing whitespace from the whole result.

    Returns:
        The normalized text; an empty string for empty or falsy input.
    """
    s = (text or "").replace("\r\n", "\n").replace("\r", "\n")
    s = _ZERO_WIDTH.sub("", s)
    s = _normalize_symbols(s)
    s = _normalize_modal_tokens(s)
    # Normalize spaces within lines first: a line holding only spaces/tabs
    # becomes empty here, so it is counted as a blank line below.
    s = "\n".join(_WS.sub(" ", ln).strip() for ln in s.split("\n"))
    # Avoid collapsing all newlines, but compress excessive ones. This must run
    # after the per-line trim, otherwise "a\n \n \n \nb" keeps four newlines.
    s = _MULTI_NL.sub("\n\n", s)
    return s.strip()