Initial upload of Verantyx Logic Engine (v1.0)

29b87da verified 4 months ago

10.6 kB

	from __future__ import annotations

	import re
	from dataclasses import dataclass
	from typing import List, Optional

	_QUOTED_RE = re.compile(r'"([^"]+)"')
	# 決定打：角括弧や矢印を正しく含む正規表現に修正 + LaTeXコマンド用バックスラッシュ追加
	_UNQUOTED_LOGIC_RE = re.compile(r"([\[\]<>&\|~()A-Za-z\->\s\\]{3,})")
	_UNQUOTED_MATH_RE = re.compile(r"(∫\|dx\|dy\|dz\|sin\|cos\|tan\|log\|ln\|exp\|lim\|sum\|prod\|sqrt\|∞\|dim\|dimension\|matrix\|行列\|次元\|対称)", re.IGNORECASE)
	_GREEK_SYMBOL_RE = re.compile(r"[φψχαβγδλμνπστω]")
	_DIM_SYM_EN = re.compile(r"dimension of (the )?space of (n\sx\sn\|n×n) symmetric matrices", re.IGNORECASE)
	_DIM_SYM_JA = re.compile(r"(実\|複素)?n次対称行列の空間の次元\|対称行列の空間の次元")
	_DIM_MAT_EN = re.compile(r"dimension of (the )?space of (n\sx\sn\|n×n) matrices", re.IGNORECASE)
	_DIM_MAT_JA = re.compile(r"(実\|複素)?n次行列の空間の次元\|行列の空間の次元")

	_ARROW_FIX = [
	("→", "->"),
	("⇒", "->"),
	("⟶", "->"),
	("⟹", "->"),
	("↦", "->"),
	]
	_MISC_FIX = [
	("\u200b", ""),
	("\ufeff", ""),
	("\u200c", ""),
	("\u200d", ""),
	(r"\land", "&"),
	(r"\lor", "\|"),
	(r"\neg", "~"),
	(r"\to", "->"),
	(r"\rightarrow", "->"),
	(r"\leftrightarrow", "<->"),
	(r"\box", "[]"),
	(r"\diamond", "<>"),
	("¬", "~"),
	("∧", "&"),
	("∨", "\|"),
	("□", "[]"),
	("◇", "<>"),
	]
	_WORD_MODAL = [
	(re.compile(r"\bbox\b", re.IGNORECASE), "[]"),
	(re.compile(r"\bdiamond\b", re.IGNORECASE), "<>"),
	]
	_SPACE_AFTER_MODAL = re.compile(r"(\[\]\|\<\>)\s+")
	_HAS_LOGIC_TOKENS = re.compile(r"(\-\>\|\&\|\\|\|~\|\[\]\|\<\>\|$\|$)")
	_INLINE_MODAL_RE = re.compile(r"(\[\].{1,240}?->.{1,240})", re.DOTALL)
	_INLINE_DIA_RE = re.compile(r"(\<\>.{1,240}?->.{1,240})", re.DOTALL)
	_INLINE_PROP_RE = re.compile(r"([A-Za-z0-9_()\s~&\|]+->[\sA-Za-z0-9_()~&\|]+)", re.DOTALL)
	_INLINE_FORMULA_CHUNK_RE = re.compile(r"[A-Za-z0-9_\[\]<>~&\|()*/=\+\-]+")
	_HAS_JA_RE = re.compile(r"[一-龥ぁ-んァ-ン]")
	_MATH_TOKENS = re.compile(r"(∫\|dx\|dy\|dz\|sin\|cos\|tan\|log\|ln\|exp\|lim\|sum\|prod\|sqrt\|\^\|/\|∞)", re.IGNORECASE)

	_ASSUME_JA = {
	"assume:transitive": ["推移", "推移性", "transitive"],
	"assume:reflexive": ["反射", "反射性", "reflexive"],
	"assume:symmetric": ["対称", "対称性", "symmetric"],
	"assume:serial": ["逐次", "serial"],
	"assume:euclidean": ["ユークリッド", "euclidean"],
	}


	@dataclass
	class ParsedInput:
	raw: str
	core_formula: Optional[str]
	extra_formulas: List[str]
	context_text: str
	parse_notes: List[str]


	def _normalize_formula(s: str) -> str:
	s = s.strip()
	is_math = bool(_MATH_TOKENS.search(s))
	for a, b in _ARROW_FIX:
	s = s.replace(a, b)
	for a, b in _MISC_FIX:
	s = s.replace(a, b)
	for pat, repl in _WORD_MODAL:
	s = pat.sub(repl, s)
	s = s.replace("\n", " ").replace("\r", " ")
	s = re.sub(r"\s+", " ", s).strip()
	s = _SPACE_AFTER_MODAL.sub(r"\1", s)
	s = re.sub(r"\s->\s", "->", s)
	s = re.sub(r"\s&\s", "&", s)
	s = re.sub(r"\s\\|\s", "\|", s)
	s = re.sub(r"\s~\s", "~", s)
	if not is_math:
	# Guard against UI noise for logic formulas only.
	s = re.sub(r"\s+[a-zA-Z]\s*$", "", s).strip()
	return s


	def extract_quoted_formulas(text: str) -> List[str]:
	out: List[str] = []
	for m in _QUOTED_RE.finditer(text or ""):
	g = next((x for x in m.groups() if x), None)
	if not g:
	continue
	f = _normalize_formula(g)
	if f:
	out.append(f)
	seen = set()
	uniq = []
	for f in out:
	if f not in seen:
	seen.add(f)
	uniq.append(f)
	return uniq


	def extract_unquoted_formulas(text: str) -> List[str]:
	out: List[str] = []
	raw = text or ""
	# If inline formula exists, prefer it over long sentence lines.
	inline = extract_inline_formula(raw)

	# Pattern-based linear algebra hints.
	if _DIM_SYM_EN.search(raw) or _DIM_SYM_JA.search(raw):
	out.append(_normalize_formula("dim Sym(n,R)"))
	if _DIM_MAT_EN.search(raw) or _DIM_MAT_JA.search(raw):
	out.append(_normalize_formula("dim M(n,n,R)"))

	# Math-like detection: keep full lines if they contain math tokens.
	for line in raw.splitlines():
	if _UNQUOTED_MATH_RE.search(line):
	# Avoid capturing long natural-language lines as formulas.
	candidate = _normalize_formula(line)
	if candidate:
	# Heuristic: skip if long and operator density is low.
	op_count = len(re.findall(r"[\\[\\]<>~&\|()]", candidate)) + candidate.count("->") + candidate.count("<->")
	word_count = len(re.findall(r"[A-Za-z一-龥ぁ-んァ-ン]", line))
	if len(candidate) > 120 and op_count < 2 and word_count > 10:
	continue
	out.append(candidate)

	# Logic-like fragments (simple heuristic).
	for m in _UNQUOTED_LOGIC_RE.finditer(raw):
	cand = m.group(1) or ""
	if "->" in cand or "&" in cand or "\|" in cand or "[]" in cand or "<>" in cand or "~" in cand:
	f = _normalize_formula(cand)
	if f:
	out.append(f)

	# Inline fallback: grab the first formula-like chunk.
	if inline and inline not in out:
	out.append(inline)

	# Greek-symbol fallback only when logic tokens are present.
	if not out and _GREEK_SYMBOL_RE.search(raw) and _HAS_LOGIC_TOKENS.search(raw):
	out.append(_normalize_formula("φ"))

	seen = set()
	uniq: List[str] = []
	for f in out:
	if f not in seen:
	seen.add(f)
	uniq.append(f)
	return uniq


	def is_formula_like(s: str) -> bool:
	if not s:
	return False
	return bool(_HAS_LOGIC_TOKENS.search(s) or _MATH_TOKENS.search(s))


	def _is_strong_formula(s: str) -> bool:
	"""
	Stronger signal used for inline extraction (avoid lone [] or single symbols).
	"""
	if not s:
	return False
	if _MATH_TOKENS.search(s):
	return True
	return bool(re.search(r"(->\|<->\|&\|\\|\|~)", s))


	def extract_inline_formula(text: str) -> Optional[str]:
	"""
	Extract a likely formula-like substring from a long sentence.
	Priority: modal chain, then implication-like fragment.
	"""
	if not text:
	return None
	m = _INLINE_MODAL_RE.search(text)
	if m:
	if not _HAS_JA_RE.search(m.group(1)):
	return _normalize_formula(m.group(1))
	m = _INLINE_DIA_RE.search(text)
	if m:
	if not _HAS_JA_RE.search(m.group(1)):
	return _normalize_formula(m.group(1))
	m = _INLINE_PROP_RE.search(text)
	if m:
	if not _HAS_JA_RE.search(m.group(1)):
	return _normalize_formula(m.group(1))

	# Token-like fallback: pick the best formula-ish chunk (prefer strong operators).
	best = None
	best_score = -1
	for chunk in _INLINE_FORMULA_CHUNK_RE.findall(text):
	if _HAS_JA_RE.search(chunk):
	continue
	if not is_formula_like(chunk):
	continue
	score = 0
	if _is_strong_formula(chunk):
	score += 3
	score += len(chunk)
	if score > best_score:
	best_score = score
	best = chunk
	if best:
	return _normalize_formula(best)
	return None


	def pick_core_formula(text: str) -> ParsedInput:
	notes: List[str] = []
	raw = text or ""

	quoted = extract_quoted_formulas(raw)
	if quoted:
	notes.append(f"quoted_formulas={len(quoted)}")
	core = quoted[0]
	# If quoted core looks like a sentence, prefer inline formula fragments.
	if core and not is_formula_like(core):
	inline = extract_inline_formula(raw)
	if inline and is_formula_like(inline):
	core = inline
	notes.append("quoted_core_replaced_by_inline_formula")
	# If quoted contains a mix of prose + formula, prefer the tighter inline formula.
	if core and is_formula_like(core):
	inline = extract_inline_formula(core)
	if inline and is_formula_like(inline) and len(inline) < len(core):
	core = inline
	notes.append("quoted_core_inline_subset")
	extras = quoted[1:]
	ctx = _QUOTED_RE.sub(" ", raw)
	ctx = re.sub(r"\s+", " ", ctx).strip()
	return ParsedInput(
	raw=raw,
	core_formula=core,
	extra_formulas=extras,
	context_text=ctx,
	parse_notes=notes,
	)

	notes.append("no_quoted_formula_detected")
	unquoted = extract_unquoted_formulas(raw)
	if unquoted:
	notes.append(f"unquoted_candidates={len(unquoted)}")
	core = unquoted[0]
	# Prefer inline formula if the core looks like a full sentence.
	if core and len(core) > 120:
	inline = extract_inline_formula(raw)
	if inline and is_formula_like(inline):
	core = inline
	notes.append("unquoted_core_replaced_by_inline_formula")
	extras = unquoted[1:]
	# Reject trivial single-symbol cores (e.g., φ) with no operators.
	if _GREEK_SYMBOL_RE.fullmatch(core) and not _HAS_LOGIC_TOKENS.search(core):
	notes.append("unquoted_core_is_trivial_symbol")
	core = None
	extras = []
	return ParsedInput(
	raw=raw,
	core_formula=core,
	extra_formulas=extras,
	context_text=raw.strip(),
	parse_notes=notes,
	)

	return ParsedInput(
	raw=raw,
	core_formula=None,
	extra_formulas=[],
	context_text=raw.strip(),
	parse_notes=notes,
	)


	def ui_rule_text_ja() -> str:
	return (
	"【入力ルール】\n"
	"・式らしい部分を自動抽出します（必要なら引用してもOK）。\n"
	"・括弧・矢印・論理記号は正しく入力してください。\n"
	'例: ((A -> B) & A) -> B / []p -> [][]p / dim Sym(n,R)'
	)


	def ui_rule_text_en() -> str:
	return (
	"Rule: We auto-extract formula-like parts (quoting is optional).\n"
	"Use correct operators and parentheses.\n"
	"Examples: ((A -> B) & A) -> B, []p -> [][]p, dim Sym(n,R)"
	)


	def detect_modal_assumptions(text: str) -> List[str]:
	t = (text or "").lower()
	out: List[str] = []
	for tag, keys in _ASSUME_JA.items():
	for k in keys:
	if k.lower() in t:
	out.append(tag)
	break
	return sorted(set(out))