From 833d81f2b010ecaafac935d21a3f15f80cce1e25 Mon Sep 17 00:00:00 2001 From: adelon <22380201+adelon@users.noreply.github.com> Date: Thu, 14 Aug 2025 02:49:20 +0200 Subject: Improve scanning Fixes scanning of relation symbols and adds a few error cases for function symbols. --- source/Syntax/Adapt.hs | 56 +++++++++++++++++++------------- test/examples/abbr.tex | 2 +- test/examples/relparam.tex | 8 +++++ test/golden/abbr/encoding tasks.golden | 2 +- test/golden/abbr/generating tasks.golden | 26 ++++++++++----- test/golden/abbr/glossing.golden | 38 +++++++++++++--------- test/golden/abbr/parsing.golden | 41 ++++++++++++----------- test/golden/abbr/tokenizing.golden | 7 ++++ test/golden/relparam/scanning.golden | 4 +++ test/golden/relparam/tokenizing.golden | 38 ++++++++++++++++++++++ 10 files changed, 153 insertions(+), 69 deletions(-) create mode 100644 test/examples/relparam.tex create mode 100644 test/golden/relparam/scanning.golden create mode 100644 test/golden/relparam/tokenizing.golden diff --git a/source/Syntax/Adapt.hs b/source/Syntax/Adapt.hs index 1b5a237..77f80d6 100644 --- a/source/Syntax/Adapt.hs +++ b/source/Syntax/Adapt.hs @@ -129,9 +129,9 @@ head = ScanNoun <$> noun <|> ScanPrefixPredicate <$> prefixPredicate sigPred :: RE Token (Marker -> ScannedLexicalItem) -sigPred = ScanNoun . toLexicalPhrase <$> (var *> can *> be *> an *> pat <* iff) - <|> ScanAdj . toLexicalPhrase <$> (var *> can *> be *> pat <* iff) - -- <|> ScanVerbInfinitive . toLexicalPhrase <$> (var *> can *> pat <* iff) +sigPred = ScanNoun . toLexicalPhrase <$> (math var *> can *> be *> an *> pat <* iff) + <|> ScanAdj . toLexicalPhrase <$> (math var *> can *> be *> pat <* iff) + -- <|> ScanVerbInfinitive . toLexicalPhrase <$> (math var *> can *> pat <* iff) inductive :: RE Token [ScannedLexicalItem] inductive = do @@ -151,7 +151,7 @@ struct = do few anySym m <- label few anySym - lexicalItem <- ScanStructNoun . toLexicalPhrase <$> (an *> structPat <* var) + lexicalItem <- ScanStructNoun . toLexicalPhrase <$> (an *> structPat <* math var) few anySym lexicalItems <- structOps <|> pure [] sym (EndEnv "struct") @@ -173,26 +173,28 @@ structOp = do pure (ScanStructOp op) noun :: RE Token LexicalPhrase -noun = toLexicalPhrase <$> (var *> is *> an *> pat <* iff) +noun = toLexicalPhrase <$> (math var *> is *> an *> pat <* iff) adj :: RE Token LexicalPhrase -adj = toLexicalPhrase <$> (var *> is *> pat <* iff) +adj = toLexicalPhrase <$> (math var *> is *> pat <* iff) verb :: RE Token LexicalPhrase -verb = toLexicalPhrase <$> (var *> pat <* iff) +verb = toLexicalPhrase <$> (math var *> pat <* iff) fun :: RE Token LexicalPhrase fun = toLexicalPhrase <$> (the *> pat <* (is <|> comma)) relationSymbol :: RE Token (RelationSymbol, Int) -relationSymbol = definiendum <* iff - where - definiendum = math do - varSymbol - rel <- symbol - k <- params - varSymbol - pure (rel, k) +relationSymbol = do + beginMath + var + rel <- symbol + k <- params + var + endMath + iff + pure (rel, k) + where params :: RE Token Int params = do vars <- many (sym InvisibleBraceL *> var <* sym InvisibleBraceR) @@ -203,7 +205,12 @@ functionSymbol = do sym (BeginEnv "math") toks <- few nonDefinitionKeyword sym (Symbol "=") - pure (fromToken <$> toks) + pure case toks of + -- TODO proper error messages with more info (location, etc.) + [] -> error "Malformed function pattern: no pattern" + [Variable _] -> error "Malformed function: bare variable. This will cause infinite left recursion in the grammar and cause the parser to hang!" + [Variable _, ParenL, Variable _, ParenR] -> error "Malformed function: redefinition of function application. The notation _(_) is reserved for set-theoretic function application." + _ -> fromToken <$> toks where fromToken = \case Variable _ -> Nothing -- Variables become slots. @@ -225,7 +232,7 @@ prefixPredicate = math prfx <* iff where prfx = do r <- command - args <- many (sym InvisibleBraceL *> varSymbol <* sym InvisibleBraceR) + args <- many (sym InvisibleBraceL *> var <* sym InvisibleBraceR) pure (PrefixPredicate r (length args)) @@ -235,10 +242,7 @@ command = msym \case _ -> Nothing var :: RE Token Token -var = math varSymbol - -varSymbol :: RE Token Token -varSymbol = psym isVar +var = psym isVar nonDefinitionKeyword :: RE Token Token @@ -249,17 +253,23 @@ nonDefinitionKeyword = psym (`notElem` keywords) , Word "iff" , Symbol "=" , Command "iff" + , BeginEnv "math" + , EndEnv "math" ] pat :: RE Token [Token] -pat = many (psym isLexicalPhraseToken <|> var) +pat = many (psym isLexicalPhraseToken <|> math var) structPat :: RE Token [Token] structPat = many (psym isLexicalPhraseToken) +beginMath, endMath :: RE Token () +beginMath = void (sym (BeginEnv "math")) +endMath = void (sym (EndEnv "math")) + math :: RE Token a -> RE Token a -math re = sym (BeginEnv "math") *> re <* sym (EndEnv "math") +math re = beginMath *> re <* endMath -- | We allow /conditional perfection/: the first /@if@/ in a definition is interpreted as /@iff@/. iff :: RE Token () diff --git a/test/examples/abbr.tex b/test/examples/abbr.tex index c4790bb..c81cd59 100644 --- a/test/examples/abbr.tex +++ b/test/examples/abbr.tex @@ -13,7 +13,7 @@ \end{abbreviation} \begin{proposition}\label{dummy_abbr_test_noun} - $x$ is a function. + For all $x$ we have $x$ is a function. \end{proposition} diff --git a/test/examples/relparam.tex b/test/examples/relparam.tex new file mode 100644 index 0000000..958df48 --- /dev/null +++ b/test/examples/relparam.tex @@ -0,0 +1,8 @@ +% Dummy abbreviation to test relation symbols with parameters. +\begin{abbreviation}\label{equalparam} + $x\EQUAL{y} z$ iff $x = z$. +\end{abbreviation} + +\begin{proposition}\label{dummy_abbr_test_noun} + For all $x$ we have $x\EQUAL{y}x$. + \end{proposition} diff --git a/test/golden/abbr/encoding tasks.golden b/test/golden/abbr/encoding tasks.golden index 18ba749..365aa3d 100644 --- a/test/golden/abbr/encoding tasks.golden +++ b/test/golden/abbr/encoding tasks.golden @@ -1,6 +1,6 @@ fof(dummy_abbr_test_adj,conjecture,~?[Xy]:elem(Xy,fx)=>~?[Xy]:elem(Xy,fx)). ------------------ -fof(dummy_abbr_test_noun,conjecture,fx=fx). +fof(dummy_abbr_test_noun,conjecture,![Xx]:Xx=Xx). fof(dummy_abbr_test_adj,axiom,![Xx]:(~?[Xy]:elem(Xy,Xx)=>~?[Xy]:elem(Xy,Xx))). ------------------ fof(dummy_abbr_test_verb,conjecture,fx=fy=>fx=fy). diff --git a/test/golden/abbr/generating tasks.golden b/test/golden/abbr/generating tasks.golden index 397fafe..d438481 100644 --- a/test/golden/abbr/generating tasks.golden +++ b/test/golden/abbr/generating tasks.golden @@ -117,17 +117,25 @@ ) ] , taskConjectureLabel = Marker "dummy_abbr_test_noun" - , taskConjecture = TermSymbol - ( SymbolPredicate - ( PredicateRelation - ( Symbol "=" ) + , taskConjecture = Quantified Universally + ( Scope + ( TermSymbol + ( SymbolPredicate + ( PredicateRelation + ( Symbol "=" ) + ) + ) + [ TermVar + ( B + ( NamedVar "x" ) + ) + , TermVar + ( B + ( NamedVar "x" ) + ) + ] ) ) - [ TermVar - ( NamedVar "x" ) - , TermVar - ( NamedVar "x" ) - ] } , Task { taskDirectness = Direct diff --git a/test/golden/abbr/glossing.golden b/test/golden/abbr/glossing.golden index 152fbe1..0efddcd 100644 --- a/test/golden/abbr/glossing.golden +++ b/test/golden/abbr/glossing.golden @@ -125,25 +125,31 @@ ) ( Marker "dummy_abbr_test_noun" ) ( Lemma [] - ( TermSymbol - ( SymbolPredicate - ( PredicateNoun - ( SgPl - { sg = - [ Just - ( Word "function" ) - ] - , pl = - [ Just - ( Word "functions" ) - ] - } + ( Quantified Universally + ( Scope + ( TermSymbol + ( SymbolPredicate + ( PredicateNoun + ( SgPl + { sg = + [ Just + ( Word "function" ) + ] + , pl = + [ Just + ( Word "functions" ) + ] + } + ) + ) ) + [ TermVar + ( B + ( NamedVar "x" ) + ) + ] ) ) - [ TermVar - ( NamedVar "x" ) - ] ) ) , BlockAbbr diff --git a/test/golden/abbr/parsing.golden b/test/golden/abbr/parsing.golden index 77a4c47..21e4c60 100644 --- a/test/golden/abbr/parsing.golden +++ b/test/golden/abbr/parsing.golden @@ -122,25 +122,28 @@ ) ( Marker "dummy_abbr_test_noun" ) ( Lemma [] - ( StmtNoun - ( TermExpr - ( ExprVar - ( NamedVar "x" ) - ) :| [] - ) NounPhrase ( [] ) - ( Noun - ( SgPl - { sg = - [ Just - ( Word "function" ) - ] - , pl = - [ Just - ( Word "functions" ) - ] - } - ) [] - ) ( Nothing ) ( [] ) ( Nothing ) + ( SymbolicQuantified Universally + ( NamedVar "x" :| [] ) Unbounded Nothing + ( StmtNoun + ( TermExpr + ( ExprVar + ( NamedVar "x" ) + ) :| [] + ) NounPhrase ( [] ) + ( Noun + ( SgPl + { sg = + [ Just + ( Word "function" ) + ] + , pl = + [ Just + ( Word "functions" ) + ] + } + ) [] + ) ( Nothing ) ( [] ) ( Nothing ) + ) ) ) , BlockAbbr diff --git a/test/golden/abbr/tokenizing.golden b/test/golden/abbr/tokenizing.golden index 8da4b07..10b17fb 100644 --- a/test/golden/abbr/tokenizing.golden +++ b/test/golden/abbr/tokenizing.golden @@ -56,6 +56,13 @@ , EndEnv "abbreviation" , BeginEnv "proposition" , Label "dummy_abbr_test_noun" +, Word "for" +, Word "all" +, BeginEnv "math" +, Variable "x" +, EndEnv "math" +, Word "we" +, Word "have" , BeginEnv "math" , Variable "x" , EndEnv "math" diff --git a/test/golden/relparam/scanning.golden b/test/golden/relparam/scanning.golden new file mode 100644 index 0000000..7c72985 --- /dev/null +++ b/test/golden/relparam/scanning.golden @@ -0,0 +1,4 @@ +[ ScanRelationSymbol + ( Command "EQUAL" ) + ( Marker "equalparam" ) +] \ No newline at end of file diff --git a/test/golden/relparam/tokenizing.golden b/test/golden/relparam/tokenizing.golden new file mode 100644 index 0000000..95290d5 --- /dev/null +++ b/test/golden/relparam/tokenizing.golden @@ -0,0 +1,38 @@ +[ BeginEnv "abbreviation" +, Label "equalparam" +, BeginEnv "math" +, Variable "x" +, Command "EQUAL" +, InvisibleBraceL +, Variable "y" +, InvisibleBraceR +, Variable "z" +, EndEnv "math" +, Word "iff" +, BeginEnv "math" +, Variable "x" +, Symbol "=" +, Variable "z" +, EndEnv "math" +, Symbol "." +, EndEnv "abbreviation" +, BeginEnv "proposition" +, Label "dummy_abbr_test_noun" +, Word "for" +, Word "all" +, BeginEnv "math" +, Variable "x" +, EndEnv "math" +, Word "we" +, Word "have" +, BeginEnv "math" +, Variable "x" +, Command "EQUAL" +, InvisibleBraceL +, Variable "y" +, InvisibleBraceR +, Variable "x" +, EndEnv "math" +, Symbol "." +, EndEnv "proposition" +] \ No newline at end of file -- cgit v1.2.3