summaryrefslogtreecommitdiff
path: root/source/Syntax
diff options
context:
space:
mode:
authoradelon <22380201+adelon@users.noreply.github.com>2024-02-10 02:22:14 +0100
committeradelon <22380201+adelon@users.noreply.github.com>2024-02-10 02:22:14 +0100
commit442d732696ad431b84f6e5c72b6ee785be4fd968 (patch)
treeb476f395e7e91d67bacb6758bc84914b8711593f /source/Syntax
Initial commit
Diffstat (limited to 'source/Syntax')
-rw-r--r--source/Syntax/Abstract.hs468
-rw-r--r--source/Syntax/Adapt.hs382
-rw-r--r--source/Syntax/Chunk.hs46
-rw-r--r--source/Syntax/Concrete.hs657
-rw-r--r--source/Syntax/Concrete/Keywords.hs222
-rw-r--r--source/Syntax/Import.hs43
-rw-r--r--source/Syntax/Internal.hs612
-rw-r--r--source/Syntax/LexicalPhrase.hs93
-rw-r--r--source/Syntax/Lexicon.hs275
-rw-r--r--source/Syntax/Token.hs438
10 files changed, 3236 insertions, 0 deletions
diff --git a/source/Syntax/Abstract.hs b/source/Syntax/Abstract.hs
new file mode 100644
index 0000000..6f30612
--- /dev/null
+++ b/source/Syntax/Abstract.hs
@@ -0,0 +1,468 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE StandaloneDeriving #-}
+
+
+-- | Data types for the abstract syntax tree and helper functions
+-- for constructing the lexicon.
+--
+module Syntax.Abstract
+ ( module Syntax.Abstract
+ , module Syntax.LexicalPhrase
+ , module Syntax.Token
+ ) where
+
+
+import Base
+import Syntax.LexicalPhrase (LexicalPhrase, SgPl(..), unsafeReadPhraseSgPl, unsafeReadPhrase)
+import Syntax.Token (Token(..))
+
+import Text.Earley.Mixfix (Holey)
+import Data.Text qualified as Text
+import Text.Megaparsec.Pos (SourcePos)
+
+-- | Local "variable-like" symbols that can be captured by binders.
+data VarSymbol
+ = NamedVar Text -- ^ A named variable.
+ | FreshVar Int -- ^ A nameless (implicit) variable. Should only come from desugaring.
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- | String literals denote named variables ('NamedVar').
+instance IsString VarSymbol where
+ fromString v = NamedVar $ Text.pack v
+
+
+-- | Symbolic expressions (the contents of math environments).
+data Expr
+ = ExprVar VarSymbol
+ -- ^ A variable occurrence.
+ | ExprInteger Int
+ -- ^ An integer literal.
+ | ExprOp FunctionSymbol [Expr]
+ -- ^ A mixfix function symbol applied to its arguments.
+ | ExprStructOp StructSymbol (Maybe Expr)
+ -- ^ A structure operation with an optional argument expression
+ -- (NOTE(review): presumably the structure it is read from — confirm).
+ | ExprFiniteSet (NonEmpty Expr)
+ -- ^ A finite set literal; desugaring uses 'ConsSymbol'.
+ | ExprSep VarSymbol Expr Stmt
+ -- ^ Of the form /@{x ∈ X | P(x)}@/.
+ | ExprReplace Expr (NonEmpty (VarSymbol,Expr)) (Maybe Stmt)
+ -- ^ E.g.: /@{ f(x, y) | x ∈ X, y ∈ Y | P(x, y) }@/.
+ | ExprReplacePred VarSymbol VarSymbol Expr Stmt
+ -- ^ E.g.: /@{ y | \\exists x\\in X. P(x, y) }@/.
+ deriving (Show, Eq, Ord)
+
+
+-- | A mixfix pattern over tokens: 'Nothing' marks an argument slot.
+type FunctionSymbol = Holey Token
+
+-- | A single relator token, e.g. /@\in@/.
+type RelationSymbol = Token
+
+-- | Name of a structure operation (the text of its TEX command).
+newtype StructSymbol = StructSymbol { unStructSymbol :: Text } deriving newtype (Show, Eq, Ord, Hashable)
+
+-- | The predefined @cons@ function symbol used for desugaring finite set expressions.
+pattern ConsSymbol :: FunctionSymbol
+pattern ConsSymbol = [Just (Command "cons"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR, Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | The predefined @pair@ function symbol used for desugaring tuple notation.
+pattern PairSymbol :: FunctionSymbol
+pattern PairSymbol = [Just (Command "pair"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR, Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | Function application /@f(x)@/ desugars to /@\apply{f}{x}@/.
+pattern ApplySymbol :: FunctionSymbol
+pattern ApplySymbol = [Just (Command "apply"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR, Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | The domain operator /@\dom{...}@/ (one argument slot).
+pattern DomSymbol :: FunctionSymbol
+pattern DomSymbol = [Just (Command "dom"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | The carrier operation of a structure.
+pattern CarrierSymbol :: StructSymbol
+pattern CarrierSymbol = StructSymbol "carrier"
+
+-- | A constant is a nullary operator.
+pattern ExprConst :: Token -> Expr
+pattern ExprConst c = ExprOp [Just c] []
+
+-- | Function application via 'ApplySymbol'.
+pattern ExprApp :: Expr -> Expr -> Expr
+pattern ExprApp e1 e2 = ExprOp ApplySymbol [e1, e2]
+
+-- | Pair formation via 'PairSymbol'.
+pattern ExprPair :: Expr -> Expr -> Expr
+pattern ExprPair e1 e2 = ExprOp PairSymbol [e1, e2]
+
+-- | Tuples are interpreted as nested pairs:
+-- the triple /@(a, b, c)@/ is interpreted as
+-- /@(a, (b, c))@/.
+-- This means that the product operation should also
+-- be right associative, so that /@(a, b, c)@/ can
+-- form elements of /@A\times B\times C@/.
+makeTuple :: NonEmpty Expr -> Expr
+makeTuple = foldr1 ExprPair
+
+
+-- | A chain of relations, e.g. /@a, b < c = d@/.
+data Chain
+ = ChainBase (NonEmpty Expr) Sign Relation (NonEmpty Expr)
+ -- ^ A single relation between two comma-lists of expressions.
+ | ChainCons (NonEmpty Expr) Sign Relation Chain
+ -- ^ A comma-list related to the head of a longer chain.
+ deriving (Show, Eq, Ord)
+
+-- | What may stand between the sides of a relation.
+data Relation
+ = RelationSymbol RelationSymbol -- ^ E.g.: /@x \in X@/
+ | RelationExpr Expr -- ^ E.g.: /@x \mathrel{R} y@/
+ deriving (Show, Eq, Ord)
+
+-- | Polarity of a relation occurrence.
+data Sign = Positive | Negative deriving (Show, Eq, Ord)
+
+-- | Symbolic formulas: chains plus logical structure on top of them.
+data Formula
+ = FormulaChain Chain
+ | FormulaPredicate PrefixPredicate (NonEmpty Expr)
+ | Connected Connective Formula Formula
+ | FormulaNeg Formula
+ | FormulaQuantified Quantifier (NonEmpty VarSymbol) Bound Formula
+ | PropositionalConstant PropositionalConstant
+ deriving (Show, Eq, Ord)
+
+-- | The propositional constants ⊥ and ⊤.
+data PropositionalConstant = IsBottom | IsTop
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- | A prefix predicate symbol together with its arity.
+data PrefixPredicate
+ = PrefixPredicate Text Int
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+-- | Binary propositional connectives (negation is 'FormulaNeg').
+data Connective
+ = Conjunction
+ | Disjunction
+ | Implication
+ | Equivalence
+ | ExclusiveOr
+ | NegatedDisjunction
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+
+-- | Interpret a mixfix connective pattern applied to its subformulas.
+-- Partial: calls 'error' on any pattern/arity combination not listed
+-- here, so this function must stay in sync with the lexicon's
+-- connective table.
+makeConnective :: Holey Token -> [Formula] -> Formula
+makeConnective [Nothing, Just (Command "implies"), Nothing] [f1, f2] = Connected Implication f1 f2
+makeConnective [Nothing, Just (Command "land"), Nothing] [f1, f2] = Connected Conjunction f1 f2
+makeConnective [Nothing, Just (Command "lor"), Nothing] [f1, f2] = Connected Disjunction f1 f2
+makeConnective [Nothing, Just (Command "iff"), Nothing] [f1, f2] = Connected Equivalence f1 f2
+makeConnective [Just(Command "lnot"), Nothing] [f1] = FormulaNeg f1
+makeConnective pat _ = error ("makeConnective does not handle the following connective correctly: " <> show pat)
+
+
+
+-- | The (singular/plural) lexical phrase naming a structure.
+type StructPhrase = SgPl LexicalPhrase
+
+-- | For example 'an integer' would be
+-- > Noun (unsafeReadPhrase "integer[/s]") []
+type Noun = NounOf Term
+data NounOf a
+ = Noun (SgPl LexicalPhrase) [a]
+ deriving (Show, Eq, Ord)
+
+
+
+
+type NounPhrase t = NounPhraseOf t Term
+-- NOTE: 'NounPhraseOf' is only used with arguments of type 'Term',
+-- but keeping the argument parameter @a@ allows the 'Show' and 'Eq'
+-- instances to remain decidable.
+-- The container @t@ holds the phrase's names: @Maybe@ for an optional
+-- name, @[]@ for a list of names, 'Nameless' for none.
+data NounPhraseOf t a
+ = NounPhrase [AdjLOf a] (NounOf a) (t VarSymbol) [AdjROf a] (Maybe Stmt)
+
+-- Hand-written instances so the constraints can mention @t VarSymbol@.
+instance (Show a, Show (t VarSymbol)) => Show (NounPhraseOf t a) where
+ show (NounPhrase ls n vs rs ms) =
+ "NounPhrase ("
+ <> show ls <> ") ("
+ <> show n <> ") ("
+ <> show vs <> ") ("
+ <> show rs <> ") ("
+ <> show ms <> ")"
+
+-- Field-wise equality, mirroring what a derived instance would do.
+instance (Eq a, Eq (t VarSymbol)) => Eq (NounPhraseOf t a) where
+ NounPhrase ls n vs rs ms == NounPhrase ls' n' vs' rs' ms' =
+ ls == ls' && n == n' && vs == vs' && rs == rs' && ms == ms'
+
+-- This is arbitrary and ugly, but it's useful to have a somewhat
+-- usable Ord instance for all raw syntax (e.g. for 'nubOrd').
+--
+-- Lexicographic comparison prioritizing the noun: later fields are
+-- only consulted when the earlier ones compare 'EQ'. (The previous
+-- version recursed on 'GT' instead of 'EQ', so phrases that 'Eq'
+-- considers distinct could compare 'EQ' — making 'nubOrd' drop them —
+-- and @compare x y@ and @compare y x@ could both be 'LT', violating
+-- antisymmetry and corrupting ordered containers.)
+instance (Ord a, Ord (t VarSymbol)) => Ord (NounPhraseOf t a) where
+ NounPhrase ls n vs rs ms `compare` NounPhrase ls' n' vs' rs' ms' =
+ compare n n' <> compare ls ls' <> compare rs rs' <> compare ms ms' <> compare vs vs'
+
+-- | @Nameless a@ is equivalent to @Const () a@ (from "Data.Functor.Const").
+-- It describes a container that is unwilling to actually contain something.
+-- @Nameless@ lets us treat nouns with no names, one name, or many names uniformly.
+-- Thus @NounPhraseOf Nameless a@ is a noun phrase without a name and with arguments
+-- of type @a@.
+data Nameless a = Nameless deriving (Show, Eq, Ord)
+
+
+-- | Left adjectives modify nouns from the left side,
+-- e.g. /@even@/, /@continuous@/, and /@σ-finite@/.
+type AdjL = AdjLOf Term
+data AdjLOf a
+ = AdjL LexicalPhrase [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | Right attributes consist of basic right adjectives, e.g.
+-- /@divisible by ?@/, or /@of finite type@/ and verb phrases
+-- marked with /@that@/, such as /@integer that divides n@/.
+-- In some cases these right attributes may be followed
+-- by an additional such-that phrase.
+type AdjR = AdjROf Term
+data AdjROf a
+ = AdjR LexicalPhrase [a]
+ | AttrRThat VerbPhrase
+ deriving (Show, Eq, Ord)
+
+
+-- | Adjectives for parts of the AST where adjectives are not used
+-- to modify nouns and the L/R distinction does not matter, such as
+-- when they are used together with a copula (like /@n is even@/).
+type Adj = AdjOf Term
+data AdjOf a
+ = Adj LexicalPhrase [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | A verb pattern together with its (non-subject) arguments.
+type Verb = VerbOf Term
+data VerbOf a
+ = Verb (SgPl LexicalPhrase) [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | A functional noun together with its arguments,
+-- e.g. /@the derivative of $f$@/.
+type Fun = FunOf Term
+data FunOf a
+ = Fun (SgPl LexicalPhrase) [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | Predicative part of a statement, possibly negated.
+type VerbPhrase = VerbPhraseOf Term
+data VerbPhraseOf a
+ = VPVerb (VerbOf a)
+ | VPAdj (NonEmpty (AdjOf a)) -- ^ @x is foo@ / @x is foo and bar@
+ | VPVerbNot (VerbOf a)
+ | VPAdjNot (NonEmpty (AdjOf a)) -- ^ @x is not foo@ / @x is neither foo nor bar@
+ deriving (Show, Eq, Ord)
+
+
+data Quantifier
+ = Universally
+ | Existentially
+ | Nonexistentially
+ deriving (Show, Eq, Ord)
+
+-- | A quantifier together with the noun phrase it ranges over.
+data QuantPhrase = QuantPhrase Quantifier (NounPhrase []) deriving (Show, Eq, Ord)
+
+
+-- | Terms: the noun-like things statements talk about.
+data Term
+ = TermExpr Expr
+ -- ^ A symbolic expression.
+ | TermFun Fun
+ -- ^ Definite noun phrase, e.g. /@the derivative of $f$@/.
+ | TermIota VarSymbol Stmt
+ -- ^ Definite descriptor, e.g. /@an $x$ such that ...@/.
+ | TermQuantified Quantifier (NounPhrase Maybe)
+ -- ^ Indefinite quantified notion, e.g. /@every even integer that divides $k$ ...@/.
+ deriving (Show, Eq, Ord)
+
+
+-- | Statements: the bodies of axioms, lemmas, and definitions.
+data Stmt
+ = StmtFormula Formula -- ^ E.g.: /@We have \<Formula\>@/.
+ | StmtVerbPhrase (NonEmpty Term) VerbPhrase -- ^ E.g.: /@\<Term\> and \<Term\> \<verb\>@/.
+ | StmtNoun Term (NounPhrase Maybe) -- ^ E.g.: /@\<Term\> is a(n) \<NP\>@/.
+ | StmtStruct Term StructPhrase -- ^ E.g.: /@\<Term\> is a monoid@/ (cf. 'AsmLetStruct').
+ | StmtNeg Stmt -- ^ E.g.: /@It is not the case that \<Stmt\>@/.
+ | StmtExists (NounPhrase []) -- ^ E.g.: /@There exists a(n) \<NP\>@/.
+ | StmtConnected Connective Stmt Stmt
+ | StmtQuantPhrase QuantPhrase Stmt
+ | SymbolicQuantified Quantifier (NonEmpty VarSymbol) Bound (Maybe Stmt) Stmt
+ -- ^ Quantifier, bound variables, bound, optional such-that clause, body.
+ deriving (Show, Eq, Ord)
+
+-- | An optional bound on quantified variables, given by a (possibly
+-- negated) relation to an expression.
+data Bound = Unbounded | Bounded Sign Relation Expr deriving (Show, Eq, Ord)
+
+-- | Universal quantification with an optional such-that clause.
+pattern SymbolicForall :: NonEmpty VarSymbol -> Bound -> Maybe Stmt -> Stmt -> Stmt
+pattern SymbolicForall vs bound suchThat have = SymbolicQuantified Universally vs bound suchThat have
+
+-- | Existential quantification (never carries a such-that clause).
+pattern SymbolicExists :: NonEmpty VarSymbol -> Bound -> Stmt -> Stmt
+pattern SymbolicExists vs bound suchThat = SymbolicQuantified Existentially vs bound Nothing suchThat
+
+-- | Negated existential quantification.
+pattern SymbolicNotExists :: NonEmpty VarSymbol -> Bound -> Stmt -> Stmt
+pattern SymbolicNotExists vs bound suchThat = StmtNeg (SymbolicExists vs bound suchThat)
+
+-- | Assumptions preceding a statement or definition.
+data Asm
+ = AsmSuppose Stmt
+ | AsmLetNoun (NonEmpty VarSymbol) (NounPhrase Maybe) -- ^ E.g.: /@let k be an integer@/
+ | AsmLetIn (NonEmpty VarSymbol) Expr -- ^ E.g.: /@let $k\in\integers$@/
+ | AsmLetThe VarSymbol Fun -- ^ E.g.: /@let $g$ be the derivative of $f$@/
+ | AsmLetEq VarSymbol Expr -- ^ E.g.: /@let $m = n + k$@/
+ | AsmLetStruct VarSymbol StructPhrase -- ^ E.g.: /@let $A$ be a monoid@/
+ deriving (Show, Eq, Ord)
+
+-- | An axiom: assumptions and the axiomatized statement.
+data Axiom = Axiom [Asm] Stmt
+ deriving (Show, Eq, Ord)
+
+-- | A lemma (or theorem etc.): assumptions and the claimed statement.
+data Lemma = Lemma [Asm] Stmt
+ deriving (Show, Eq, Ord)
+
+-- | The head of the definition describes the part before the /@iff@/,
+-- i.e. the definiendum. An optional noun-phrase corresponds to an optional
+-- type annotation for the 'Term' of the head. The last part of the head
+-- is the lexical phrase that is defined.
+--
+-- > "A natural number $n$ divides $m$ iff ..."
+-- > ^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^ ^^^
+-- > type annotation variable verb definiens
+-- > (a noun phrase) (all args are vars) (a statement)
+--
+data DefnHead
+ = DefnAdj (Maybe (NounPhrase Maybe)) VarSymbol (AdjOf VarSymbol)
+ | DefnVerb (Maybe (NounPhrase Maybe)) VarSymbol (VerbOf VarSymbol)
+ | DefnNoun VarSymbol (NounOf VarSymbol)
+ | DefnSymbolicPredicate PrefixPredicate (NonEmpty VarSymbol)
+ | DefnRel VarSymbol RelationSymbol VarSymbol
+ -- ^ E.g.: /@$x \subseteq y$ iff [...@/
+ deriving (Show, Eq, Ord)
+
+-- | A definition block.
+data Defn
+ = Defn [Asm] DefnHead Stmt
+ | DefnFun [Asm] (FunOf VarSymbol) (Maybe Term) Term
+ -- ^ A 'DefnFun' consists of the functional noun (which must start with /@the@/)
+ -- and an optional specification of a symbolic equivalent. The symbolic equivalent
+ -- does not need to have the same variables as the full functional noun pattern.
+ --
+ -- > "The tensor product of $U$ and $V$ over $K$, $U\tensor V$, is ..."
+ -- > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^
+ -- > definiendum symbolic eqv. definiens
+ -- > (a functional noun) (an expression) (a term)
+ --
+ | DefnOp SymbolPattern Expr
+ -- ^ Symbolic operator definition: pattern applied to variables equals an expression.
+ deriving (Show, Eq, Ord)
+
+-- | Proof steps; most constructors carry the remainder of the proof.
+data Proof
+ = Omitted
+ -- ^ No proof given.
+ | Qed Justification
+ -- ^ Ends of a proof, leaving automation to discharge the current goal using the given justification.
+ | ByCase [Case]
+ -- ^ Case split.
+ | ByContradiction Proof
+ | BySetInduction (Maybe Term) Proof
+ -- ^ ∈-induction.
+ | ByOrdInduction Proof
+ -- ^ Transfinite induction for ordinals.
+ | Assume Stmt Proof
+ -- ^ Discharge the antecedent of an implication goal.
+ | FixSymbolic (NonEmpty VarSymbol) Bound Proof
+ -- ^ Fix variables (with an optional bound) for a universal goal.
+ | FixSuchThat (NonEmpty VarSymbol) Stmt Proof
+ -- ^ Fix variables constrained by a statement.
+ | Calc Calc Proof
+ -- ^ Simplify goals that are implications or disjunctions.
+ -- NOTE(review): this haddock looks misplaced — 'Calc' is a chain of
+ -- equalities/biconditionals; confirm the intended description.
+ | TakeVar (NonEmpty VarSymbol) Bound Stmt Justification Proof
+ -- ^ Obtain witnesses (symbolically) from an existential fact.
+ | TakeNoun (NounPhrase []) Justification Proof
+ -- ^ Obtain witnesses described by a noun phrase.
+ | Have (Maybe Stmt) Stmt Justification Proof
+ -- ^ /@Since \<stmt\>, we have \<stmt\> by \<ref\>.@/
+ | Suffices Stmt Justification Proof
+ -- ^ /@It suffices to show that [...]. [...]@/
+ | Subclaim Stmt Proof Proof
+ -- ^ A claim is a sublemma with its own proof:
+ -- /@Show \<goal stmt\>. \<steps\>. \<continue other proof\>.@/
+ | Define VarSymbol Expr Proof
+ -- ^ Local definition.
+ --
+ | DefineFunction VarSymbol VarSymbol Expr VarSymbol Expr Proof
+ -- ^ Local function definition, e.g. /@Let $f(x) = e$ for $x\\in d$@/.
+ -- The first 'VarSymbol' is the newly defined symbol, the second one is the argument.
+ -- The first 'Expr' is the value, the final variable and expr specify a bound (the domain of the function).
+ deriving (Show, Eq, Ord)
+
+-- | An inline justification.
+data Justification
+ = JustificationRef (NonEmpty Marker)
+ -- ^ Reference to labelled blocks.
+ | JustificationSetExt
+ -- ^ By set extensionality.
+ | JustificationEmpty
+ | JustificationLocal -- ^ Use only local assumptions
+ deriving (Show, Eq, Ord)
+
+
+-- | A case of a case split.
+data Case = Case
+ { caseOf :: Stmt
+ , caseProof :: Proof
+ } deriving (Show, Eq, Ord)
+
+-- | Calculational reasoning: chains of equalities or biconditionals.
+data Calc
+ = Equation Expr (NonEmpty (Expr, Justification))
+ -- ^ A chain of equalities. Each claimed equality has a (potentially empty) justification.
+ -- For example: @a &= b \\explanation{by \\cref{a_eq_b}} &= c@
+ -- would be (modulo expr constructors)
+ -- @Equation "a" [("b", JustificationRef "a_eq_b"), ("c", JustificationEmpty)]@.
+ | Biconditionals Formula (NonEmpty (Formula, Justification))
+ deriving (Show, Eq, Ord)
+
+
+-- | Abbreviations: notation that is expanded away rather than defined.
+data Abbreviation
+ = AbbreviationAdj VarSymbol (AdjOf VarSymbol) Stmt
+ | AbbreviationVerb VarSymbol (VerbOf VarSymbol) Stmt
+ | AbbreviationNoun VarSymbol (NounOf VarSymbol) Stmt
+ | AbbreviationRel VarSymbol RelationSymbol VarSymbol Stmt
+ | AbbreviationFun (FunOf VarSymbol) Term
+ | AbbreviationEq SymbolPattern Expr
+ deriving (Show, Eq, Ord)
+
+-- | Datatype declarations; currently only finite enumerations.
+data Datatype
+ = DatatypeFin (NounOf Term) (NonEmpty Text)
+ deriving (Show, Eq, Ord)
+
+-- | An inductively defined set.
+data Inductive = Inductive
+ { inductiveSymbolPattern :: SymbolPattern
+ , inductiveDomain :: Expr
+ , inductiveIntros :: NonEmpty IntroRule
+ }
+ deriving (Show, Eq, Ord)
+
+-- | One introduction rule of an inductive definition.
+data IntroRule = IntroRule
+ { introConditions :: [Formula] -- The inductively defined set may only appear as an argument of monotone operations on the rhs.
+ , introResult :: Formula -- TODO Refine.
+ }
+ deriving (Show, Eq, Ord)
+
+
+-- | A function symbol applied to its variable arguments.
+data SymbolPattern = SymbolPattern FunctionSymbol [VarSymbol]
+ deriving (Show, Eq, Ord)
+
+-- | Signatures introduce lexical items without a defining statement.
+data Signature
+ = SignatureAdj VarSymbol (AdjOf VarSymbol)
+ | SignatureVerb VarSymbol (VerbOf VarSymbol)
+ | SignatureNoun VarSymbol (NounOf VarSymbol)
+ | SignatureSymbolic SymbolPattern (NounPhrase Maybe)
+ -- ^ /@$\<symbol\>(\<vars\>)$ is a \<noun\>@/
+ deriving (Show, Eq, Ord)
+
+
+-- | Definition of a mathematical structure.
+data StructDefn = StructDefn
+ { structPhrase :: StructPhrase
+ -- ^ E.g.: @partial order@ or @abelian group@.
+ , structParents :: [StructPhrase]
+ -- ^ Structural parents
+ , structLabel :: VarSymbol
+ , structFixes :: [StructSymbol]
+ -- ^ List of text for commands representing constants not inherited from its parents,
+ -- e.g.: @\sqsubseteq@ or @\inv@.
+ , structAssumes :: [(Marker, Stmt)]
+ }
+ deriving (Show, Eq, Ord)
+
+-- | Label of a toplevel block, used for cross-references.
+newtype Marker = Marker Text deriving (Show, Eq, Ord, Generic)
+
+deriving newtype instance Hashable Marker
+
+instance IsString Marker where
+ fromString str = Marker (Text.pack str)
+
+-- | Toplevel blocks of a document, each with its source position.
+data Block
+ = BlockAxiom SourcePos Marker Axiom
+ | BlockLemma SourcePos Marker Lemma
+ | BlockProof SourcePos Proof
+ | BlockDefn SourcePos Marker Defn
+ | BlockAbbr SourcePos Marker Abbreviation
+ | BlockData SourcePos Datatype
+ | BlockInductive SourcePos Marker Inductive
+ | BlockSig SourcePos [Asm] Signature
+ | BlockStruct SourcePos Marker StructDefn
+ deriving (Show, Eq, Ord)
diff --git a/source/Syntax/Adapt.hs b/source/Syntax/Adapt.hs
new file mode 100644
index 0000000..0896c08
--- /dev/null
+++ b/source/Syntax/Adapt.hs
@@ -0,0 +1,382 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE ApplicativeDo #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Syntax.Adapt where
+
+import Base
+import Syntax.Abstract
+import Syntax.Lexicon
+import Syntax.Token
+
+import Data.Map.Strict qualified as Map
+import Data.Maybe (catMaybes)
+import Data.Set qualified as Set
+import Data.Sequence qualified as Seq
+import Data.HashSet qualified as HS
+import Data.HashMap.Strict qualified as HM
+import Data.Text qualified as Text
+import Text.Earley.Mixfix (Associativity(..))
+import Text.Regex.Applicative
+import Text.Megaparsec.Pos
+
+
+-- | Scan one token chunk (a toplevel environment) for the lexical items
+-- it introduces. Chunks that are not definition-like yield @[]@;
+-- definition-like chunks whose head does not match the expected shape
+-- abort with 'error', reporting the source position.
+scanChunk :: [Located Token] -> [ScannedLexicalItem]
+scanChunk ltoks =
+ let toks = unLocated <$> ltoks
+ matchOrErr re env pos = match re toks ?? error ("could not find lexical pattern in " <> env <> " at " <> sourcePosPretty pos)
+ in case ltoks of
+ Located pos (BeginEnv "definition") : _ -> matchOrErr definition "definition" pos
+ Located pos (BeginEnv "abbreviation") : _ -> matchOrErr abbreviation "abbreviation" pos
+ Located pos (BeginEnv "struct") :_ -> matchOrErr struct "struct definition" pos
+ Located pos (BeginEnv "inductive") :_ -> matchOrErr inductive "inductive definition" pos
+ _ -> []
+
+-- | Extend a lexicon with all lexical items scanned from the given chunks.
+adaptChunks :: [[Located Token]] -> Lexicon -> Lexicon
+adaptChunks = extendLexicon . concatMap scanChunk
+
+-- | A lexical item discovered while scanning, tagged with the marker
+-- of the block that introduced it.
+data ScannedLexicalItem
+ = ScanAdj LexicalPhrase Marker
+ | ScanFun LexicalPhrase Marker
+ | ScanNoun LexicalPhrase Marker
+ | ScanStructNoun LexicalPhrase Marker
+ | ScanVerb LexicalPhrase Marker
+ | ScanRelationSymbol RelationSymbol Marker
+ | ScanFunctionSymbol FunctionSymbol Marker
+ | ScanPrefixPredicate PrefixPredicate Marker
+ | ScanStructOp Text -- we can use the command text as export name.
+ deriving (Show, Eq)
+
+-- | Skip tokens up to the next lexical-item-introducing environment.
+-- NOTE(review): "inductive" environments are handled by 'scanChunk' but
+-- are missing from this list (and from 'notEndOfLexicalEnvToken') —
+-- confirm whether that is intentional; since 'chunkify' yields one
+-- environment per chunk it may never matter in practice.
+skipUntilNextLexicalEnv :: RE Token [Token]
+skipUntilNextLexicalEnv = many (psym otherToken)
+ where
+ otherToken tok = tok /= BeginEnv "definition" && tok /= BeginEnv "struct" && tok /= BeginEnv "abbreviation"
+
+-- | Match a single token that does not end a lexical environment.
+notEndOfLexicalEnvToken :: RE Token Token
+notEndOfLexicalEnvToken = psym innerToken
+ where
+ innerToken tok = tok /= EndEnv "definition" && tok /= EndEnv "struct" && tok /= EndEnv "abbreviation"
+
+-- | Match a definition environment, yielding the lexical item its head
+-- introduces, tagged with the environment's label.
+definition :: RE Token [ScannedLexicalItem]
+definition = do
+ sym (BeginEnv "definition")
+ few notEndOfLexicalEnvToken
+ m <- label
+ few anySym
+ lexicalItem <- head
+ few anySym
+ sym (EndEnv "definition")
+ skipUntilNextLexicalEnv
+ pure [lexicalItem m]
+
+-- | Like 'definition', but for abbreviation environments.
+-- NOTE(review): this uses @few anySym@ before the label where
+-- 'definition' uses @few notEndOfLexicalEnvToken@ — confirm the
+-- asymmetry is intended.
+abbreviation :: RE Token [ScannedLexicalItem]
+abbreviation = do
+ sym (BeginEnv "abbreviation")
+ few anySym
+ m <- label
+ few anySym
+ lexicalItem <- head
+ few anySym
+ sym (EndEnv "abbreviation")
+ skipUntilNextLexicalEnv
+ pure [lexicalItem m]
+
+label :: RE Token Marker
+label = msym \case
+ Label m -> Just (Marker m)
+ _ -> Nothing
+
+-- | 'RE' that matches the head of a definition.
+head :: RE Token (Marker -> ScannedLexicalItem)
+-- Note that @<|>@ is left biased for 'RE', so we can just
+-- place 'adj' before 'verb' and do not have to worry about
+-- overlapping patterns.
+head = ScanNoun <$> noun
+ <|> ScanAdj <$> adj
+ <|> ScanVerb <$> verb
+ <|> ScanFun <$> fun
+ <|> ScanRelationSymbol <$> relationSymbol
+ <|> ScanFunctionSymbol <$> functionSymbol
+ <|> ScanPrefixPredicate <$> prefixPredicate
+
+-- | Match an inductive-definition environment; its head introduces a
+-- function symbol (delimited by @\subseteq@, cf. 'functionSymbolInductive').
+inductive :: RE Token [ScannedLexicalItem]
+inductive = do
+ sym (BeginEnv "inductive")
+ few notEndOfLexicalEnvToken
+ m <- label
+ few anySym
+ lexicalItem <- functionSymbolInductive
+ few anySym
+ sym (EndEnv "inductive")
+ skipUntilNextLexicalEnv
+ pure [ScanFunctionSymbol lexicalItem m]
+
+-- | Match a struct environment: the struct noun itself plus any
+-- operations listed in an embedded enumerate environment.
+struct :: RE Token [ScannedLexicalItem]
+struct = do
+ sym (BeginEnv "struct")
+ few anySym
+ m <- label
+ few anySym
+ lexicalItem <- ScanStructNoun . toLexicalPhrase <$> (an *> structPat <* var)
+ few anySym
+ lexicalItems <- structOps <|> pure []
+ sym (EndEnv "struct")
+ skipUntilNextLexicalEnv
+ pure (lexicalItem m : lexicalItems)
+
+-- | The enumerate environment listing a struct's operations.
+structOps :: RE Token [ScannedLexicalItem]
+structOps = do
+ sym (BeginEnv "enumerate")
+ lexicalItems <- many structOp
+ sym (EndEnv "enumerate")
+ few anySym
+ pure lexicalItems
+
+-- | A single @\item@ introducing one struct operation (a math command).
+structOp :: RE Token ScannedLexicalItem
+structOp = do
+ sym (Command "item")
+ op <- math command
+ pure (ScanStructOp op)
+
+-- | Head of a noun definition: /@$x$ is a(n) \<pattern\> iff@/.
+noun :: RE Token LexicalPhrase
+noun = toLexicalPhrase <$> (var *> is *> an *> pat <* iff)
+
+-- | Head of an adjective definition: /@$x$ is \<pattern\> iff@/.
+adj :: RE Token LexicalPhrase
+adj = toLexicalPhrase <$> (var *> is *> pat <* iff)
+
+-- | Head of a verb definition: /@$x$ \<pattern\> iff@/.
+verb :: RE Token LexicalPhrase
+verb = toLexicalPhrase <$> (var *> pat <* iff)
+
+-- | Head of a functional-noun definition: /@the \<pattern\> is@/ (or a comma).
+fun :: RE Token LexicalPhrase
+fun = toLexicalPhrase <$> (the *> pat <* (is <|> comma))
+
+-- | Head of a relator definition: a math environment of the shape
+-- /@\<var\> \<relator\> \<var\>@/ followed by /@iff@/.
+relationSymbol :: RE Token RelationSymbol
+relationSymbol = math relator' <* iff
+ where
+ relator' = do
+ varSymbol
+ rel <- symbol
+ varSymbol
+ pure rel
+
+-- | Function symbol pattern of a symbolic definition: the tokens of a
+-- math environment up to (and excluding) the defining @=@.
+functionSymbol :: RE Token FunctionSymbol
+functionSymbol = functionSymbolUpTo (Symbol "=")
+
+-- | Function symbol pattern of an inductive definition, delimited by
+-- @\subseteq@ instead of @=@.
+functionSymbolInductive :: RE Token FunctionSymbol
+functionSymbolInductive = functionSymbolUpTo (Command "subseteq")
+
+-- | Shared worker for 'functionSymbol' and 'functionSymbolInductive'
+-- (previously two verbatim copies): lazily match the contents of a
+-- math environment up to the given delimiter token.
+functionSymbolUpTo :: Token -> RE Token FunctionSymbol
+functionSymbolUpTo delimiter = do
+ sym (BeginEnv "math")
+ toks <- few nonDefinitionKeyword
+ sym delimiter
+ pure (fromToken <$> toks)
+ where
+ fromToken = \case
+ Variable _ -> Nothing -- Variables become slots.
+ tok -> Just tok -- Everything else is part of the pattern.
+
+-- | Head of a prefix-predicate definition: a command applied to braced
+-- variable arguments; the arity is the number of braced arguments.
+prefixPredicate :: RE Token PrefixPredicate
+prefixPredicate = math prfx <* iff
+ where
+ prfx = do
+ r <- command
+ args <- many (sym InvisibleBraceL *> varSymbol <* sym InvisibleBraceR)
+ pure (PrefixPredicate r (length args))
+
+
+-- | Match any TEX command token, yielding its name.
+command :: RE Token Text
+command = msym \case
+ Command cmd -> Just cmd
+ _ -> Nothing
+
+-- | A single variable wrapped in a math environment.
+var :: RE Token Token
+var = math varSymbol
+
+-- | Match any variable token.
+varSymbol :: RE Token Token
+varSymbol = psym isVar
+
+
+-- | Any token except the keywords that delimit a symbolic definition.
+nonDefinitionKeyword :: RE Token Token
+nonDefinitionKeyword = psym (`notElem` keywords)
+ where
+ keywords =
+ [ Word "if"
+ , Word "iff"
+ , Symbol "="
+ , Command "iff"
+ ]
+
+
+-- | A definiendum pattern: lexical-phrase tokens and variable slots.
+pat :: RE Token [Token]
+pat = many (psym isLexicalPhraseToken <|> var)
+
+-- | Like 'pat' but without variable slots.
+structPat :: RE Token [Token]
+structPat = many (psym isLexicalPhraseToken)
+
+-- | Wrap a matcher in math-environment delimiters.
+math :: RE Token a -> RE Token a
+math re = sym (BeginEnv "math") *> re <* sym (EndEnv "math")
+
+-- | We allow /conditional perfection/: the first /@if@/ in a definition is interpreted as /@iff@/.
+-- Also accepts the other definitional copulas /@denote@/ and /@stand for@/.
+iff :: RE Token ()
+iff = void (sym (Word "if")) -- Using @void@ is faster (only requires recognition).
+ <|> void (sym (Word "iff"))
+ <|> void (string [Word "if", Word "and", Word "only", Word "if"])
+ <|> void (sym (Word "denote"))
+ <|> void (sym (Word "stand") *> sym (Word "for"))
+{-# INLINE iff #-}
+
+-- | Indefinite article.
+an :: RE Token ()
+an = void (sym (Word "a"))
+ <|> void (sym (Word "an"))
+{-# INLINE an #-}
+
+-- | Copula of a definition head.
+is :: RE Token ()
+is = void (sym (Word "is") <|> sym (Word "denotes"))
+{-# INLINE is #-}
+
+-- | Definite article.
+the :: RE Token ()
+the = void (sym (Word "the"))
+{-# INLINE the #-}
+
+-- | A comma token.
+comma :: RE Token ()
+comma = void (sym (Symbol ","))
+{-# INLINE comma #-}
+
+
+-- | Whether a token is a variable.
+isVar :: Token -> Bool
+isVar = \case
+ Variable _ -> True
+ _token -> False
+
+-- | Whether a token is a TEX command.
+isCommand :: Token -> Bool
+isCommand = \case
+ Command _ -> True
+ _token -> False
+
+-- | Whether a token does not delimit a definition environment.
+isNotDefnToken :: Token -> Bool
+isNotDefnToken = \case
+ BeginEnv "definition" -> False
+ EndEnv "definition" -> False
+ _token -> True
+
+-- | Which tokens may occur inside a lexical phrase (a definiendum pattern).
+isLexicalPhraseToken :: Token -> Bool
+isLexicalPhraseToken = \case
+ Word w -> w `Set.notMember` keywords
+ --
+ -- Simple commands (outside of math-mode) are allowed. This is useful
+ -- for defining lexical phrases containing symbolic expressions such as
+ -- `X is \Ttwo{}`, where `\Ttwo` is a macro that expands to `T_2`.
+ -- We also allow these macros to take arguments, hence the need to
+ -- allow grouping delimiters. They can also be used to escape the end
+ -- of the command for correct spacing, as in the above example.
+ --
+ Command _cmd -> True
+ InvisibleBraceL -> True
+ InvisibleBraceR -> True
+ --
+ -- No other tokens may occur in lexical phrases. In particular, no `_dot`
+ -- token may occur, limiting the lexical phrase to a single sentence.
+ -- Commas occurring in variable lists should be placed
+ -- within the math environment. Thus `$a,b$ are coprime iff`,
+ -- not `$a$,`$b$` are coprime iff`.
+ --
+ _token -> False
+ where
+ keywords = Set.fromList ["a", "an", "is", "are", "if", "iff", "denote", "stand", "let"]
+
+
+-- | Turn a token sequence into a lexical phrase: each variable token
+-- becomes an argument slot ('Nothing'); all other tokens are kept.
+toLexicalPhrase :: [Token] -> LexicalPhrase
+toLexicalPhrase = map slotify
+ where
+ slotify tok = case tok of
+ Variable _ -> Nothing
+ other -> Just other
+
+
+-- | Match a single command or symbol token, yielding the token itself.
+symbol :: RE Token Token
+symbol = msym \case
+ tok@(Command _) -> Just tok
+ tok@(Symbol _) -> Just tok
+ _tok -> Nothing
+
+
+-- | Basic paradigms for pluralizations of nominals.
+-- Heuristics: append \'s\' to the first word when a preposition follows;
+-- otherwise append \'s\' to the final word of phrases of up to three
+-- tokens; longer phrases are left unchanged.
+guessNounPlural :: LexicalPhrase -> SgPl LexicalPhrase
+guessNounPlural item = SgPl item (pluralize item)
+ where
+ pluralize :: LexicalPhrase -> LexicalPhrase
+ pluralize = \case
+ Just (Word w) : pat'@(Just w' : _) | isPreposition w' -> Just (Word (Text.snoc w 's')) : pat'
+ [Just (Word w)] -> [Just (Word (Text.snoc w 's'))]
+ [tok, Just (Word w)] -> [tok, Just (Word (Text.snoc w 's'))]
+ [tok, tok', Just (Word w)] -> [tok, tok', Just (Word (Text.snoc w 's'))]
+ pat' -> pat'
+
+-- | Guess the plural of a verb by stripping a trailing \'s\' from its
+-- first word; phrases not starting with an \'s\'-final word are unchanged.
+guessVerbPlural :: LexicalPhrase -> SgPl LexicalPhrase
+guessVerbPlural item = SgPl item itemPl
+ where
+ itemPl = case item of
+ Just (Word v) : rest -> case Text.unsnoc v of
+ Just (v', 's') -> Just (Word v') : rest
+ _ -> item
+ _ -> item
+
+-- | A lexical phrase counts as a right adjective when it mentions a
+-- preposition or has an argument slot.
+isAdjR :: LexicalPhrase -> Bool
+isAdjR item = containsPreposition item || containsSlot item
+ where
+ containsPreposition, containsSlot :: LexicalPhrase -> Bool
+ containsPreposition = any isPreposition . catMaybes
+ containsSlot = (Nothing `elem`)
+
+-- | Whether a token is a preposition word. Only 'Word' tokens can
+-- qualify, so match the constructor and look the text up directly;
+-- the previous version rebuilt @HS.map Word prepositions@ on every
+-- call, paying a full set construction per queried token.
+isPreposition :: Token -> Bool
+isPreposition = \case
+ Word w -> w `HS.member` prepositions
+ _token -> False
+
+-- | Right-biased set insertion, keeping the original set
+-- when inserting already present elements. @insertR@ is unfortunately
+-- a hidden function, even in @Data.Set.Internal@, so we approximate it
+-- here. In theory one could avoid the indirection of first forming the singleton
+-- set on the rhs.
+insertR' :: Hashable a => a -> HashSet a -> HashSet a
+insertR' x xs = xs `HS.union` HS.singleton x -- @union@ is left-biased.
+
+-- | Right-biased 'Map' insertion: an existing binding for @k@ wins.
+insertMapR :: Ord k => k -> a -> Map k a -> Map k a
+insertMapR k x xs = xs `Map.union` Map.singleton k x -- @union@ is left-biased.
+
+
+-- | Right-biased 'HashMap' insertion: an existing binding for @k@ wins.
+insertR :: Hashable k => k -> a -> HashMap k a -> HashMap k a
+insertR k x xs = xs `HM.union` HM.singleton k x -- @union@ is left-biased.
+
+-- | Takes the scanned lexical phrases and inserts them in the correct
+-- places in a lexicon. Insertions are right-biased ('insertR'), so
+-- existing entries win over rescanned ones.
+extendLexicon :: [ScannedLexicalItem] -> Lexicon -> Lexicon
+extendLexicon [] lexicon = lexicon
+-- Note that we only consider 'sg' in the 'Ord' instance of SgPl, so that
+-- known irregular plurals are preserved.
+-- NOTE(review): the lexicon fields below are 'HashMap's, so it is the
+-- 'Eq'/'Hashable' instances of 'SgPl' that matter here — confirm they
+-- likewise only consider 'sg'.
+extendLexicon (scan : scans) lexicon@Lexicon{..} = case scan of
+ ScanAdj item m -> if isAdjR item
+ then extendLexicon scans lexicon{lexiconAdjRs = insertR item m lexiconAdjRs}
+ else extendLexicon scans lexicon{lexiconAdjLs = insertR item m lexiconAdjLs}
+ ScanFun item m ->
+ extendLexicon scans lexicon{lexiconFuns = insertR (guessNounPlural item) m lexiconFuns}
+ ScanVerb item m ->
+ extendLexicon scans lexicon{lexiconVerbs = insertR (guessVerbPlural item) m lexiconVerbs}
+ ScanNoun item m ->
+ extendLexicon scans lexicon{lexiconNouns = insertR (guessNounPlural item) m lexiconNouns}
+ ScanStructNoun item m ->
+ extendLexicon scans lexicon{lexiconStructNouns = insertR (guessNounPlural item) m lexiconStructNouns}
+ ScanRelationSymbol item m ->
+ extendLexicon scans lexicon{lexiconRelationSymbols = insertR item m lexiconRelationSymbols}
+ -- Function symbols already registered at any precedence level are
+ -- skipped; new ones are registered at level 9 with no associativity.
+ ScanFunctionSymbol item m ->
+ if any (item `HM.member`) lexiconMixfix
+ then extendLexicon scans lexicon
+ else extendLexicon scans lexicon{lexiconMixfix = Seq.adjust (HM.insert item (NonAssoc, m)) 9 lexiconMixfix}
+ ScanStructOp op ->
+ extendLexicon scans lexicon{lexiconStructFun = insertR (StructSymbol op) (Marker op) lexiconStructFun}
+ ScanPrefixPredicate tok m ->
+ extendLexicon scans lexicon{lexiconPrefixPredicates = insertR tok m lexiconPrefixPredicates}
diff --git a/source/Syntax/Chunk.hs b/source/Syntax/Chunk.hs
new file mode 100644
index 0000000..604ccf6
--- /dev/null
+++ b/source/Syntax/Chunk.hs
@@ -0,0 +1,46 @@
+module Syntax.Chunk where
+
+import Base
+import Syntax.Token
+
+import Data.List qualified as List
+
+
+-- LATER This is just a naïve implementation of token chunks.
+-- It and the current tokenizer should probably be replaced by
+-- a more efficient implementation that does chunking
+-- directly while tokenizing.
+
+-- | Split a token stream into chunks, one per toplevel environment
+-- (including its begin/end tokens), discarding everything in between.
+chunkify :: [Located Token] -> [[Located Token]]
+chunkify [] = []
+chunkify (tok : toks) = case unLocated tok of
+ BeginEnv env | isTopLevelEnv env ->
+ let (axiomToks, otherToks) = envBlock env toks
+ in (tok : axiomToks) : chunkify otherToks
+ --
+ -- Skip all tokens outside of toplevel environments.
+ _ -> chunkify toks
+
+-- | Environments that form their own chunk (cf. 'chunkify').
+isTopLevelEnv :: Text -> Bool
+isTopLevelEnv env = env `elem`
+ [ "abbreviation"
+ , "axiom"
+ , "claim"
+ , "corollary"
+ , "datatype"
+ , "definition"
+ , "inductive"
+ , "lemma"
+ , "proof"
+ , "proposition"
+ , "signature"
+ , "struct"
+ , "theorem"
+ ]
+
+-- | Split off one environment block: everything up to and including
+-- the matching @EndEnv@ token, paired with the remaining tokens.
+-- When the end token is missing, the whole input becomes the block.
+envBlock :: Text -> [Located Token] -> ([Located Token], [Located Token])
+envBlock env toks =
+ case List.break (\tok -> unLocated tok == EndEnv env) toks of
+ (body, []) -> (body, [])
+ (body, endTok : rest) -> (body ++ [endTok], rest)
diff --git a/source/Syntax/Concrete.hs b/source/Syntax/Concrete.hs
new file mode 100644
index 0000000..8c6962d
--- /dev/null
+++ b/source/Syntax/Concrete.hs
@@ -0,0 +1,657 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE RecursiveDo #-}
+
+-- | Concrete syntax of the surface language.
+module Syntax.Concrete where
+
+import Base
+import Syntax.Abstract
+import Syntax.Concrete.Keywords
+import Syntax.Lexicon (Lexicon(..), lexiconAdjs, splitOnVariableSlot)
+import Syntax.Token
+
+import Data.HashSet qualified as HS
+import Data.List.NonEmpty qualified as NonEmpty
+import Data.HashMap.Strict qualified as HM
+import Text.Earley (Grammar, Prod, (<?>), rule, satisfy, terminal)
+import Text.Earley.Mixfix
+import Text.Megaparsec.Pos (SourcePos)
+
+
+-- | The complete Earley grammar of the surface language, parameterized
+-- by the 'Lexicon' of known phrases and symbols. The starting category
+-- is a toplevel 'Block'. All rules are mutually recursive, hence the
+-- 'mdo' block.
+grammar :: Lexicon -> Grammar r (Prod r Text (Located Token) Block)
+grammar lexicon@Lexicon{..} = mdo
+  -- Operator and connective tables for 'mixfixExpression', built by
+  -- lifting the lexicon's token patterns into productions.
+  let makeOp :: ([Maybe Token], Associativity) -> ([Maybe (Prod r Text (Located Token) Token)], Associativity)
+      makeOp (pat, assoc) = (map (fmap token) pat, assoc)
+      ops = map (map makeOp) (toList (HM.toList . HM.map fst <$> lexiconMixfix))
+      conns = map (map makeOp) lexiconConnectives
+
+  -- Terminal productions for single tokens.
+  integer <- rule (terminal maybeIntToken <?> "integer")
+  relator <- rule $ unLocated <$> (satisfy (\ltok -> unLocated ltok `HM.member` lexiconRelationSymbols) <?> "relator")
+  varSymbol <- rule (terminal maybeVarToken <?> "variable")
+  varSymbols <- rule (commaList varSymbol)
+  cmd <- rule (terminal maybeCmdToken <?> "TEX command")
+--
+-- Formulas have three levels:
+--
+-- + Expressions: atoms or operators applied to atoms.
+-- + Chains: comma-lists of expressions, separated by relators.
+-- + Formulas: chains or connectives applied to chains.
+--
+-- For example, the formula @x, y < z \implies x, y < z + 1@ consists of the
+-- connective @\implies@ applied to two chains @x, y < z@ and @x, y < z + 1@.
+-- In turn, the chain @x, y < z + 1@ consists of three expressions,
+-- @x@, @y@, and @z + 1@. Finally, @z + 1@ consists of the operator @+@
+-- applied to two atoms, the variable @z@ and the number literal @1@.
+--
+-- This split is due to the different behaviour of relators compared to
+-- operators and connectives. Relators can chain (@x < y < z@) and allow
+-- lists as arguments, as in the above example. Operators and connectives
+-- instead have precedence and fixity. The only syntactic difference between
+-- an operator and a connective is the relative precedence compared to relators.
+--
+  replaceBound <- rule $ (,) <$> varSymbol <* _in <*> expr
+  replaceBounds <- rule $ commaList replaceBound
+  replaceFun <- rule $ ExprReplace <$> expr <* _pipe <*> replaceBounds <*> optional (_pipe *> comprStmt)
+  comprStmt <- rule $ (StmtFormula <$> formula) <|> text stmt
+
+  replacePredSymbolic <- rule $ ExprReplacePred <$> varSymbol <* _pipe <*> (command "exists" *> varSymbol) <* _in <*> expr <* _dot <*> (StmtFormula <$> formula)
+  replacePredText <- rule $ ExprReplacePred <$> varSymbol <* _pipe <*> (begin "text" *> _exists *> beginMath *> varSymbol <* _in) <*> expr <* endMath <* _suchThat <*> stmt <* end "text"
+  replacePred <- rule $ replacePredSymbolic <|> replacePredText
+
+  -- One alternative per structure operation known to the lexicon.
+  let exprStructOpOf ann = HS.foldr alg empty (HM.keysSet lexiconStructFun)
+        where
+        alg s prod = prod <|> (ExprStructOp <$> structSymbol s <*> ann)
+
+  exprStructOp <- rule (exprStructOpOf (optional (bracket expr)))
+
+  let bracedArgs1 ar arg = count1 ar $ group arg
+  let prefixPredicateOf f arg symb@(PrefixPredicate c ar) = f <$> pure symb <* command c <*> bracedArgs1 ar arg
+
+
+  exprParen <- rule $ paren expr
+  exprInteger <- rule $ ExprInteger <$> integer
+  exprVar <- rule $ ExprVar <$> varSymbol
+  exprTuple <- rule $ makeTuple <$> paren (commaList2 expr)
+  exprSep <- rule $ brace $ ExprSep <$> varSymbol <* _in <*> expr <* _pipe <*> comprStmt
+  exprReplace <- rule $ brace $ (replaceFun <|> replacePred)
+  exprFinSet <- rule $ brace $ ExprFiniteSet <$> exprs
+  exprBase <- rule $ asum [exprVar, exprInteger, exprStructOp, exprParen, exprTuple, exprSep, exprReplace, exprFinSet]
+  exprApp <- rule $ ExprApp <$> exprBase <*> (paren expr <|> exprTuple)
+  expr <- mixfixExpression ops (exprBase <|> exprApp) ExprOp
+  exprs <- rule $ commaList expr
+
+  relationSign <- rule $ pure Positive <|> (Negative <$ command "not")
+  relationExpr <- rule $ RelationExpr <$> (command "mathrel" *> group expr)
+  relation <- rule $ (RelationSymbol <$> relator) <|> relationExpr
+  chainBase <- rule $ ChainBase <$> exprs <*> relationSign <*> relation <*> exprs
+  chainCons <- rule $ ChainCons <$> exprs <*> relationSign <*> relation <*> chain
+  chain <- rule $ chainCons <|> chainBase
+
+  formulaPredicate <- rule $ asum $ prefixPredicateOf FormulaPredicate expr <$> HM.keys lexiconPrefixPredicates
+  formulaChain <- rule $ FormulaChain <$> chain
+  formulaBottom <- rule $ PropositionalConstant IsBottom <$ command "bot" <?> "\"\\bot\""
+  formulaTop <- rule $ PropositionalConstant IsTop <$ command "top" <?> "\"\\top\""
+  formulaExists <- rule $ FormulaQuantified Existentially <$> (command "exists" *> varSymbols) <*> maybeBounded <* _dot <*> formula
+  formulaAll <- rule $ FormulaQuantified Universally <$> (command "forall" *> varSymbols) <*> maybeBounded <* _dot <*> formula
+  formulaQuantified <- rule $ formulaExists <|> formulaAll
+  formulaBase <- rule $ asum [formulaChain, formulaPredicate, formulaBottom, formulaTop, paren formula]
+  formulaConn <- mixfixExpression conns formulaBase makeConnective
+  formula <- rule $ formulaQuantified <|> formulaConn
+
+-- These are asymmetric formulas (only variables are allowed on one side).
+-- They express judgements.
+--
+  assignment <- rule $ (,) <$> varSymbol <* (_eq <|> _defeq) <*> expr
+  typing <- rule $ (,) <$> varSymbols <* (_in <|> _colon) <*> expr
+
+  -- Lexical phrases instantiated from the lexicon.
+  adjL <- rule $ adjLOf lexicon term
+  adjR <- rule $ adjROf lexicon term
+  adj <- rule $ adjOf lexicon term
+  adjVar <- rule $ adjOf lexicon var
+
+  var <- rule $ math varSymbol
+  vars <- rule $ math varSymbols
+
+  verb <- rule $ verbOf lexicon sg term
+  verbPl <- rule $ verbOf lexicon pl term
+  verbVar <- rule $ verbOf lexicon sg var
+
+  noun <- rule $ nounOf lexicon sg term nounName -- Noun with optional variable name.
+  nounList <- rule $ nounOf lexicon sg term nounNames -- Noun with a list of names.
+  nounVar <- rule $ fst <$> nounOf lexicon sg var (pure Nameless) -- No names in defined nouns.
+  nounPl <- rule $ nounOf lexicon pl term nounNames
+  nounPlMay <- rule $ nounOf lexicon pl term nounName
+
+  structNoun <- rule $ structNounOf lexicon sg var var
+  structNounNameless <- rule $ fst <$> structNounOf lexicon sg var (pure Nameless)
+
+
+  fun <- rule $ funOf lexicon sg term
+  funVar <- rule $ funOf lexicon sg var
+
+  -- Right attributes: trailing adjectives and "that"-clauses.
+  attrRThat <- rule $ AttrRThat <$> thatVerbPhrase
+  attrRThats <- rule $ ((:[]) <$> attrRThat) <|> ((\a a' -> [a,a']) <$> attrRThat <* _and <*> attrRThat) <|> pure []
+  attrRs <- rule $ ((:[]) <$> adjR) <|> ((\a a' -> [a,a']) <$> adjR <* _and <*> adjR) <|> pure []
+  attrRight <- rule $ (<>) <$> attrRs <*> attrRThats
+
+  verbPhraseVerbSg <- rule $ VPVerb <$> verb
+  verbPhraseVerbNotSg <- rule $ VPVerbNot <$> (_does *> _not *> verbPl)
+  verbPhraseAdjSg <- rule $ VPAdj . (:|[]) <$> (_is *> adj)
+  verbPhraseAdjAnd <- rule do {_is; a1 <- adj; _and; a2 <- adj; pure (VPAdj (a1 :| [a2]))}
+  verbPhraseAdjNotSg <- rule $ VPAdjNot . (:|[]) <$> (_is *> _not *> adj)
+  verbPhraseNotSg <- rule $ verbPhraseVerbNotSg <|> verbPhraseAdjNotSg
+  verbPhraseSg <- rule $ verbPhraseVerbSg <|> verbPhraseAdjSg <|> verbPhraseAdjAnd <|> verbPhraseNotSg
+
+  -- LATER can cause technical ambiguities? verbPhraseVerbPl <- rule $ VPVerb <$> verbPl
+  verbPhraseVerbNotPl <- rule $ VPVerbNot <$> (_do *> _not *> verbPl)
+  verbPhraseAdjPl <- rule $ VPAdj . (:|[]) <$> (_are *> adj)
+  verbPhraseAdjNotPl <- rule $ VPAdjNot . (:|[]) <$> (_are *> _not *> adj)
+  verbPhraseNotPl <- rule $ verbPhraseVerbNotPl <|> verbPhraseAdjNotPl
+  verbPhrasePl <- rule $ verbPhraseAdjPl <|> verbPhraseNotPl -- LATER <|> verbPhraseVerbPl
+
+
+
+  thatVerbPhrase <- rule $ _that *> verbPhraseSg
+
+  nounName <- rule $ optional (math varSymbol)
+  nounNames <- rule $ math (commaList_ varSymbol) <|> pure []
+  nounPhrase <- rule $ makeNounPhrase <$> many adjL <*> noun <*> attrRight <*> optional suchStmt
+  nounPhrase' <- rule $ makeNounPhrase <$> many adjL <*> nounList <*> attrRight <*> optional suchStmt
+  nounPhrasePl <- rule $ makeNounPhrase <$> many adjL <*> nounPl <*> attrRight <*> optional suchStmt
+  nounPhrasePlMay <- rule $ makeNounPhrase <$> many adjL <*> nounPlMay <*> attrRight <*> optional suchStmt
+  nounPhraseMay <- rule $ makeNounPhrase <$> many adjL <*> noun <*> attrRight <*> optional suchStmt
+
+  -- Quantification phrases for quantification and indefinite terms.
+  quantAll <- rule $ QuantPhrase Universally <$> (_forEvery *> nounPhrase' <|> _forAll *> nounPhrasePl)
+  quantSome <- rule $ QuantPhrase Existentially <$> (_some *> (nounPhrase' <|> nounPhrasePl))
+  quantNone <- rule $ QuantPhrase Nonexistentially <$> (_no *> (nounPhrase' <|> nounPhrasePl))
+  quant <- rule $ quantAll <|> quantSome <|> quantNone -- <|> quantUniq
+
+
+  termExpr <- rule $ TermExpr <$> math expr
+  termFun <- rule $ TermFun <$> (optional _the *> fun)
+  termIota <- rule $ TermIota <$> (_the *> var) <* _suchThat <*> stmt
+  termAll <- rule $ TermQuantified Universally <$> (_every *> nounPhraseMay)
+  termSome <- rule $ TermQuantified Existentially <$> (_some *> nounPhraseMay)
+  termNo <- rule $ TermQuantified Nonexistentially <$> (_no *> nounPhraseMay)
+  termQuantified <- rule $ termAll <|> termSome <|> termNo
+  term <- rule $ termExpr <|> termFun <|> termQuantified <|> termIota
+
+-- Basic statements @stmt'@ are statements without any conjunctions or quantifiers.
+--
+  stmtVerbSg <- rule $ StmtVerbPhrase <$> ((:| []) <$> term) <*> verbPhraseSg
+  stmtVerbPl <-rule $ StmtVerbPhrase <$> andList1 term <*> verbPhrasePl
+  stmtVerb <- rule $ stmtVerbSg <|> stmtVerbPl
+  stmtNounIs <- rule $ StmtNoun <$> term <* _is <* _an <*> nounPhrase
+  stmtNounIsNot <- rule $ StmtNeg <$> (StmtNoun <$> term <* _is <* _not <* _an <*> nounPhrase)
+  stmtNoun <- rule $ stmtNounIs <|> stmtNounIsNot
+  stmtStruct <- rule $ StmtStruct <$> (term <* _is <* _an) <*> structNounNameless
+  stmtExists <- rule $ StmtExists <$> (_exists *> _an *> nounPhrase')
+  stmtExist <- rule $ StmtExists <$> (_exist *> nounPhrasePl)
+  stmtExistsNot <- rule $ StmtNeg . StmtExists <$> (_exists *> _no *> nounPhrase')
+  stmtFormula <- rule $ StmtFormula <$> math formula
+  stmtBot <- rule $ StmtFormula (PropositionalConstant IsBottom) <$ _contradiction
+  stmt' <- rule $ stmtVerb <|> stmtNoun <|> stmtStruct <|> stmtFormula <|> stmtBot
+  stmtOr <- rule $ stmt' <|> (StmtConnected Disjunction <$> stmt' <* _or <*> stmt)
+  stmtAnd <- rule $ stmtOr <|> (StmtConnected Conjunction <$> stmtOr <* _and <*> stmt)
+  stmtIff <- rule $ stmtAnd <|> (StmtConnected Equivalence <$> stmtAnd <* _iff <*> stmt)
+  stmtIf <- rule $ StmtConnected Implication <$> (_if *> stmt) <* optional _comma <* _then <*> stmt
+  stmtXor <- rule $ StmtConnected ExclusiveOr <$> (_either *> stmt) <* _or <*> stmt
+  stmtNor <- rule $ StmtConnected NegatedDisjunction <$> (_neither *> stmt) <* _nor <*> stmt
+  stmtNeg <- rule $ StmtNeg <$> (_itIsWrong *> stmt)
+
+  stmtQuantPhrase <- rule $ StmtQuantPhrase <$> (_for *> quant) <* optional _comma <* optional _have <*> stmt
+
+  suchStmt <- rule $ _suchThat *> stmt <* optional _comma
+
+
+  -- Symbolic quantifications with or without generalized bounds.
+  symbolicForall <- rule $ SymbolicForall
+    <$> ((_forAll <|> _forEvery) *> beginMath *> varSymbols)
+    <*> maybeBounded <* endMath
+    <*> optional suchStmt
+    <* optional _have <*> stmt
+  symbolicExists <- rule $ SymbolicExists
+    <$> ((_exists <|> _exist) *> beginMath *> varSymbols)
+    <*> maybeBounded <* endMath
+    <*> ((_suchThat *> stmt) <|> pure (StmtFormula (PropositionalConstant IsTop)))
+  symbolicNotExists <- rule $ SymbolicNotExists
+    <$> (_exists *> _no *> beginMath *> varSymbols)
+    <*> maybeBounded <* endMath
+    <* _suchThat <*> stmt
+  symbolicBound <- rule $ Bounded <$> relationSign <*> relation <*> expr
+  maybeBounded <- rule (pure Unbounded <|> symbolicBound)
+
+  symbolicQuantified <- rule $ symbolicForall <|> symbolicExists <|> symbolicNotExists
+
+  stmt <- rule $ asum [stmtNeg, stmtIf, stmtXor, stmtNor, stmtExists, stmtExist, stmtExistsNot, stmtQuantPhrase, stmtIff, symbolicQuantified] <?> "a statement"
+
+  -- Assumptions ("let ...", "suppose ...").
+  asmLetIn <- rule $ uncurry AsmLetIn <$> (_let *> math typing)
+  asmLetNoun <- rule $ AsmLetNoun <$> (_let *> fmap pure var <* (_be <|> _denote) <* _an) <*> nounPhrase
+  asmLetNouns <- rule $ AsmLetNoun <$> (_let *> vars <* (_be <|> _denote)) <*> nounPhrasePlMay
+  asmLetEq <- rule $ uncurry AsmLetEq <$> (_let *> math assignment)
+  asmLetThe <- rule $ AsmLetThe <$> (_let *> var <* _be <* _the) <*> fun
+  asmLetStruct <- rule $ AsmLetStruct <$> (_let *> var <* _be <* _an) <*> structNounNameless
+  asmLet <- rule $ asmLetNoun <|> asmLetNouns <|> asmLetIn <|> asmLetEq <|> asmLetThe <|> asmLetStruct
+  asmSuppose <- rule $ AsmSuppose <$> (_suppose *> stmt)
+  asm <- rule $ assumptionList (asmLet <|> asmSuppose) <* _dot
+  asms <- rule $ concat <$> many asm
+
+  axiom <- rule $ Axiom <$> asms <* optional _then <*> stmt <* _dot
+
+  lemma <- rule $ Lemma <$> asms <* optional _then <*> stmt <* _dot
+
+  -- Definitions.
+  defnAdj <- rule $ DefnAdj <$> optional (_an *> nounPhrase) <*> var <* _is <*> adjVar
+  defnVerb <- rule $ DefnVerb <$> optional (_an *> nounPhrase) <*> var <*> verbVar
+  defnNoun <- rule $ DefnNoun <$> var <* _is <* _an <*> nounVar
+  defnRel <- rule $ DefnRel <$> (beginMath *> varSymbol) <*> relator <*> varSymbol <* endMath
+  defnSymbolicPredicate <- rule $ math $ asum $ prefixPredicateOf DefnSymbolicPredicate varSymbol <$> HM.keys lexiconPrefixPredicates
+  defnHead <- rule $ optional _write *> asum [defnAdj, defnVerb, defnNoun, defnRel, defnSymbolicPredicate]
+
+  defnIf <- rule $ Defn <$> asms <*> defnHead <* (_iff <|> _if) <*> stmt <* _dot
+  defnFunSymb <- rule $ _comma *> termExpr <* _comma -- ^ Optional symbolic equivalent.
+  defnFun <- rule $ DefnFun <$> asms <*> (optional _the *> funVar) <*> optional defnFunSymb <* _is <*> term <* _dot
+
+  symbolicPatternEqTerm <- rule do
+    pat <- beginMath *> symbolicPattern <* _eq
+    e <- expr <* endMath <* _dot
+    pure (pat, e)
+  defnOp <- rule $ uncurry DefnOp <$> symbolicPatternEqTerm
+
+  defn <- rule $ defnIf <|> defnFun <|> defnOp
+
+  abbreviationVerb <- rule $ AbbreviationVerb <$> var <*> verbVar <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationAdj <- rule $ AbbreviationAdj <$> var <* _is <*> adjVar <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationNoun <- rule $ AbbreviationNoun <$> var <* _is <* _an <*> nounVar <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationRel <- rule $ AbbreviationRel <$> (beginMath *> varSymbol) <*> relator <*> varSymbol <* endMath <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationFun <- rule $ AbbreviationFun <$> (_the *> funVar) <* (_is <|> _denotes) <*> term <* _dot
+  abbreviationEq <- rule $ uncurry AbbreviationEq <$> symbolicPatternEqTerm
+  abbreviation <- rule $ (abbreviationVerb <|> abbreviationAdj <|> abbreviationNoun <|> abbreviationRel <|> abbreviationFun <|> abbreviationEq)
+
+  datatypeFin <- rule $ DatatypeFin <$> fmap fst (_an *> noun) <*> (_is *> _oneOf *> orList2 (math cmd) <* _dot)
+  datatype <- rule datatypeFin
+
+  unconditionalIntro <- rule $ IntroRule [] <$> math formula
+  conditionalIntro <- rule $ IntroRule <$> (_if *> andList1_ (math formula)) <* _comma <* _then <*> math formula
+  inductiveIntro <- rule $ (unconditionalIntro <|> conditionalIntro) <* _dot
+  inductiveDomain <- rule $ math $ (,) <$> symbolicPattern <* _subseteq <*> expr
+  inductiveHead <- rule $ _define *> inductiveDomain <* optional _inductively <* optional _asFollows <* _dot
+  inductive <- rule $ uncurry Inductive <$> inductiveHead <*> enumerated1 inductiveIntro
+
+  signatureAdj <- rule $ SignatureAdj <$> var <* _can <* _be <*> adjOf lexicon var
+  symbolicPattern <- symbolicPatternOf ops varSymbol
+  signatureSymbolic <- rule $ SignatureSymbolic <$> math symbolicPattern <* _is <* _an <*> nounPhrase
+  signature <- rule $ (,) <$> asms <* optional _then <*> (signatureAdj <|> signatureSymbolic) <* _dot
+
+  structFix <- rule do
+    beginMath
+    rawCmd <- cmd
+    endMath
+    pure (StructSymbol rawCmd)
+  structDefn <- rule $ do
+    _an
+    ~(structPhrase, structLabel) <- structNoun
+    _extends
+    structParents <- andList1_ (_an *> structNounNameless)
+    maybeFixes <- optional (_equipped *> enumerated structFix)
+    structAssumes <- (_suchThat *> enumeratedMarked (stmt <* _dot)) <|> ([] <$ _dot)
+    pure StructDefn
+      { structPhrase = structPhrase
+      , structLabel = structLabel
+      , structParents = structParents
+      , structFixes = maybeFixes ?? []
+      , structAssumes = structAssumes
+      }
+
+  -- Justifications for proof steps.
+  justificationSet <- rule $ JustificationSetExt <$ _bySetExt
+  justificationRef <- rule $ JustificationRef <$> (_by *> ref)
+  justificationLocal <- rule $ JustificationLocal <$ (_by *> (_assumption <|> _definition))
+  justification <- rule (justificationSet <|> justificationRef <|> justificationLocal <|> pure JustificationEmpty)
+
+  trivial <- rule $ Qed JustificationEmpty <$ _trivial <* _dot
+  omitted <- rule $ Omitted <$ _omitted <* _dot
+  qedJustified <- rule $ Qed <$> (_follows *> justification <* _dot)
+  qed <- rule $ qedJustified <|> trivial <|> omitted <|> pure (Qed JustificationEmpty)
+
+  let alignedEq = symbol "&=" <?> "\"&=\""
+  explanation <- rule $ (text justification) <|> pure JustificationEmpty
+  equationItem <- rule $ (,) <$> (alignedEq *> expr) <*> explanation
+  equations <- rule $ Equation <$> expr <*> (many1 equationItem)
+
+  let alignedIff = symbol "&" *> command "iff" <?> "\"&\\iff\""
+  biconditionalItem <- rule $ (,) <$> (alignedIff *> formula) <*> explanation
+  biconditionals <- rule $ Biconditionals <$> formula <*> (many1 biconditionalItem)
+
+  calc <- rule $ Calc <$> align (equations <|> biconditionals) <*> proof
+
+  caseOf <- rule $ command "caseOf" *> token InvisibleBraceL *> stmt <* _dot <* token InvisibleBraceR
+  byCases <- rule $ ByCase <$> env_ "byCase" (many1_ (Case <$> caseOf <*> proof))
+  byContradiction <- rule $ ByContradiction <$ _suppose <* _not <* _dot <*> proof
+  bySetInduction <- rule $ BySetInduction <$> proofBy (_in *> word "-induction" *> optional (word "on" *> term)) <*> proof
+  byOrdInduction <- rule $ ByOrdInduction <$> proofBy (word "transfinite" *> word "induction" *> proof)
+  assume <- rule $ Assume <$> (_suppose *> stmt <* _dot) <*> proof
+
+  fixSymbolic <- rule $ FixSymbolic <$> (_fix *> beginMath *> varSymbols) <*> maybeBounded <* endMath <* _dot <*> proof
+  fixSuchThat <- rule $ FixSuchThat <$> (_fix *> math varSymbols) <* _suchThat <*> stmt <* _dot <*> proof
+  fix <- rule $ fixSymbolic <|> fixSuchThat
+
+  takeVar <- rule $ TakeVar <$> (_take *> beginMath *> varSymbols) <*> maybeBounded <* endMath <* _suchThat <*> stmt <*> justification <* _dot <*> proof
+  takeNoun <- rule $ TakeNoun <$> (_take *> _an *> (nounPhrase' <|> nounPhrasePl)) <*> justification <* _dot <*> proof
+  take <- rule $ takeVar <|> takeNoun
+  suffices <- rule $ Suffices <$> (_sufficesThat *> stmt) <*> (justification <* _dot) <*> proof
+  subclaim <- rule $ Subclaim <$> (_show *> stmt <* _dot) <*> env_ "subproof" proof <*> proof
+  have <- rule $ Have <$> optional (_since *> stmt <* _comma <* _have) <* optional _haveIntro <*> stmt <*> justification <* _dot <*> proof
+
+  define <- rule $ Define <$> (_let *> beginMath *> varSymbol <* _eq) <*> expr <* endMath <* _dot <*> proof
+  defineFunction <- rule $ DefineFunction <$> (_let *> beginMath *> varSymbol) <*> paren varSymbol <* _eq <*> expr <* endMath <* _for <* beginMath <*> varSymbol <* _in <*> expr <* endMath <* _dot <*> proof
+
+  proof <- rule $ asum [byContradiction, byCases, bySetInduction, byOrdInduction, calc, subclaim, assume, fix, take, have, suffices, define, defineFunction, qed]
+
+
+  -- Toplevel blocks, each delimited by its LaTeX environment.
+  blockAxiom <- rule $ uncurry3 BlockAxiom <$> envPos "axiom" axiom
+  blockLemma <- rule $ uncurry3 BlockLemma <$> lemmaEnv lemma
+  blockProof <- rule $ uncurry BlockProof <$> envPos_ "proof" proof
+  blockDefn <- rule $ uncurry3 BlockDefn <$> envPos "definition" defn
+  blockAbbr <- rule $ uncurry3 BlockAbbr <$> envPos "abbreviation" abbreviation
+  blockData <- rule $ uncurry BlockData <$> envPos_ "datatype" datatype
+  blockInd <- rule $ uncurry3 BlockInductive <$> envPos "inductive" inductive
+  blockSig <- rule $ (\(p, (a, s)) -> BlockSig p a s) <$> envPos_ "signature" signature
+  blockStruct <- rule $ uncurry3 BlockStruct <$> envPos "struct" structDefn
+  block <- rule $ asum [blockAxiom, blockLemma, blockDefn, blockAbbr, blockData, blockInd, blockSig, blockStruct, blockProof]
+
+  -- Starting category.
+  pure block
+
+
+-- | A bracketed proof-method annotation of the form @[proof by ...]@.
+proofBy :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+proofBy method = bracket (word "proof" *> word "by" *> method)
+
+
+-- | A lemma-like environment: any of the usual theorem-kind names.
+lemmaEnv :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, Marker, a)
+lemmaEnv content = asum [envPos kind content | kind <- kinds]
+  where
+    kinds = ["theorem", "lemma", "corollary", "claim", "proposition"]
+
+
+-- | A disjunctive list:
+-- * 'a or b'
+-- * 'a, b, or c'
+-- * 'a, b, c, or d'
+--
+-- NOTE(review): despite the name suggesting at least two items, the first
+-- alternative uses 'many', so a single item with no 'or' at all also
+-- parses — confirm whether two items should be enforced (cf. 'commaList2').
+--
+orList2 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+orList2 item = ((:|) <$> item <*> many (_commaOr *> item))
+    <|> ((\i j -> i:|[j]) <$> item <* _or <*> item)
+
+
+-- | Nonempty textual lists of the form "a, b, c, and d".
+-- The final comma is mandatory, 'and' is not.
+-- Also allows "a and b". Should therefore be avoided in contexts where
+-- a logical conjunction would also be possible.
+-- Currently also allows additional 'and's after each comma...
+--
+andList1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+andList1 item = commaSeparated <|> plainPair
+  where
+    commaSeparated = (:|) <$> item <*> many (_commaAnd *> item)
+    plainPair = (\i j -> i :| [j]) <$> item <* _and <*> item
+
+-- | Like 'andList1', but drops the information about nonemptiness.
+andList1_ :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+andList1_ = fmap toList . andList1
+
+
+-- | Nonempty list of items separated by commas.
+commaList :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+commaList item = (:|) <$> item <*> tailItems
+  where
+    tailItems = many (_comma *> item)
+
+-- | Like 'commaList', but drops the information about nonemptiness.
+commaList_ :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+commaList_ = fmap NonEmpty.toList . commaList
+
+-- | Like 'commaList', but requires at least two items (and hence at least one comma).
+commaList2 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+commaList2 item = (:|) <$> (item <* _comma) <*> commaList_ item
+
+
+-- | A textual 'and'-separated list of assumptions.
+assumptionList :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+assumptionList = fmap NonEmpty.toList . andList1
+
+-- | Like 'enumerated1', but drops the information about nonemptiness.
+enumerated :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+enumerated = fmap NonEmpty.toList . enumerated1
+
+-- | A LaTeX @enumerate@ environment with at least one @\item@.
+enumerated1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+enumerated1 p = begin "enumerate" *> items <* end "enumerate" <?> "\"\\begin{enumerate} ...\""
+  where
+    items = many1 (command "item" *> p)
+
+
+-- | Like 'enumeratedMarked1', but drops the information about nonemptiness.
+enumeratedMarked :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [(Marker, a)]
+enumeratedMarked = fmap NonEmpty.toList . enumeratedMarked1
+
+-- | An @enumerate@ environment whose items each carry a @\label@ marker.
+enumeratedMarked1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty (Marker, a))
+enumeratedMarked1 p = begin "enumerate" *> items <* end "enumerate" <?> "\"\\begin{enumerate}\\item\\label{...}...\""
+  where
+    items = many1 ((,) <$> (command "item" *> label) <*> p)
+
+
+
+
+-- This function could be rewritten, so that it can be used directly in the grammar,
+-- instead of with specialized variants.
+--
+-- | Parse any of the lexical phrases obtained from the lexicon via
+-- @selector@, filling each variable slot of the phrase with @arg@.
+phraseOf
+  :: (pat -> [a] -> b)
+  -> Lexicon
+  -> (Lexicon -> HashSet pat)
+  -> (pat -> LexicalPhrase)
+  -> Prod r Text (Located Token) a
+  -> Prod r Text (Located Token) b
+phraseOf constr lexicon selector proj arg =
+  asum [constr pat <$> slots (proj pat) | pat <- HS.toList (selector lexicon)]
+  where
+    -- A 'Just' component is a literal token; a 'Nothing' component is
+    -- a slot to be filled by the argument production.
+    slots = \case
+      Just w : ws -> token w *> slots ws
+      Nothing : ws -> (:) <$> arg <*> slots ws
+      [] -> pure []
+
+-- | Left adjective phrase from the lexicon (used before the noun).
+adjLOf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjLOf arg)
+adjLOf lexicon arg = phraseOf AdjL lexicon (HM.keysSet . lexiconAdjLs) id arg <?> "a left adjective"
+
+-- | Right adjective phrase from the lexicon (used after the noun).
+adjROf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjROf arg)
+adjROf lexicon arg = phraseOf AdjR lexicon (HM.keysSet . lexiconAdjRs) id arg <?> "a right adjective"
+
+-- | Adjective phrase from the lexicon.
+adjOf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjOf arg)
+adjOf lexicon arg = phraseOf Adj lexicon (HM.keysSet . lexiconAdjs) id arg <?> "an adjective"
+
+-- | Verb phrase from the lexicon; @proj@ selects the singular or
+-- plural form (callers pass 'sg' or 'pl').
+verbOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) a
+  -> Prod r Text (Located Token) (VerbOf a)
+verbOf lexicon proj arg = phraseOf Verb lexicon (HM.keysSet . lexiconVerbs) proj arg
+
+-- | Functional phrase from the lexicon; @proj@ selects the singular or
+-- plural form.
+funOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) a
+  -> Prod r Text (Located Token) (FunOf a)
+funOf lexicon proj arg = phraseOf Fun lexicon (HM.keysSet . lexiconFuns) proj arg <?> "functional phrase"
+
+
+-- | A noun with a @t VarSymbol@ as name(s). Each phrase is split at its
+-- variable slot so that the name production can be placed in between.
+nounOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) arg
+  -> Prod r Text (Located Token) (t VarSymbol)
+  -> Prod r Text (Located Token) (NounOf arg, t VarSymbol)
+nounOf lexicon proj arg vars =
+  asum (map make (HM.keys (lexiconNouns lexicon))) <?> "a noun"
+  where
+    make pat =
+      let (before, after) = splitOnVariableSlot (proj pat)
+      in (\argsB xs argsA -> (Noun pat (argsB <> argsA), xs))
+           <$> walk before <*> vars <*> walk after
+    walk = \case
+      Just w : ws -> token w *> walk ws
+      Nothing : ws -> (:) <$> arg <*> walk ws
+      [] -> pure []
+
+-- | A structure noun together with a name production placed at the
+-- phrase's variable slot. The slot arguments themselves are discarded;
+-- only the phrase and the parsed name are returned.
+structNounOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) arg
+  -> Prod r Text (Located Token) name
+  -> Prod r Text (Located Token) (StructPhrase, name)
+structNounOf lexicon proj arg name =
+  asum (map make (HM.keys (lexiconStructNouns lexicon))) <?> "a structure noun"
+  where
+    make pat =
+      let (before, after) = splitOnVariableSlot (proj pat)
+      in (\_argsB xs _argsA -> (pat, xs)) <$> walk before <*> name <*> walk after
+    walk = \case
+      Just w : ws -> token w *> walk ws
+      Nothing : ws -> (:) <$> arg <*> walk ws
+      [] -> pure []
+
+
+-- | Build a production for symbolic definition patterns from the mixfix
+-- operator table: every literal token of an operator is matched as-is
+-- and every hole is filled with a variable symbol.
+symbolicPatternOf
+  :: forall r. [[(Holey (Prod r Text (Located Token) Token), Associativity)]]
+  -> Prod r Text (Located Token) VarSymbol
+  -> Grammar r (Prod r Text (Located Token) SymbolPattern)
+symbolicPatternOf ops varSymbol = rule $ asum
+  [ go op
+  | ops' <- ops
+  , (op, _assoc) <- ops'  -- associativity is irrelevant for patterns
+  ] <?> "a symbolic pattern"
+  where
+    go :: Holey (Prod r Text (Located Token) Token) -> Prod r Text (Located Token) SymbolPattern
+    go [] = pure $ SymbolPattern [] []
+    go (head : tail) = case head of
+      Just symb -> (\s (SymbolPattern op vs) -> SymbolPattern (Just s : op) vs) <$> symb <*> go tail
+      Nothing -> (\v (SymbolPattern op vs) -> SymbolPattern (Nothing : op) (v : vs)) <$> varSymbol <*> go tail
+
+
+-- | Assemble a 'NounPhrase' from its parsed components.
+makeNounPhrase
+  :: [AdjL]
+  -> (Noun, t VarSymbol)
+  -> [AdjR]
+  -> Maybe Stmt
+  -> NounPhrase t
+makeNounPhrase adjsL (noun, names) adjsR suchThat =
+  NounPhrase adjsL noun names adjsR suchThat
+
+
+
+
+-- | Start/end delimiter of a LaTeX environment of the given kind,
+-- returning the delimiter's source position.
+begin, end :: Text -> Prod r Text (Located Token) SourcePos
+begin kind = tokenPos (BeginEnv kind) <?> expected
+  where expected = "\"\\begin{" <> kind <> "}\""
+end kind = tokenPos (EndEnv kind) <?> expected
+  where expected = "\"\\end{" <> kind <> "}\""
+
+-- | Surround a production rule @body@ with an environment of a certain @kind@,
+-- requiring a marker specified in a @\\label@. An optional bracketed title
+-- after the beginning of the environment is ignored.
+envPos :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, Marker, a)
+envPos kind body =
+  (\p _title m a -> (p, m, a))
+    <$> (begin kind <?> ("start of a \"" <> kind <> "\" environment"))
+    <*> optional title
+    <*> label
+    <*> (body <* end kind)
+  where
+    title :: Prod r Text (Located Token) [Token]
+    title = bracket (many (unLocated <$> satisfy (\ltok -> unLocated ltok /= BracketR)))
+
+-- | Like 'envPos', but without allowing titles (and without a label
+-- requirement: the label is optional and discarded).
+envPos_ :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, a)
+envPos_ kind body = (,) <$> begin kind <*> (optional label *> body <* end kind)
+
+-- | Like 'envPos_', but discarding the source position as well.
+env_ :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+env_ kind body = begin kind *> (optional label *> body <* end kind)
+
+-- | A label specifying a marker for referencing via /@\\label{...}@/.
+-- Returns the marker.
+label :: Prod r Text (Located Token) Marker
+label = terminal fromLabel <?> "\"\\label{...}\""
+  where
+    fromLabel ltok = case unLocated ltok of
+      Label m -> Just (Marker m)
+      _tok -> Nothing
+
+-- | A reference via /@\\ref{...}@/. Returns the referenced markers.
+ref :: Prod r Text (Located Token) (NonEmpty Marker)
+ref = terminal fromRef
+  where
+    fromRef ltok = case unLocated ltok of
+      Ref ms -> Just (Marker <$> ms)
+      _tok -> Nothing
+
+-- | A production surrounded by math-mode delimiters.
+math :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+math body = beginMath *> body <* endMath
+
+-- | A production surrounded by a @\text{...}@ environment.
+text :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+text body = begin "text" *> body <* end "text" <?> "\"\\text{...}\""
+
+-- | Delimiters of math mode (tokenized as a \"math\" environment).
+beginMath, endMath :: Prod r Text (Located Token) SourcePos
+beginMath = begin "math" <?> "start of a formula, e.g. \"$\""
+endMath = end "math" <?> "end of a formula, e.g. \"$\""
+
+-- | A production surrounded by parentheses.
+paren :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+paren body = token ParenL *> body <* token ParenR <?> "\"(...)\""
+
+-- | A production surrounded by square brackets.
+bracket :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+bracket body = token BracketL *> body <* token BracketR <?> "\"[...]\""
+
+-- | A production surrounded by visible (escaped) braces.
+brace :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+brace body = token VisibleBraceL *> body <* token VisibleBraceR <?> "\"\\{...\\}\""
+
+-- | A production surrounded by invisible (TEX grouping) braces.
+group :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+group body = token InvisibleBraceL *> body <* token InvisibleBraceR <?> "\"{...}\""
+
+-- | A production surrounded by an @align*@ environment.
+align :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+align body = begin "align*" *> body <* end "align*"
+
+
+-- | Extract a variable symbol from a token, if it is one.
+maybeVarToken :: Located Token -> Maybe VarSymbol
+maybeVarToken ltok
+  | Variable x <- unLocated ltok = Just (NamedVar x)
+  | otherwise = Nothing
+
+-- | Extract the text of a word token, if it is one.
+maybeWordToken :: Located Token -> Maybe Text
+maybeWordToken ltok
+  | Word n <- unLocated ltok = Just n
+  | otherwise = Nothing
+
+-- | Extract the value of an integer token, if it is one.
+maybeIntToken :: Located Token -> Maybe Int
+maybeIntToken ltok
+  | Integer n <- unLocated ltok = Just n
+  | otherwise = Nothing
+
+-- | Extract the name of a TEX command token, if it is one.
+maybeCmdToken :: Located Token -> Maybe Text
+maybeCmdToken ltok
+  | Command n <- unLocated ltok = Just n
+  | otherwise = Nothing
+
+-- | Match the TEX command belonging to the given structure symbol.
+structSymbol :: StructSymbol -> Prod r Text (Located Token) StructSymbol
+structSymbol s@(StructSymbol c) = terminal matchCmd
+  where
+    matchCmd ltok = case unLocated ltok of
+      Command c' | c' == c -> Just s
+      _ -> Nothing
+
+-- | Tokens that are allowed to appear in labels of environments.
+-- NOTE(review): a hyphen is mapped to the empty text (i.e. dropped from
+-- the tag) while an apostrophe is kept — confirm this asymmetry is intended.
+maybeTagToken :: Located Token -> Maybe Text
+maybeTagToken ltok
+  | Symbol "'" <- tok = Just "'"
+  | Symbol "-" <- tok = Just ""
+  | otherwise = maybeWordToken ltok
+  where
+    tok = unLocated ltok
+
+
+-- | Match exactly the given token, returning it.
+token :: Token -> Prod r Text (Located Token) Token
+token tok = terminal match <?> tokToText tok
+  where
+    match ltok
+      | unLocated ltok == tok = Just tok
+      | otherwise = Nothing
+
+-- | Match exactly the given token, returning its source position.
+tokenPos :: Token -> Prod r Text (Located Token) SourcePos
+tokenPos tok = terminal match <?> tokToText tok
+  where
+    match ltok
+      | unLocated ltok == tok = Just (startPos ltok)
+      | otherwise = Nothing
diff --git a/source/Syntax/Concrete/Keywords.hs b/source/Syntax/Concrete/Keywords.hs
new file mode 100644
index 0000000..e0f577e
--- /dev/null
+++ b/source/Syntax/Concrete/Keywords.hs
@@ -0,0 +1,222 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-|
+This module defines lots of keywords and various filler
+phrases. The prefix underscore indicates that we do not
+care about the parse result (analogous to discarding
+like @...; _ <- action; ...@ in do-notation). Moreover,
+this convention allows the use of short names that would
+otherwise be Haskell keywords or clash with other definitions.
+Care should be taken when introducing too many variants of
+a keyword, lest the grammar become needlessly ambiguous!
+
+The names are chosen using the following criteria:
+
+ * As short as possible (e.g.: @_since@ over @_because@).
+
+ * Sound like a keyword (e.g.: @_show@).
+
+This module also defines symbols that have special uses
+(such as @_colon@ for its use in type signatures).
+-}
+module Syntax.Concrete.Keywords where
+
+
+import Base
+import Syntax.Token
+
+import Text.Earley (Prod, (<?>), terminal)
+import Text.Megaparsec (SourcePos)
+
+infixr 0 ?
+-- | Variant of '<?>' for annotating literal tokens: wraps the expected
+-- text in double quotes before attaching it as an error label.
+(?) :: Prod r Text t a -> Text -> Prod r Text t a
+prod ? expected = prod <?> quoted
+  where
+    quoted = "\"" <> expected <> "\""
+
+-- | Match the given word token and return its source position.
+word :: Text -> Prod r Text (Located Token) SourcePos
+word w = terminal matchWord
+  where
+    matchWord ltok
+      | Word w' <- unLocated ltok, w' == w = Just (startPos ltok)
+      | otherwise = Nothing
+
+-- | Match the given symbol token and return its source position.
+symbol :: Text -> Prod r Text (Located Token) SourcePos
+symbol s = terminal matchSymbol
+  where
+    matchSymbol ltok
+      | Symbol s' <- unLocated ltok, s' == s = Just (startPos ltok)
+      | otherwise = Nothing
+
+-- | Match the given TeX command token and return its source position.
+command :: Text -> Prod r Text (Located Token) SourcePos
+command cmd = terminal matchCommand
+  where
+    matchCommand ltok
+      | Command cmd' <- unLocated ltok, cmd' == cmd = Just (startPos ltok)
+      | otherwise = Nothing
+
+-- | Parse an arity word (\"unary\" through \"denary\") to its numeric
+-- value. (The error label previously read @\"binary\'@ with a
+-- mismatched closing quote.)
+_arity :: Prod r Text (Located Token) Int
+_arity = asum
+  [ 1 <$ word "unary"
+  , 2 <$ word "binary"
+  , 3 <$ word "ternary"
+  , 4 <$ word "quaternary"
+  , 5 <$ word "quinary"
+  , 6 <$ word "senary"
+  , 7 <$ word "septenary"
+  , 8 <$ word "octonary"
+  , 9 <$ word "nonary"
+  , 10 <$ word "denary"
+  ] <?> "\"unary\", \"binary\", ..."
+
+-- * Keywords
+
+-- Each keyword parser yields the source position of the first token it
+-- consumes (@p <* q@ keeps the result of @p@).
+_an :: Prod r Text (Located Token) SourcePos
+_an = word "a" <|> word "an" <?> "indefinite article"
+_and :: Prod r Text (Located Token) SourcePos
+_and = word "and" ? "and"
+_are :: Prod r Text (Located Token) SourcePos
+_are = word "are" ? "are"
+_asFollows :: Prod r Text (Located Token) SourcePos
+_asFollows = word "as" <* word "follows" ? "as follows"
+_assumption :: Prod r Text (Located Token) SourcePos
+_assumption = word "assumption" ? "assumption"
+_be :: Prod r Text (Located Token) SourcePos
+_be = word "be" ? "be"
+_by :: Prod r Text (Located Token) SourcePos
+_by = word "by" ? "by"
+_bySetExt :: Prod r Text (Located Token) SourcePos
+_bySetExt = word "by" <* ((word "set" ? "set") <* word "extensionality") ? "by set extensionality"
+_can :: Prod r Text (Located Token) SourcePos
+_can = word "can" ? "can"
+_consistsOf :: Prod r Text (Located Token) SourcePos
+_consistsOf = word "consists" <* word "of" ? "consists of"
+_contradiction :: Prod r Text (Located Token) SourcePos
+_contradiction = optional (word "a") *> word "contradiction" ? "a contradiction"
+_define :: Prod r Text (Located Token) SourcePos
+_define = word "define" ? "define"
+_definition :: Prod r Text (Located Token) SourcePos
+_definition = word "definition" ? "definition"
+_denote :: Prod r Text (Located Token) SourcePos
+_denote = word "denote" <|> (word "stand" <* word "for") ? "denote"
+_denotes :: Prod r Text (Located Token) SourcePos
+_denotes = word "denotes" ? "denotes"
+_do :: Prod r Text (Located Token) SourcePos
+_do = word "do" ? "do"
+_does :: Prod r Text (Located Token) SourcePos
+_does = word "does" ? "does"
+_either :: Prod r Text (Located Token) SourcePos
+_either = word "either" ? "either"
+_equipped :: Prod r Text (Located Token) SourcePos
+_equipped = (word "equipped" <|> word "together") <* word "with" ? "equipped with"
+_every :: Prod r Text (Located Token) SourcePos
+_every = (word "every") ? "every"
+_exist :: Prod r Text (Located Token) SourcePos
+_exist = word "there" <* word "exist" ? "there exist"
+_exists :: Prod r Text (Located Token) SourcePos
+_exists = word "there" <* word "exists" ? "there exists"
+-- NOTE(review): '_extends' also accepts a bare "is", yet its error
+-- label mentions only "consists of" — confirm the label is intended.
+_extends :: Prod r Text (Located Token) SourcePos
+_extends = (_is) <|> (word "consists" <* word "of") ? "consists of"
+_fix :: Prod r Text (Located Token) SourcePos
+_fix = word "fix" ? "fix"
+_follows :: Prod r Text (Located Token) SourcePos
+_follows = word "follows" ? "follows"
+_for :: Prod r Text (Located Token) SourcePos
+_for = word "for" ? "for"
+_forAll :: Prod r Text (Located Token) SourcePos
+_forAll = (word "for" <* word "all") <|> word "all" ? "all"
+_forEvery :: Prod r Text (Located Token) SourcePos
+_forEvery = (word "for" <* word "every") <|> (word "every") ? "for every"
+_have :: Prod r Text (Located Token) SourcePos
+_have = word "we" <* word "have" <* optional (word "that") ? "we have"
+_if :: Prod r Text (Located Token) SourcePos
+_if = word "if" ? "if"
+_iff :: Prod r Text (Located Token) SourcePos
+_iff = word "iff" <|> (word "if" <* word "and" <* word "only" <* word "if") ? "iff"
+_inductively :: Prod r Text (Located Token) SourcePos
+_inductively = word "inductively" ? "inductively"
+_is :: Prod r Text (Located Token) SourcePos
+_is = word "is" ? "is"
+_itIsWrong :: Prod r Text (Located Token) SourcePos
+_itIsWrong = word "it" <* word "is" <* (word "not" <* word "the" <* word "case" <|> word "wrong") <* word "that" ? "it is wrong that"
+_let :: Prod r Text (Located Token) SourcePos
+_let = word "let" ? "let"
+_neither :: Prod r Text (Located Token) SourcePos
+_neither = word "neither" ? "neither"
+_no :: Prod r Text (Located Token) SourcePos
+_no = word "no" ? "no"
+_nor :: Prod r Text (Located Token) SourcePos
+_nor = word "nor" ? "nor"
+_not :: Prod r Text (Located Token) SourcePos
+_not = word "not" ? "not"
+_omitted :: Prod r Text (Located Token) SourcePos
+_omitted = word "omitted" ? "omitted"
+_on :: Prod r Text (Located Token) SourcePos
+_on = word "on" ? "on"
+_oneOf :: Prod r Text (Located Token) SourcePos
+_oneOf = word "one" <* word "of" ? "one of"
+_or :: Prod r Text (Located Token) SourcePos
+_or = word "or" ? "or"
+-- Note that '_particularly' also consumes the trailing comma.
+_particularly :: Prod r Text (Located Token) SourcePos
+_particularly = (word "particularly" <|> (word "in" *> word "particular")) <* _comma ? "particularly"
+_relation :: Prod r Text (Located Token) SourcePos
+_relation = word "relation" ? "relation"
+_satisfying :: Prod r Text (Located Token) SourcePos
+_satisfying = _suchThat <|> word "satisfying" ? "satisfying"
+_setOf :: Prod r Text (Located Token) SourcePos
+_setOf = word "set" <* word "of" ? "set of"
+-- | Phrases introducing a goal, e.g. /@we now show that@/.
+-- Labelled like the sibling keywords (the label was previously missing).
+_show :: Prod r Text (Located Token) SourcePos
+_show = optional (word "first" <|> word "finally" <|> word "next" <|> word "now") *> optional (word "we") *> word "show" <* optional (word "that") ? "show"
+_since :: Prod r Text (Located Token) SourcePos
+_since = word "since" <|> word "because" ? "since"
+_some :: Prod r Text (Located Token) SourcePos
+_some = word "some" ? "some"
+_suchThat :: Prod r Text (Located Token) SourcePos
+_suchThat = ((word "such" <* word "that") <|> (word "s" <* _dot <* word "t" <* _dot)) ? "such that"
+_sufficesThat :: Prod r Text (Located Token) SourcePos
+_sufficesThat = word "it" <* word "suffices" <* word "to" <* word "show" <* word "that" ? "it suffices to show"
+_suppose :: Prod r Text (Located Token) SourcePos
+_suppose = (word "suppose" <|> word "assume") <* optional (word "that") ? "assume"
+_take :: Prod r Text (Located Token) SourcePos
+_take = word "take" <|> word "consider" ? "take"
+_that :: Prod r Text (Located Token) SourcePos
+_that = word "that" ? "that"
+_the :: Prod r Text (Located Token) SourcePos
+_the = word "the" ? "the"
+_then :: Prod r Text (Located Token) SourcePos
+_then = word "then" ? "then"
+_throughout :: Prod r Text (Located Token) SourcePos
+_throughout = word "throughout" <* optional (word "this" <* word "section") <* optional _comma <|> (word "in" <* word "the" <* word "sequel") ? "throughout"
+-- NOTE(review): '_thus' also accepts "then" and "now", overlapping with
+-- '_then' and the optional prefixes of '_show' — confirm the grammar
+-- tolerates this ambiguity.
+_thus :: Prod r Text (Located Token) SourcePos
+_thus = word "thus" <|> word "then" <|> word "hence" <|> word "now" <|> word "finally" <|> word "therefore" ? "thus"
+_trivial :: Prod r Text (Located Token) SourcePos
+_trivial = word "straightforward" <|> word "trivial" ? "trivial"
+_unique :: Prod r Text (Located Token) SourcePos
+_unique = word "unique" ? "unique"
+_write :: Prod r Text (Located Token) SourcePos
+_write = (optional (word "we") *> word "say" <* optional (word "that")) <|> (optional (word "we") *> word "write") ? "write"
+
+-- | Introducing plain claims in proofs.
+_haveIntro :: Prod r Text (Located Token) SourcePos
+_haveIntro = _thus <|> _particularly <|> _have
+
+-- * Symbols
+
+-- Symbol parsers, like the keyword parsers above, return the source
+-- position of the first token they consume.
+_colon :: Prod r Text (Located Token) SourcePos
+_colon = symbol ":" ? ":"
+_pipe :: Prod r Text (Located Token) SourcePos
+_pipe = symbol "|" <|> command "mid" ? "\\mid"
+_comma :: Prod r Text (Located Token) SourcePos
+_comma = symbol "," ? ","
+_commaAnd :: Prod r Text (Located Token) SourcePos
+_commaAnd = symbol "," <* optional (word "and") ? ", and"
+_commaOr :: Prod r Text (Located Token) SourcePos
+_commaOr = symbol "," <* optional (word "or") ? ", or"
+_defeq :: Prod r Text (Located Token) SourcePos
+_defeq = symbol ":=" ? ":=" -- Should use `\coloneq` from unicode-math as display.
+_dot :: Prod r Text (Located Token) SourcePos
+_dot = symbol "." ? "."
+_eq :: Prod r Text (Located Token) SourcePos
+_eq = symbol "=" ? "="
+_in :: Prod r Text (Located Token) SourcePos
+_in = symbol "∈" <|> command "in" ? "\\in"
+_subseteq :: Prod r Text (Located Token) SourcePos
+_subseteq = command "subseteq" ? "\\subseteq" -- Label previously read ":" (copy-paste from '_colon').
diff --git a/source/Syntax/Import.hs b/source/Syntax/Import.hs
new file mode 100644
index 0000000..33082f1
--- /dev/null
+++ b/source/Syntax/Import.hs
@@ -0,0 +1,43 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE ApplicativeDo #-}
+
+
+module Syntax.Import where
+
+
+import Base
+
+import Text.Regex.Applicative.Text
+import Data.CharSet qualified as CharSet
+import Data.Char (isAlphaNum)
+
+-- | Collect the paths of all imports in the import header of a
+-- document, returning the empty list when the header does not match.
+gatherImports :: Text -> [FilePath]
+gatherImports s = maybe [] fst (findFirstPrefix imps s)
+
+
+-- | Match the import header of a document: an optional leading import,
+-- further imports each preceded by a newline, and finally the first
+-- @\begin{...}@. Requiring the newline makes it possible to comment
+-- imports out.
+-- NOTE(review): this relies on ApplicativeDo — the statement order of
+-- the regex actions is significant and must not be rearranged.
+imps :: RE Char [FilePath]
+imps = do
+  mpath <- optional imp
+  paths <- many (few anySym *> string "\n" *> imp)
+  few anySym
+  begin
+  pure case mpath of
+    Nothing -> paths
+    Just path -> path : paths
+
+
+-- | Match a single @\import{path}@ statement. The preceding-newline
+-- requirement (which makes it possible to comment imports out) lives
+-- in 'imps'.
+imp :: RE Char FilePath
+imp = do
+  string "\\import{"
+  path <- few (psym isTheoryNameChar)
+  sym '}'
+  pure path
+
+-- | Characters permitted in theory names: alphanumerics plus the
+-- punctuation @.@, @-@, @_@ and @/@ (allowing relative paths).
+isTheoryNameChar :: Char -> Bool
+isTheoryNameChar c = isAlphaNum c || CharSet.member c (CharSet.fromList ".-_/")
+
+-- | Match the start of the first environment, which terminates the
+-- import header.
+begin :: RE Char Text
+begin = string "\\begin{"
diff --git a/source/Syntax/Internal.hs b/source/Syntax/Internal.hs
new file mode 100644
index 0000000..44603ad
--- /dev/null
+++ b/source/Syntax/Internal.hs
@@ -0,0 +1,612 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+-- | Data types for the internal (semantic) syntax tree.
+module Syntax.Internal
+ ( module Syntax.Internal
+ , module Syntax.Abstract
+ , module Syntax.LexicalPhrase
+ , module Syntax.Token
+ ) where
+
+
+import Base
+import Syntax.Lexicon (pattern PairSymbol, pattern ConsSymbol)
+import Syntax.LexicalPhrase (LexicalPhrase, SgPl(..), unsafeReadPhrase, unsafeReadPhraseSgPl)
+import Syntax.Token (Token(..))
+
+import Syntax.Abstract
+ ( Chain(..)
+ , Connective(..)
+ , VarSymbol(..)
+ , FunctionSymbol
+ , RelationSymbol
+ , StructSymbol (..)
+ , Relation
+ , VarSymbol(..)
+ , PropositionalConstant(..)
+ , StructPhrase
+ , Justification(..)
+ , Marker(..)
+ , pattern CarrierSymbol, pattern ConsSymbol
+ )
+
+import Bound
+import Bound.Scope
+import Data.Deriving (deriveShow1, deriveEq1, deriveOrd1)
+import Data.Hashable.Lifted
+import Data.HashMap.Strict qualified as HM
+import Data.Maybe
+import Data.Set qualified as Set
+import Data.List qualified as List
+import Data.List.NonEmpty qualified as NonEmpty
+import Text.Megaparsec.Pos (SourcePos)
+
+-- | 'Symbol's can be used as function and relation symbols.
+data Symbol
+  = SymbolMixfix FunctionSymbol -- ^ A mixfix operator symbol.
+  | SymbolFun (SgPl LexicalPhrase) -- ^ A function named by a lexical phrase.
+  | SymbolInteger Int -- ^ An integer constant.
+  | SymbolPredicate Predicate -- ^ A predicate used in symbol position.
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+-- | The different grammatical shapes a predicate can take.
+data Predicate
+  = PredicateAdj LexicalPhrase -- ^ Adjectival phrase.
+  | PredicateVerb (SgPl LexicalPhrase) -- ^ Verb phrase.
+  | PredicateNoun (SgPl LexicalPhrase) -- ^ /@\<...\> is a \<...\>@/.
+  | PredicateRelation RelationSymbol -- ^ Symbolic relation, e.g. @=@ or @\in@.
+  | PredicateSymbol Text -- ^ Predicate named by raw text.
+  | PredicateNounStruct (SgPl LexicalPhrase) -- ^ /@\<...\> is a \<...\>@/.
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+-- | The two flavours of 'Quantified'.
+data Quantifier
+  = Universally
+  | Existentially
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- Formulas, terms and expressions share one representation ('ExprOf');
+-- the aliases merely document intent.
+type Formula = Term
+type Term = Expr
+type Expr = ExprOf VarSymbol
+
+
+-- | Internal higher-order expressions. Terms and formulas share this
+-- single representation.
+data ExprOf a
+  = TermVar a
+  -- ^ A variable occurrence. Fresh constants disjoint from all
+  -- user-named identifiers can be used to eliminate higher-order
+  -- constructs.
+  --
+  | TermSymbol Symbol [ExprOf a]
+  -- ^ Application of a symbol (including function and predicate symbols).
+  | TermSymbolStruct StructSymbol (Maybe (ExprOf a))
+  -- ^ A structure operation, optionally annotated with the structure it
+  -- belongs to ('annotateWith' fills in a missing annotation).
+  | Apply (ExprOf a) (NonEmpty (ExprOf a))
+  -- ^ Higher-order application.
+  --
+  | TermSep VarSymbol (ExprOf a) (Scope () ExprOf a)
+  -- ^ Set comprehension using separation, e.g.: /@{ x ∈ X | P(x) }@/.
+  --
+  | Iota VarSymbol (Scope () ExprOf a)
+  -- ^ Definite descriptor.
+  --
+  | ReplacePred VarSymbol VarSymbol (ExprOf a) (Scope ReplacementVar ExprOf a)
+  -- ^ Replacement for single-valued predicates. The concrete syntax for these
+  -- syntactically requires a bounded existential quantifier in the condition:
+  --
+  -- /@$\\{ y | \\exists x\\in A. P(x,y) \\}$@/
+  --
+  -- In definitions the single-valuedness of @P@ becomes a proof obligation.
+  -- In other cases we could instead add it as constraint
+  --
+  -- /@$b\\in \\{ y | \\exists x\\in A. P(x,y) \\}$@/
+  -- /@iff@/
+  -- /@$\\exists x\\in A. P(x,y)$ and $P$ is single valued@/
+  --
+  --
+  | ReplaceFun (NonEmpty (VarSymbol, ExprOf a)) (Scope VarSymbol ExprOf a) (Scope VarSymbol ExprOf a)
+  -- ^ Set comprehension using functional replacement,
+  -- e.g.: /@{ f(x, y) | x ∈ X; y ∈ Y; P(x, y) }@/.
+  -- The list of pairs gives the domains. The first scope is the lhs, the
+  -- second scope can be used for additional constraints on the variables
+  -- (i.e. implicit separation over the product of the domains).
+  -- NOTE(review): an earlier version of this comment spoke of integer
+  -- list indices and an "optional" scope, but both scopes bind
+  -- 'VarSymbol's and neither is optional — confirm which description
+  -- is current.
+  --
+  | Connected Connective (ExprOf a) (ExprOf a)
+  | Lambda (Scope VarSymbol ExprOf a)
+  | Quantified Quantifier (Scope VarSymbol ExprOf a)
+  | PropositionalConstant PropositionalConstant
+  | Not (ExprOf a)
+  deriving (Functor, Foldable, Traversable)
+
+-- | Marker for the two variables bound by 'ReplacePred'.
+data ReplacementVar = ReplacementDomVar | ReplacementRangeVar deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- 'makeBound' generates the substitution ('Applicative'/'Monad')
+-- instances for 'ExprOf'; the lifted Show1/Eq1/Ord1 instances support
+-- the standalone instances below and uses of 'ExprOf' under 'Scope'.
+makeBound ''ExprOf
+
+deriveShow1 ''ExprOf
+deriveEq1 ''ExprOf
+deriveOrd1 ''ExprOf
+
+deriving instance Show a => Show (ExprOf a)
+deriving instance Eq a => Eq (ExprOf a)
+deriving instance Ord a => Ord (ExprOf a)
+
+deriving instance Generic (ExprOf a)
+deriving instance Generic1 ExprOf
+
+deriving instance Hashable1 ExprOf
+
+deriving instance Hashable a => Hashable (ExprOf a)
+
+
+-- | Bind exactly the given variable, leaving all others free.
+abstractVarSymbol :: VarSymbol -> ExprOf VarSymbol -> Scope VarSymbol ExprOf VarSymbol
+abstractVarSymbol x = abstract match
+  where
+    match y = if y == x then Just y else Nothing
+
+-- | Bind every variable contained in the given collection.
+abstractVarSymbols :: Foldable t => t VarSymbol -> ExprOf VarSymbol -> Scope VarSymbol ExprOf VarSymbol
+abstractVarSymbols xs = abstract match
+  where
+    match y
+      | y `elem` xs = Just y
+      | otherwise = Nothing
+
+-- | Use the given set of in-scope structure labels to cast them to
+-- their carriers when occurring on the rhs of the element relation.
+-- Use the given map to annotate (unannotated) structure operations
+-- with the most recent in-scope appropriate label.
+annotateWith :: Set VarSymbol -> HashMap StructSymbol VarSymbol -> Formula -> Formula
+annotateWith = go
+  where
+    -- Going under a binder wraps each ambient variable in 'F' (free),
+    -- so the environments are lifted with 'Set.map F' / 'fmap F'.
+    go :: (Ord a) => Set a -> HashMap StructSymbol a -> ExprOf a -> ExprOf a
+    go labels ops = \case
+      TermSymbolStruct symb Nothing ->
+        TermSymbolStruct symb (TermVar <$> HM.lookup symb ops)
+      e@TermSymbolStruct{} ->
+        e
+      a `IsElementOf` TermVar x | x `Set.member` labels ->
+        go labels ops a `IsElementOf` TermSymbolStruct CarrierSymbol (Just (TermVar x))
+      Not a ->
+        Not (go labels ops a)
+      Connected conn a b ->
+        Connected conn (go labels ops a) (go labels ops b)
+      Quantified quant body ->
+        Quantified quant (toScope (go (Set.map F labels) (F <$> ops) (fromScope body)))
+      e@TermVar{} -> e
+      TermSymbol symb args ->
+        TermSymbol symb (go labels ops <$> args)
+      Apply e1 args ->
+        Apply (go labels ops e1) (go labels ops <$> args)
+      TermSep vs e scope ->
+        TermSep vs (go labels ops e) (toScope (go (Set.map F labels) (F <$> ops) (fromScope scope)))
+      Iota x body ->
+        Iota x (toScope (go (Set.map F labels) (F <$> ops) (fromScope body)))
+      ReplacePred y x xB scope ->
+        ReplacePred y x (go labels ops xB) (toScope (go (Set.map F labels) (F <$> ops) (fromScope scope)))
+      ReplaceFun bounds ap cond ->
+        ReplaceFun
+          (fmap (\(x, e) -> (x, go labels ops e)) bounds)
+          (toScope (go (Set.map F labels) (F <$> ops) (fromScope ap)))
+          (toScope (go (Set.map F labels) (F <$> ops) (fromScope cond)))
+      Lambda body ->
+        Lambda (toScope (go (Set.map F labels) (F <$> ops) (fromScope body)))
+      e@PropositionalConstant{} -> e
+
+-- | Does the expression contain a construct with no direct first-order
+-- counterpart (separation, description, replacement, lambda)?
+containsHigherOrderConstructs :: ExprOf a -> Bool
+containsHigherOrderConstructs = \case
+  TermSep {} -> True
+  Iota{} -> True
+  ReplacePred{}-> True
+  ReplaceFun{}-> True
+  Lambda{} -> True
+  Apply{} -> False -- FIXME: this is a lie in general; we need to add sortchecking to determine this.
+  TermVar{} -> False
+  PropositionalConstant{} -> False
+  TermSymbol _ es -> any containsHigherOrderConstructs es
+  Not e -> containsHigherOrderConstructs e
+  Connected _ e1 e2 -> containsHigherOrderConstructs e1 || containsHigherOrderConstructs e2
+  Quantified _ scope -> containsHigherOrderConstructs (fromScope scope)
+  TermSymbolStruct _ _ -> False
+
+-- | Application of a mixfix operator.
+pattern TermOp :: FunctionSymbol -> [ExprOf a] -> ExprOf a
+pattern TermOp op es = TermSymbol (SymbolMixfix op) es
+
+-- | A nullary operator consisting of a single token.
+pattern TermConst :: Token -> ExprOf a
+pattern TermConst c = TermOp [Just c] []
+
+-- | An ordered pair.
+pattern TermPair :: ExprOf a -> ExprOf a -> ExprOf a
+pattern TermPair e1 e2 = TermOp PairSymbol [e1, e2]
+
+-- | An atomic formula: a predicate applied to its arguments.
+pattern Atomic :: Predicate -> [ExprOf a] -> ExprOf a
+pattern Atomic symbol args = TermSymbol (SymbolPredicate symbol) args
+
+
+-- | Adjective applied to a subject and extra arguments.
+pattern FormulaAdj :: ExprOf a -> LexicalPhrase -> [ExprOf a] -> ExprOf a
+pattern FormulaAdj e adj es = Atomic (PredicateAdj adj) (e:es)
+
+-- | Verb applied to a subject and extra arguments.
+pattern FormulaVerb :: ExprOf a -> SgPl LexicalPhrase -> [ExprOf a] -> ExprOf a
+pattern FormulaVerb e verb es = Atomic (PredicateVerb verb) (e:es)
+
+-- | Noun predicate applied to a subject and extra arguments.
+pattern FormulaNoun :: ExprOf a -> SgPl LexicalPhrase -> [ExprOf a] -> ExprOf a
+pattern FormulaNoun e noun es = Atomic (PredicateNoun noun) (e:es)
+
+-- | /@\<arg\> is a relation@/.
+relationNoun :: Expr -> Formula
+relationNoun arg = FormulaNoun arg (unsafeReadPhraseSgPl "relation[/s]") []
+
+-- | /@\<arg\> is right-unique@/.
+rightUniqueAdj :: Expr -> Formula
+rightUniqueAdj arg = FormulaAdj arg (unsafeReadPhrase "right-unique") []
+
+-- | Untyped quantification.
+pattern Forall, Exists :: Scope VarSymbol ExprOf a -> ExprOf a
+pattern Forall scope = Quantified Universally scope
+pattern Exists scope = Quantified Existentially scope
+
+-- | Quantify over the given variables, binding their free occurrences.
+makeForall, makeExists :: Foldable t => t VarSymbol -> Formula -> Formula
+makeForall xs e = Quantified Universally (abstractVarSymbols xs e)
+makeExists xs e = Quantified Existentially (abstractVarSymbols xs e)
+
+-- | Re-scope, freeing exactly the bound variables listed in @xs@
+-- (they become 'F' occurrences) while every other bound variable
+-- stays bound and free variables stay free.
+instantiateSome :: NonEmpty VarSymbol -> Scope VarSymbol ExprOf VarSymbol -> Scope VarSymbol ExprOf VarSymbol
+instantiateSome xs scope = toScope (instantiateEither inst scope)
+  where
+    inst (Left x) | x `elem` xs = TermVar (F x)
+    inst (Left b) = TermVar (B b)
+    inst (Right fv) = TermVar (F fv)
+
+-- | Bind all free variables not occurring in the given set universally.
+-- A closed formula is returned unchanged (no vacuous quantifier).
+forallClosure :: Set VarSymbol -> Formula -> Formula
+forallClosure xs phi = if isClosed phi
+  then phi
+  else Quantified Universally (abstract isNamedVar phi)
+  where
+    isNamedVar :: VarSymbol -> Maybe VarSymbol
+    isNamedVar x = if x `Set.member` xs then Nothing else Just x
+
+-- | The set of free variables of an expression.
+freeVars :: ExprOf VarSymbol -> Set VarSymbol
+freeVars e = Set.fromList (toList e)
+
+-- Shorthands for formulas built from the binary connectives.
+pattern And :: ExprOf a -> ExprOf a -> ExprOf a
+pattern And e1 e2 = Connected Conjunction e1 e2
+
+pattern Or :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Or e1 e2 = Connected Disjunction e1 e2
+
+pattern Implies :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Implies e1 e2 = Connected Implication e1 e2
+
+pattern Iff :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Iff e1 e2 = Connected Equivalence e1 e2
+
+pattern Xor :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Xor e1 e2 = Connected ExclusiveOr e1 e2
+
+
+-- | Propositional falsity.
+pattern Bottom :: ExprOf a
+pattern Bottom = PropositionalConstant IsBottom
+
+-- | Propositional truth.
+pattern Top :: ExprOf a
+pattern Top = PropositionalConstant IsTop
+
+
+-- | A symbolic relation applied to its arguments.
+pattern Relation :: RelationSymbol -> [ExprOf a] -> ExprOf a
+pattern Relation rel es = Atomic (PredicateRelation rel) es
+
+-- | Membership.
+pattern IsElementOf :: ExprOf a -> ExprOf a -> ExprOf a
+pattern IsElementOf e1 e2 = Atomic (PredicateRelation (Command "in")) (e1 : [e2])
+
+-- | Non-membership.
+pattern IsNotElementOf :: ExprOf a -> ExprOf a -> ExprOf a
+pattern IsNotElementOf e1 e2 = Not (IsElementOf e1 e2)
+
+-- | Subset relation (non-strict).
+pattern IsSubsetOf :: ExprOf a -> ExprOf a -> ExprOf a
+pattern IsSubsetOf e1 e2 = Atomic (PredicateRelation (Command "subseteq")) (e1 : [e2])
+
+-- | Ordinal predicate.
+pattern IsOrd :: ExprOf a -> ExprOf a
+pattern IsOrd e1 = Atomic (PredicateNoun (SgPl [Just "ordinal"] [Just "ordinals"])) [e1]
+
+-- | Equality.
+pattern Equals :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Equals e1 e2 = Atomic (PredicateRelation (Symbol "=")) (e1 : [e2])
+
+-- | Disequality.
+pattern NotEquals :: ExprOf a -> ExprOf a -> ExprOf a
+pattern NotEquals e1 e2 = Atomic (PredicateRelation (Command "neq")) (e1 : [e2])
+
+-- | The empty-set constant.
+pattern EmptySet :: ExprOf a
+pattern EmptySet = TermSymbol (SymbolMixfix [Just (Command "emptyset")]) []
+
+-- | Left-nested conjunction of a list of formulas;
+-- the empty conjunction is 'Top'.
+makeConjunction :: [ExprOf a] -> ExprOf a
+makeConjunction [] = Top
+makeConjunction es = List.foldl1' And es
+
+-- | Left-nested disjunction of a list of formulas;
+-- the empty disjunction is 'Bottom'.
+makeDisjunction :: [ExprOf a] -> ExprOf a
+makeDisjunction [] = Bottom
+makeDisjunction es = List.foldl1' Or es
+
+-- | The term denoting a finite set of the given elements, built by
+-- folding cons-applications over the empty set.
+finiteSet :: NonEmpty (ExprOf a) -> ExprOf a
+finiteSet = foldr cons EmptySet
+  where
+    cons x y = TermSymbol (SymbolMixfix ConsSymbol) [x, y]
+
+-- | A formula is positive iff it is not an outermost negation.
+isPositive :: ExprOf a -> Bool
+isPositive (Not _) = False
+isPositive _ = True
+
+-- | Toggle an outermost negation.
+dual :: ExprOf a -> ExprOf a
+dual (Not f) = f
+dual f = Not f
+
+
+
+-- | Local assumptions.
+data Asm
+  = Asm Formula -- ^ Assume a plain formula.
+  | AsmStruct VarSymbol StructPhrase -- ^ Assume the labelled variable is a structure of the given kind.
+
+
+deriving instance Show Asm
+deriving instance Eq Asm
+deriving instance Ord Asm
+
+-- | Assumption that a variable carries a structure.
+data StructAsm
+  = StructAsm VarSymbol StructPhrase
+
+
+
+-- | An axiom: local assumptions and the asserted formula.
+data Axiom = Axiom [Asm] Formula
+
+deriving instance Show Axiom
+deriving instance Eq Axiom
+deriving instance Ord Axiom
+
+
+-- | A lemma: local assumptions and the claimed formula.
+data Lemma = Lemma [Asm] Formula
+
+deriving instance Show Lemma
+deriving instance Eq Lemma
+deriving instance Ord Lemma
+
+
+-- | Definitions of predicates, phrase functions and operators.
+data Defn
+  = DefnPredicate [Asm] Predicate (NonEmpty VarSymbol) Formula
+  | DefnFun [Asm] (SgPl LexicalPhrase) [VarSymbol] Term
+  | DefnOp FunctionSymbol [VarSymbol] Term
+
+deriving instance Show Defn
+deriving instance Eq Defn
+deriving instance Ord Defn
+
+-- | An inductive definition of a set.
+data Inductive = Inductive
+  { inductiveSymbol :: FunctionSymbol
+  , inductiveParams :: [VarSymbol]
+  , inductiveDomain :: Expr
+  , inductiveIntros :: NonEmpty IntroRule
+  }
+  deriving (Show, Eq, Ord)
+
+-- | An introduction rule of an inductive definition.
+data IntroRule = IntroRule
+  { introConditions :: [Formula] -- The inductively defined set may only appear as an argument of monotone operations on the rhs.
+  , introResult :: Formula -- TODO Refine.
+  }
+  deriving (Show, Eq, Ord)
+
+
+data Proof
+  = Omitted
+  -- ^ Ends a proof without further verification.
+  -- This results in a “gap” in the formalization.
+  | Qed Justification
+  -- ^ Ends of a proof, leaving automation to discharge the current goal using the given justification.
+  | ByContradiction Proof
+  -- ^ Take the dual of the current goal as an assumption and
+  -- set the goal to absurdity.
+  | BySetInduction (Maybe Term) Proof
+  -- ^ ∈-induction.
+  | ByOrdInduction Proof
+  -- ^ Transfinite induction for ordinals.
+  | Assume Formula Proof
+  -- ^ Simplify goals that are implications or disjunctions.
+  | Fix (NonEmpty VarSymbol) Formula Proof
+  -- ^ Simplify universal goals (with an optional bound or such that statement)
+  | Take (NonEmpty VarSymbol) Formula Justification Proof
+  -- ^ Use existential assumptions.
+  | Suffices Formula Justification Proof
+  -- ^ NOTE(review): presumably replaces the current goal by the given
+  -- sufficient statement (justified as stated) — confirm in the checker.
+  | ByCase [Case]
+  -- ^ Proof by case. Disjunction of the case hypotheses 'Case'
+  -- must hold for this step to succeed. Each case starts a subproof,
+  -- keeping the same goal but adding the case hypothesis as an assumption.
+  -- Often this will be a classical split between /@P@/ and /@not P@/, in
+  -- which case the proof that /@P or not P@/ holds is easy.
+  --
+  | Have Formula Justification Proof
+  -- ^ An affirmation, e.g.: /@We have \<stmt\> by \<ref\>@/.
+  --
+  | Calc Calc Proof
+  -- ^ An equational or biconditional calculation; see 'Calc'.
+  | Subclaim Formula Proof Proof
+  -- ^ A claim is a sublemma with its own proof:
+  --
+  -- /@Show \<goal stmt\>. \<steps\>. \<continue other proof\>.@/
+  --
+  -- A successful first proof adds the claimed formula as an assumption
+  -- for the remaining proof.
+  --
+  | Define VarSymbol Term Proof
+  -- ^ Introduce a local definition of a variable for the rest of the proof.
+  | DefineFunction VarSymbol VarSymbol Term Term Proof
+  -- ^ Introduce a local function definition for the rest of the proof.
+
+deriving instance Show Proof
+deriving instance Eq Proof
+deriving instance Ord Proof
+
+
+
+-- | A case of a case split.
+data Case = Case
+  { caseOf :: Formula -- ^ The case hypothesis.
+  , caseProof :: Proof -- ^ Subproof carried out under that hypothesis.
+  }
+
+deriving instance Show Case
+deriving instance Eq Case
+deriving instance Ord Case
+
+-- | See 'Syntax.Abstract.Calc'. A chain of equations or biconditionals
+-- starting from an initial term, each step carrying its justification.
+data Calc
+  = Equation Term (NonEmpty (Term, Justification))
+  | Biconditionals Term (NonEmpty (Term, Justification))
+
+deriving instance Show Calc
+deriving instance Eq Calc
+deriving instance Ord Calc
+
+-- | The overall claim established by a calculation:
+-- the first term related to the last.
+calcResult :: Calc -> ExprOf VarSymbol
+calcResult = \case
+  Equation e eqns -> e `Equals` fst (NonEmpty.last eqns)
+  Biconditionals phi phis -> phi `Iff` fst (NonEmpty.last phis)
+
+-- | All single steps of a calculation, each paired with the
+-- justification of its right-hand side.
+calculation :: Calc -> [(ExprOf VarSymbol, Justification)]
+calculation = \case
+  Equation e1 eqns@((e2, jst) :| _) -> (e1 `Equals` e2, jst) : collectEquations (toList eqns)
+  Biconditionals p1 ps@((p2, jst) :| _) -> (p1 `Iff` p2, jst) : collectBiconditionals (toList ps)
+
+
+-- | Turn adjacent calculation steps into equations, keeping the
+-- justification of the later step.
+collectEquations :: [(ExprOf a, b)] -> [(ExprOf a, b)]
+collectEquations = \case
+  (e1, _) : eqns'@((e2, jst) : _) -> (e1 `Equals` e2, jst) : collectEquations eqns'
+  _ -> []
+
+-- | Turn adjacent calculation steps into biconditionals, keeping the
+-- justification of the later step.
+-- (The recursion previously went through 'collectEquations', which
+-- incorrectly turned every step after the first into an equation.)
+collectBiconditionals :: [(ExprOf a, b)] -> [(ExprOf a, b)]
+collectBiconditionals = \case
+  (p1, _) : ps@((p2, jst) : _) -> (p1 `Iff` p2, jst) : collectBiconditionals ps
+  _ -> []
+
+
+-- | A datatype given by a finite enumeration of constructors.
+newtype Datatype
+  = DatatypeFin (NonEmpty Text)
+  deriving (Show, Eq, Ord)
+
+
+-- | A signature declaration, introducing a symbol without a definition.
+data Signature
+  = SignaturePredicate Predicate (NonEmpty VarSymbol)
+  | SignatureFormula Formula -- TODO: Reconsider, this is pretty lossy.
+
+deriving instance Show Signature
+deriving instance Eq Signature
+deriving instance Ord Signature
+
+data StructDefn = StructDefn
+  { structPhrase :: StructPhrase
+  -- ^ The noun phrase naming the structure, e.g.: @partial order@ or @abelian group@.
+  , structParents :: Set StructPhrase
+  -- ^ Structures this structure extends.
+  , structDefnLabel :: VarSymbol
+  -- ^ The label naming the structure in its own definition.
+  , structDefnFixes :: Set StructSymbol
+  -- ^ List of commands representing operations,
+  -- e.g.: @\\contained@ or @\\inv@. These are used as default operation names
+  -- in instantiations such as @Let $G$ be a group@.
+  -- The commands should be set up to handle an optional struct label
+  -- which would typically be rendered as a sub- or superscript, e.g.:
+  -- @\\contained[A]@ could render as ”⊑ᴬ“.
+  , structDefnAssumes :: [(Marker, Formula)]
+  -- ^ The assumptions or axioms of the structure.
+  -- To be instantiated with the @structFixes@ of a given structure.
+  }
+
+deriving instance Show StructDefn
+deriving instance Eq StructDefn
+deriving instance Ord StructDefn
+
+
+-- | An abbreviation for a symbol. The scope is closed ('Void' means no
+-- free variables); its bound 'Int's presumably index the abbreviation's
+-- argument positions — confirm at the expansion site.
+data Abbreviation
+  = Abbreviation Symbol (Scope Int ExprOf Void)
+  deriving (Show, Eq, Ord)
+
+-- | Top-level blocks of a document, each carrying its source position.
+data Block
+  = BlockAxiom SourcePos Marker Axiom
+  | BlockLemma SourcePos Marker Lemma
+  | BlockProof SourcePos Proof
+  | BlockDefn SourcePos Marker Defn
+  | BlockAbbr SourcePos Marker Abbreviation
+  | BlockStruct SourcePos Marker StructDefn
+  | BlockInductive SourcePos Marker Inductive
+  | BlockSig SourcePos [Asm] Signature
+  deriving (Show, Eq, Ord)
+
+
+-- | A single proof task: a labelled conjecture together with its
+-- hypotheses; see 'Directness' for the ATP-facing proof mode.
+data Task = Task
+  { taskDirectness :: Directness
+  , taskHypotheses :: [(Marker, Formula)] -- ^ No guarantees on order.
+  , taskConjectureLabel :: Marker
+  , taskConjecture :: Formula
+  } deriving (Show, Eq, Generic, Hashable)
+
+
+-- | Indicates whether a given proof is direct or indirect.
+-- An indirect proof (i.e. a proof by contradiction) may
+-- cause an ATP to emit a warning about contradictory axioms.
+-- When we know that the proof is indirect, we want to ignore
+-- this warning. For relevance filtering we also want to know
+-- what our actual goal is, so we keep the original conjecture.
+-- See 'isIndirect' for the corresponding predicate on 'Task'.
+data Directness
+  = Indirect Formula -- ^ The former conjecture.
+  | Direct
+  deriving (Show, Eq, Generic, Hashable)
+
+-- | Is the task's proof indirect (by contradiction)?
+isIndirect :: Task -> Bool
+isIndirect = isInd . taskDirectness
+  where
+    isInd = \case
+      Indirect _ -> True
+      Direct -> False
+
+
+-- | Boolean contraction of a task: contract the conjecture and every
+-- hypothesis, dropping hypotheses that contract to 'Top'.
+contractionTask :: Task -> Task
+contractionTask task = task
+  { taskHypotheses = mapMaybe contract (taskHypotheses task)
+  , taskConjecture = contraction (taskConjecture task)
+  }
+
+-- | Contract a labelled hypothesis, dropping it entirely when it
+-- contracts to 'Top'.
+contract :: (Marker, Formula) -> Maybe (Marker, Formula)
+contract (m, phi) =
+  case contraction phi of
+    Top -> Nothing
+    psi -> Just (m, psi)
+
+
+
+-- | Full boolean contraction: recursively eliminate the constants
+-- 'Top' and 'Bottom' via 'atomicContraction'.
+-- NOTE(review): a top-level @Not Top@ is not contracted (only
+-- 'Connected' and 'Quantified' nodes are passed through
+-- 'atomicContraction') — confirm this is intended.
+contraction :: ExprOf a -> ExprOf a
+contraction = \case
+  Connected conn f1 f2 -> atomicContraction (Connected conn (contraction f1) (contraction f2))
+  Quantified quant scope -> atomicContraction (Quantified quant (hoistScope contraction scope))
+  Not f -> Not (contraction f)
+  f -> f
+
+
+-- | Atomic boolean contraction: simplify a connective or quantifier
+-- whose immediate arguments are the constants 'Top' or 'Bottom'.
+atomicContraction :: ExprOf a -> ExprOf a
+atomicContraction = \case
+  Top `Iff` f -> f
+  Bottom `Iff` f -> Not f
+  f `Iff` Top -> f
+  f `Iff` Bottom -> Not f
+
+  Top `Implies` f -> f
+  Bottom `Implies` _ -> Top
+  _ `Implies` Top -> Top
+  f `Implies` Bottom -> Not f
+
+  Top `And` f -> f
+  Bottom `And` _ -> Bottom
+  f `And` Top -> f
+  _ `And` Bottom -> Bottom
+
+  -- Disjunction was previously never contracted, unlike the other
+  -- connectives; these are the dual identities of the 'And' cases.
+  Top `Or` _ -> Top
+  Bottom `Or` f -> f
+  _ `Or` Top -> Top
+  f `Or` Bottom -> f
+
+  phi@(Quantified _quant scope) -> case unscope scope of
+    Top -> Top
+    Bottom -> Bottom
+    _ -> phi
+
+  Not Top -> Bottom
+  Not Bottom -> Top
+
+  f -> f
diff --git a/source/Syntax/LexicalPhrase.hs b/source/Syntax/LexicalPhrase.hs
new file mode 100644
index 0000000..1743255
--- /dev/null
+++ b/source/Syntax/LexicalPhrase.hs
@@ -0,0 +1,93 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Syntax.LexicalPhrase where
+
+
+import Base
+import Syntax.Token (Token(..))
+
+import Data.Char (isAlpha)
+import Data.Text qualified as Text
+import Text.Earley.Mixfix (Holey)
+import Text.Earley (Grammar, Prod, (<?>), fullParses, parser, rule, token, satisfy)
+
+
+
+-- | 'LexicalPhrase's should be nonempty lists with at least one proper word token.
+-- Hyphens and quotes in words are treated as letters.
+-- Thus /@manifold-with-boundary@/ is a singleton lexical phrase (one word).
+--
+type LexicalPhrase = Holey Token
+
+-- MAYBE Add this instance by making LexicalPhrase a proper Type?
+-- Until then we can use the default instance for lists of prettyprintable things.
+--
+-- instance Pretty LexicalPhrase where
+-- pretty components = hsep (prettyComponent <$> components)
+-- where
+-- prettyComponent = \case
+-- Nothing -> "_"
+-- Just tok -> pretty tok
+
+
+
+-- | Split data by grammatical number (singular/plural).
+-- The 'Eq' and 'Ord' instances only consider the singular
+-- form so that we can prefer known irregular plurals over
+-- guessed irregular plurals when inserting items into
+-- the 'Lexicon'.
+data SgPl a
+  = SgPl {sg :: a, pl :: a}
+  deriving (Show, Functor, Generic)
+
+instance Eq a => Eq (SgPl a) where (==) = (==) `on` sg
+instance Ord a => Ord (SgPl a) where compare = compare `on` sg
+
+-- Hash only the singular form: the generic derived instance would hash
+-- both fields, breaking the law that values which compare equal
+-- (equality here is on 'sg' alone) must have equal hashes.
+instance Hashable a => Hashable (SgPl a) where
+  hashWithSalt salt = hashWithSalt salt . sg
+
+
+-- | Parse a lexical phrase specification such as @"equal to ?"@.
+-- Calls 'error' when the specification does not parse.
+unsafeReadPhrase :: String -> LexicalPhrase
+unsafeReadPhrase spec =
+  case fst (fullParses (parser lexicalPhraseSpec) spec) of
+    [] -> error "unsafeReadPhrase failed"
+    pat : _ -> pat
+
+-- | Parse a phrase specification with singular/plural variants,
+-- e.g. @"equal[s/] ?"@. Calls 'error' when the specification does not parse.
+unsafeReadPhraseSgPl :: String -> SgPl LexicalPhrase
+unsafeReadPhraseSgPl spec =
+  case fst (fullParses (parser lexicalPhraseSpecSgPl) spec) of
+    [] -> error "unsafeReadPhraseSgPl failed"
+    pat : _ -> pat
+
+
+-- | Grammar for phrase specifications: segments are either holes (@?@)
+-- or words (letters and hyphens), separated by single spaces.
+lexicalPhraseSpec :: Grammar r (Prod r String Char LexicalPhrase)
+lexicalPhraseSpec = do
+  hole <- rule $ Nothing <$ token '?' <?> "hole"
+  word <- rule $ Just <$> many (satisfy (\c -> isAlpha c || c == '-'))
+  space <- rule $ Just . (:[]) <$> token ' '
+  segment <- rule $ hole <|> word
+  rule $ (\seg segs -> fmap makeWord (seg : segs)) <$> segment <*> many (space *> segment)
+
+
+-- | Grammar for phrase specifications with singular/plural variants.
+-- A bracketed chunk @[sg/pl]@ inside a word contributes both forms:
+-- @"equal[s/] ?"@ yields singular @equals ?@ and plural @equal ?@.
+lexicalPhraseSpecSgPl :: Grammar r (Prod r String Char (SgPl LexicalPhrase))
+lexicalPhraseSpecSgPl = do
+  space <- rule $ Just . (:[]) <$> token ' '
+  hole <- rule $ (Nothing, Nothing) <$ token '?'<?> "hole"
+
+  word <- rule (many (satisfy isAlpha) <?> "word")
+  wordSgPl <- rule $ (,) <$> (token '[' *> word) <* token '/' <*> word <* token ']'
+  complexWord <- rule $ (\(a,b) -> (Just a, Just b)) . fuse <$>
+    many ((<>) <$> (dup <$> word) <*> wordSgPl) <?> "word"
+  segment <- rule (hole <|> (dup . Just <$> word) <|> complexWord )
+  rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment)
+  where
+    dup x = (x,x)
+    -- Concatenate adjacent (singular, plural) chunks of one word.
+    fuse = \case
+      (a, b) : (c, d) : rest -> fuse ((a <> c, b <> d) : rest)
+      [(a, b)] -> (a, b)
+      -- Error message now names the module this code actually lives in
+      -- (it previously said "Syntax.Abstract", a leftover from a move).
+      _ -> error "Syntax.LexicalPhrase.fuse"
+
+    makePhrase :: [(Maybe String, Maybe String)] -> SgPl LexicalPhrase
+    makePhrase = (\(patSg, patPl) -> SgPl (fmap makeWord patSg) (fmap makeWord patPl)) . unzip
+
+-- | Lift a phrase segment into token form: holes stay 'Nothing',
+-- words become 'Word' tokens.
+makeWord :: Maybe String -> Maybe Token
+makeWord segment = fmap (\w -> Word (Text.pack w)) segment
diff --git a/source/Syntax/Lexicon.hs b/source/Syntax/Lexicon.hs
new file mode 100644
index 0000000..805b875
--- /dev/null
+++ b/source/Syntax/Lexicon.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | The 'Lexicon' describes the part of the grammar that is extensible/dynamic.
+--
+-- The items of the 'Lexicon' are organized by their meaning and their
+-- syntactic behaviour. They are typically represented as some kind of
+-- pattern data which is then used to generate various production rules
+-- for the concrete grammar. This representation makes inspection and
+-- extension easier.
+--
+
+module Syntax.Lexicon
+ ( module Syntax.Lexicon
+ , pattern ConsSymbol
+ , pattern PairSymbol
+ , pattern CarrierSymbol
+ , pattern ApplySymbol
+ , pattern DomSymbol
+ ) where
+
+
+import Base
+import Syntax.Abstract
+
+import Data.List qualified as List
+import Data.Sequence qualified as Seq
+import Data.HashSet qualified as Set
+import Data.HashMap.Strict qualified as HM
+import Data.Text qualified as Text
+import Text.Earley.Mixfix (Holey, Associativity(..))
+
+
+-- | The dynamic part of the grammar, organized by syntactic category.
+data Lexicon = Lexicon
+  { lexiconMixfix :: Seq (HashMap (Holey Token) (Associativity, Marker)) -- ^ Mixfix function symbols, one table per precedence level.
+  , lexiconConnectives :: [[(Holey Token, Associativity)]] -- ^ Logical connectives, grouped by precedence level.
+  , lexiconPrefixPredicates :: LexicalItems PrefixPredicate -- ^ Prefix predicate symbols (e.g. @Cong@, @Betw@).
+  , lexiconStructFun :: LexicalItems StructSymbol -- ^ Structure operations (e.g. the carrier).
+  , lexiconRelationSymbols :: LexicalItems RelationSymbol -- ^ Binary relation symbols (e.g. @=@, @\in@).
+  , lexiconVerbs :: LexicalItems (SgPl LexicalPhrase) -- ^ Verb phrases with singular/plural forms.
+  , lexiconAdjLs :: LexicalItems LexicalPhrase -- ^ Adjective phrases; presumably left attributes — confirm.
+  , lexiconAdjRs :: LexicalItems LexicalPhrase -- ^ Adjective phrases; presumably right attributes — confirm.
+  , lexiconNouns :: LexicalItems (SgPl LexicalPhrase) -- ^ Noun phrases with singular/plural forms.
+  , lexiconStructNouns :: LexicalItems (SgPl LexicalPhrase) -- ^ Noun phrases denoting structures.
+  , lexiconFuns :: LexicalItems (SgPl LexicalPhrase) -- ^ Function noun phrases.
+  } deriving (Show, Eq)
+
+-- | Projection returning the union of both left and right attributes.
+-- (Left-biased, matching the '<>' union on 'HashMap'.)
+lexiconAdjs :: Lexicon -> HashMap LexicalPhrase Marker
+lexiconAdjs lexicon = HM.union (lexiconAdjLs lexicon) (lexiconAdjRs lexicon)
+
+-- | Look up the marker of a function symbol across all precedence
+-- levels; 'Left' carries the shown symbol on failure.
+lookupOp :: FunctionSymbol -> Seq (HashMap FunctionSymbol (assoc, Marker)) -> Either String Marker
+lookupOp f ops =
+  case firstJust (HM.lookup f) ops of
+    Just (_assoc, m) -> Right m
+    Nothing -> Left (show f)
+
+-- | Look up the marker of a lexical item; 'Left' carries the shown
+-- item on failure.
+lookupLexicalItem :: (Hashable a, Show a) => a -> LexicalItems a -> Either String Marker
+lookupLexicalItem a items =
+  case HM.lookup a items of
+    Nothing -> Left (show a)
+    Just m -> Right m
+
+-- | Lexical items map each pattern to its canonical marker.
+type LexicalItems a = HashMap a Marker
+
+-- | The built-in lexicon every document starts from.
+-- Fields are listed in the same order as the 'Lexicon' declaration.
+builtins :: Lexicon
+builtins = Lexicon
+  { lexiconMixfix = builtinMixfix
+  , lexiconConnectives = builtinConnectives
+  , lexiconPrefixPredicates = builtinPrefixPredicates
+  , lexiconStructFun = builtinStructOps
+  , lexiconRelationSymbols = builtinRelationSymbols
+  , lexiconVerbs = builtinVerbs
+  , lexiconAdjLs = mempty
+  , lexiconAdjRs = builtinAdjRs
+  , lexiconNouns = builtinNouns
+  , lexiconStructNouns = builtinStructNouns
+  , lexiconFuns = mempty
+  }
+
+-- INVARIANT: 10 precedence levels for now.
+-- One inner list per precedence level; all operators within a level
+-- share the same binding strength.
+-- NOTE(review): whether earlier levels bind looser or tighter is not
+-- visible here — confirm against the mixfix grammar generation.
+builtinMixfix :: Seq (HashMap FunctionSymbol (Associativity, Marker))
+builtinMixfix = Seq.fromList $ (HM.fromList <$>)
+  [ []
+  , [binOp (Symbol "+") LeftAssoc "add", binOp (Command "union") LeftAssoc "union", binOp (Command "monus") LeftAssoc "monus"]
+  , [binOp (Command "relcomp") LeftAssoc "relcomp"]
+  , [binOp (Command "circ") LeftAssoc "circ"]
+  , [binOp (Command "mul") LeftAssoc "mul", binOp (Command "inter") LeftAssoc "inter"]
+  , [binOp (Command "setminus") LeftAssoc "setminus"]
+  , [binOp (Command "times") RightAssoc "times"]
+  , []
+  , prefixOps
+  , builtinIdentifiers
+  ]
+  where
+    -- Nullary constants written as bare TEX commands.
+    builtinIdentifiers :: [(FunctionSymbol, (Associativity, Marker))]
+    builtinIdentifiers = identifier <$>
+      [ "emptyset"
+      , "naturals"
+      , "rationals"
+      , "reals"
+      , "unit"
+      , "zero"
+      ]
+
+-- | Prefix operators written as @\cmd{...}@: the brace-delimited
+-- argument is represented as a hole between invisible braces.
+prefixOps :: [(FunctionSymbol, (Associativity, Marker))]
+prefixOps =
+  [ bracedPrefix "unions" "unions"
+  , bracedPrefix "cumul" "cumul"
+  , bracedPrefix "fst" "fst"
+  , bracedPrefix "snd" "snd"
+  , bracedPrefix "pow" "pow"
+  , (ConsSymbol, (NonAssoc, "cons"))
+  , (PairSymbol, (NonAssoc, "pair"))
+  -- NOTE Is now defined and hence no longer necessary , (ApplySymbol, (NonAssoc, "apply"))
+  ]
+  where
+    -- A command taking one braced argument, e.g. @\pow{...}@.
+    bracedPrefix cmd m = ([Just (Command cmd), Just InvisibleBraceL, Nothing, Just InvisibleBraceR], (NonAssoc, m))
+
+
+-- | Built-in structure operations.
+builtinStructOps :: LexicalItems StructSymbol
+builtinStructOps = HM.singleton CarrierSymbol "carrier"
+
+-- | A nullary operator written as a bare TEX command, e.g. @\emptyset@;
+-- the command name doubles as its marker.
+identifier :: Text -> (Holey Token, (Associativity, Marker))
+identifier name = ([Just (Command name)], (NonAssoc, Marker name))
+
+
+-- | Built-in binary relation symbols and their markers.
+builtinRelationSymbols :: LexicalItems RelationSymbol
+builtinRelationSymbols = HM.fromList
+  [ (Symbol "=", "eq")
+  , (Command "neq", "neq")
+  , (Command "in", "elem")
+  , (Command "notin", "notelem") -- Alternative to @\not\in@.
+  ]
+
+-- | Built-in prefix predicates. The integer is presumably the
+-- predicate's arity (congruence @Cong@ is 4-ary and betweenness
+-- @Betw@ 3-ary in Tarski-style geometry) — NOTE(review): confirm
+-- against the 'PrefixPredicate' definition.
+builtinPrefixPredicates :: LexicalItems PrefixPredicate
+builtinPrefixPredicates = HM.fromList
+  [ (PrefixPredicate "Cong" 4, "cong")
+  , (PrefixPredicate "Betw" 3, "betw")
+  ]
+
+
+-- | Built-in logical connectives, grouped by precedence level.
+-- All are infix except @\lnot@, which is prefix (hole on the right only).
+builtinConnectives :: [[(Holey Token, Associativity)]]
+builtinConnectives =
+  [ [binOp' (Command "iff") NonAssoc]
+  , [binOp' (Command "implies") RightAssoc]
+  , [binOp' (Command "lor") LeftAssoc]
+  , [binOp' (Command "land") LeftAssoc]
+  , [([Just (Command "lnot"), Nothing], NonAssoc)]
+  ]
+
+
+-- | A binary infix operator pattern @_ tok _@ with a marker.
+binOp :: Token -> Associativity -> Marker -> (Holey Token, (Associativity, Marker))
+binOp t assoc marker = ([Nothing, Just t, Nothing], (assoc, marker))
+
+-- | A binary infix connective pattern @_ tok _@ without a marker.
+binOp' :: Token -> Associativity -> (Holey Token, Associativity)
+binOp' t assoc = ([Nothing, Just t, Nothing], assoc)
+
+-- | Built-in right-attribute adjective phrases.
+builtinAdjRs :: LexicalItems LexicalPhrase
+builtinAdjRs = HM.singleton (unsafeReadPhrase "equal to ?") "eq"
+
+-- | Built-in verb phrases.
+builtinVerbs :: LexicalItems (SgPl LexicalPhrase)
+builtinVerbs = HM.singleton (unsafeReadPhraseSgPl "equal[s/] ?") "eq"
+
+
+-- Some of these do/should correspond to mathlib structures,
+-- e.g.: lattice, complete lattice, ring, etc.
+--
+builtinNouns :: LexicalItems (SgPl LexicalPhrase)
+builtinNouns = HM.fromList
+  -- Nullary
+  [ (unsafeReadPhraseSgPl "set[/s]", "set")
+  , (unsafeReadPhraseSgPl "point[/s]", "point")
+  , (unsafeReadPhraseSgPl "element[/s] of ?", "elem")
+  ]
+
+-- | The noun phrase for the primitive one-sorted structure.
+_Onesorted :: SgPl LexicalPhrase
+_Onesorted = unsafeReadPhraseSgPl "onesorted structure[/s]"
+
+-- | Built-in structure nouns.
+builtinStructNouns :: LexicalItems (SgPl LexicalPhrase)
+builtinStructNouns = HM.singleton _Onesorted "onesorted_structure"
+
+
+-- | Naïve splitting of lexical phrases to insert a variable slot for names in noun phrases,
+-- as in /@there exists a linear form $h$ on $E$@/, where the underlying pattern is
+-- /@linear form on ?@/. In this case we would get:
+--
+-- > splitOnVariableSlot (sg (unsafeReadPhraseSgPl "linear form[/s] on ?"))
+-- > ==
+-- > (unsafeReadPhrase "linear form", unsafeReadPhrase "on ?")
+--
+splitOnVariableSlot :: LexicalPhrase -> (LexicalPhrase, LexicalPhrase)
+splitOnVariableSlot phrase = case prepositionIndices <> nonhyphenatedSlotIndices of
+  [] -> (phrase, []) -- Place variable slot at the end.
+  is -> List.splitAt (minimum is) phrase
+  where
+    prepositionIndices, slotIndices, nonhyphenatedSlotIndices :: [Int] -- Ascending.
+    prepositionIndices = List.findIndices isPreposition phrase
+    slotIndices = List.findIndices isNothing phrase
+    -- A slot only counts when the component after it does not start with
+    -- a hyphen (so hyphen-attached slots stay with their word).
+    nonhyphenatedSlotIndices = [i | i <- slotIndices, noHyphen (nth (i + 1) phrase)]
+
+    isPreposition :: Maybe Token -> Bool
+    isPreposition = \case
+      Just (Word w) -> w `Set.member` prepositions
+      _ -> False
+
+    noHyphen :: Maybe (Maybe Token) -> Bool
+    noHyphen = \case
+      -- NOTE(review): 'Text.head' is partial; this assumes 'Word' tokens
+      -- are never empty — the lexer builds them with 'some', but confirm
+      -- for other construction sites.
+      Just (Just (Word w)) -> Text.head w /= '-'
+      -- If we arrive here, either the pattern is over (`Nothing`) or the next
+      -- part of the pattern is not a word that starts with a hyphen.
+      _ -> True
+
+
+-- Prepositions are a closed class, but this list is not yet exhaustive.
+-- It can and should be extended when needed. The following list is a
+-- selection of the prepositions found at
+-- https://en.wikipedia.org/wiki/List_of_English_prepositions.
+--
+-- All entries are lowercase: the lexer case-folds words, so membership
+-- tests against this set are effectively case-insensitive.
+prepositions :: HashSet Text
+prepositions = Set.fromList
+  [ "about"
+  , "above"
+  , "across"
+  , "after"
+  , "against"
+  , "along", "alongside"
+  , "amid", "amidst"
+  , "among"
+  , "around"
+  , "as"
+  , "at"
+  , "atop"
+  , "before"
+  , "behind"
+  , "below"
+  , "beneath"
+  , "beside", "besides"
+  , "between"
+  , "beyond"
+  , "but"
+  , "by"
+  , "except"
+  , "for"
+  , "from"
+  , "in", "inside", "into"
+  , "like"
+  , "modulo", "mod"
+  , "near"
+  , "next"
+  , "of"
+  , "off"
+  , "on"
+  , "onto"
+  , "opposite"
+  , "out"
+  , "over"
+  , "past"
+  , "per"
+  , "sans"
+  , "till"
+  , "to"
+  , "under"
+  , "underneath"
+  , "unlike"
+  , "unto"
+  , "up", "upon"
+  , "versus"
+  , "via"
+  , "with"
+  , "within"
+  , "without"
+  ]
diff --git a/source/Syntax/Token.hs b/source/Syntax/Token.hs
new file mode 100644
index 0000000..7c8606c
--- /dev/null
+++ b/source/Syntax/Token.hs
@@ -0,0 +1,438 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- |
+-- This module defines the lexer and its associated data types.
+-- The lexer takes `Text` as input and produces a stream of tokens
+-- annotated with positional information. This information is bundled
+-- together with the original raw input for producing error messages.
+--
+-- The lexer performs some normalizations to make describing the grammar easier.
+-- Words outside of math environments are case-folded. Some commands are analysed
+-- as variable tokens and are equivalent to their respective unicode variants
+-- (α, β, γ, ..., 𝔸, 𝔹, ℂ, ...). Similarly, @\\begin{...}@ and @\\end{...}@ commands
+-- are each parsed as single tokens.
+--
+module Syntax.Token
+ ( Token(..)
+ , tokToString
+ , tokToText
+ , TokStream(..)
+ , Located(..)
+ , runLexer
+ ) where
+
+
+import Base hiding (many)
+
+import Control.Monad.Combinators
+import Control.Monad.State.Strict
+import Data.Char (isAsciiLower)
+import Data.List.NonEmpty qualified as NonEmpty
+import Data.Text qualified as Text
+import Prettyprinter (Pretty(..))
+import Text.Megaparsec hiding (Token, Label, label)
+import Text.Megaparsec.Char qualified as Char
+import Text.Megaparsec.Char.Lexer qualified as Lexer
+
+
+-- | Run the lexer on raw input. The 'String' is the source file name,
+-- used for error reporting only.
+runLexer :: String -> Text -> Either (ParseErrorBundle Text Void) [Located Token]
+runLexer file = runParser (evalStateT toks initLexerState) file
+
+
+type Lexer = StateT LexerState (Parsec Void Text)
+
+
+-- | Mutable lexer context threaded through tokenization.
+data LexerState = LexerState
+  { textNesting :: !Int
+  -- ^ Represents nesting of braces inside of the @\text{...}@
+  -- command. When we encounter @\text@ the token mode switches
+  -- to text tokens. In order to switch back to math mode correctly
+  -- we need to count the braces.
+  , mode :: !Mode
+  -- ^ The current tokenizing mode (text vs. math).
+  } deriving (Show, Eq)
+
+-- | Start in text mode with no pending @\text@ braces.
+initLexerState :: LexerState
+initLexerState = LexerState{textNesting = 0, mode = TextMode}
+
+-- | Adjust the brace-nesting counter used inside @\text{...}@.
+incrNesting, decrNesting :: LexerState -> LexerState
+incrNesting st = st{textNesting = succ (textNesting st)}
+decrNesting st = st{textNesting = pred (textNesting st)}
+
+-- | The two tokenizing modes: prose ('TextMode') and formulas ('MathMode').
+data Mode = TextMode | MathMode deriving (Show, Eq)
+
+-- | Query the current tokenizing mode.
+isTextMode, isMathMode :: Lexer Bool
+isTextMode = (TextMode ==) <$> gets mode
+isMathMode = (MathMode ==) <$> gets mode
+
+-- | Switch the current tokenizing mode.
+setTextMode, setMathMode :: Lexer ()
+setTextMode = modify (\st -> st{mode = TextMode})
+setMathMode = modify (\st -> st{mode = MathMode})
+
+-- |
+-- A token stream as input stream for a parser. Contains the raw input
+-- before tokenization as 'Text' for showing error messages.
+--
+data TokStream = TokStream
+  { rawInput :: !Text
+  , unTokStream :: ![Located Token]
+  } deriving (Show, Eq)
+
+-- Streams concatenate pointwise: raw text with raw text, tokens with tokens.
+instance Semigroup TokStream where
+  TokStream raw1 toks1 <> TokStream raw2 toks2 = TokStream (raw1 <> raw2) (toks1 <> toks2)
+
+instance Monoid TokStream where
+  mempty = TokStream mempty mempty
+
+-- | A LaTeX token.
+-- Invisible delimiters 'InvisibleBraceL' and 'InvisibleBraceR' are
+-- unescaped braces used for grouping in TEX (@{@),
+-- visible braces are escaped braces (@\\{@).
+data Token
+  = Word !Text -- ^ A prose word (case-folded by the lexer).
+  | Variable !Text -- ^ A math-mode variable.
+  | Symbol !Text -- ^ A run of symbol characters (e.g. @+@, @=@).
+  | Integer !Int -- ^ A decimal integer literal.
+  | Command !Text -- ^ A TEX command, stored without the backslash.
+  | Label Text -- ^ A /@\\label{...}@/ command (case-sensitive).
+  | Ref (NonEmpty Text) -- ^ A /@\\ref{...}@/ command (case-sensitive).
+  | BeginEnv !Text -- ^ Environment begin; @"math"@ covers @$@, @\(@ and @\[@.
+  | EndEnv !Text -- ^ Environment end.
+  | ParenL | ParenR
+  | BracketL | BracketR
+  | VisibleBraceL | VisibleBraceR
+  | InvisibleBraceL | InvisibleBraceR
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- Convenient for writing word tokens as string literals.
+instance IsString Token where
+  fromString w = Word (Text.pack w)
+
+-- | Render a token back to a textual approximation of its concrete syntax.
+tokToText :: Token -> Text
+tokToText = \case
+  Word w -> w
+  Variable v -> v
+  Symbol s -> s
+  Integer n -> Text.pack (show n)
+  Command cmd -> Text.cons '\\' cmd
+  Label m -> "\\label{" <> m <> "}"
+  Ref ms -> "\\ref{" <> Text.intercalate ", " (toList ms) <> "}"
+  -- The math environment is normalized to @$@ delimiters; these two
+  -- cases must precede the generic 'BeginEnv'/'EndEnv' cases.
+  BeginEnv "math" -> "$"
+  EndEnv "math" -> "$"
+  BeginEnv env -> "\\begin{" <> env <> "}"
+  EndEnv env -> "\\end{" <> env <> "}"
+  ParenL -> "("
+  ParenR -> ")"
+  BracketL -> "["
+  BracketR -> "]"
+  VisibleBraceL -> "\\{"
+  VisibleBraceR -> "\\}"
+  InvisibleBraceL -> "{"
+  InvisibleBraceR -> "}"
+
+-- | Render a token as a 'String'; see 'tokToText'.
+tokToString :: Token -> String
+tokToString t = Text.unpack (tokToText t)
+
+-- Pretty-printing mirrors 'tokToText', except that math environments
+-- keep their \begin{...}/\end{...} form instead of being normalized to
+-- @$@ — NOTE(review): presumably intentional for display; confirm.
+instance Pretty Token where
+  pretty = \case
+    Word w -> pretty w
+    Variable v -> pretty v
+    Symbol s -> pretty s
+    Integer n -> pretty n
+    Command cmd -> "\\" <> pretty cmd
+    Label m -> "\\label{" <> pretty m <> "}"
+    Ref m -> "\\ref{" <> pretty m <> "}"
+    BeginEnv env -> "\\begin{" <> pretty env <> "}"
+    EndEnv env -> "\\end{" <> pretty env <> "}"
+    ParenL -> "("
+    ParenR -> ")"
+    BracketL -> "["
+    BracketR -> "]"
+    VisibleBraceL -> "\\{"
+    VisibleBraceR -> "\\}"
+    InvisibleBraceL -> "{"
+    InvisibleBraceR -> "}"
+
+
+-- | A value annotated with the source position where it starts.
+data Located a = Located
+  { startPos :: !SourcePos
+  , unLocated :: !a
+  } deriving (Show)
+
+-- Positions are ignored for comparison: located values compare by payload.
+instance Eq a => Eq (Located a) where (==) = (==) `on` unLocated
+instance Ord a => Ord (Located a) where compare = compare `on` unLocated
+
+
+-- | Parses tokens, switching tokenizing mode when encountering math environments.
+toks :: Lexer [Located Token]
+toks = whitespace *> goNormal id <* eof
+  where
+    -- Each worker accumulates tokens in a difference list @f@.
+    -- goNormal: top-level text.
+    goNormal f = do
+      r <- optional tok
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (BeginEnv "math")) -> goMath (f . (t:))
+        Just t@(Located _ (BeginEnv "align*")) -> goMath (f . (t:))
+        Just t -> goNormal (f . (t:))
+    -- goText: text embedded in math via @\text{...}@.
+    goText f = do
+      r <- optional textToken
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (BeginEnv "math")) -> goMathInText (f . (t:))
+        Just t@(Located _ (EndEnv "text")) -> goMath (f . (t:))
+        -- NOTE(review): 'beginExplanation' emits @BeginEnv "text"@ and
+        -- 'textToken' only emits @EndEnv "text"@, so the "explanation"
+        -- branches here and in goMath look unreachable — confirm.
+        Just t@(Located _ (EndEnv "explanation")) -> goMath (f . (t:))
+        Just t -> goText (f . (t:))
+    -- goMath: inside a math environment.
+    goMath f = do
+      r <- optional mathToken
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (EndEnv "math")) -> goNormal (f . (t:))
+        Just t@(Located _ (EndEnv "align*")) -> goNormal (f . (t:))
+        Just t@(Located _ (BeginEnv "text")) -> goText (f . (t:))
+        Just t@(Located _ (BeginEnv "explanation")) -> goText (f . (t:))
+        Just t -> goMath (f . (t:))
+    -- goMathInText: math nested inside a @\text{...}@ group.
+    goMathInText f = do
+      r <- optional mathToken
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (EndEnv "math")) -> goText (f . (t:))
+        Just t@(Located _ (BeginEnv "text")) -> goText (f . (t:))
+        Just t -> goMathInText (f . (t:))
+{-# INLINE toks #-}
+
+-- | Parses a single normal mode token. Alternatives are tried in
+-- order, so e.g. 'alignBegin' precedes the generic 'begin'.
+tok :: Lexer (Located Token)
+tok = asum
+  [ word, var, symbol, mathBegin, alignBegin, begin, end
+  , opening, closing, label, ref, command
+  ]
+
+-- | Parses a single math mode token. Alternatives are tried in order,
+-- so e.g. 'alignEnd' precedes the generic 'end'.
+mathToken :: Lexer (Located Token)
+mathToken = asum
+  [ var, symbol, number, begin, alignEnd, end, opening, closing
+  , beginText, beginExplanation, mathEnd, command
+  ]
+
+-- | Parses @\text{@ in math mode and switches to text tokens.
+-- The matching closing brace is recognized by 'textToken' using the
+-- 'textNesting' counter.
+beginText :: Lexer (Located Token)
+beginText = lexeme do
+  Char.string "\\text{"
+  setTextMode
+  pure (BeginEnv "text")
+
+-- | Same as text modulo spacing, so we treat it synonymously:
+-- @\explanation{@ also emits @BeginEnv "text"@, meaning downstream
+-- code never sees a distinct "explanation" environment token.
+beginExplanation :: Lexer (Located Token)
+beginExplanation = lexeme do
+  Char.string "\\explanation{"
+  setTextMode
+  pure (BeginEnv "text")
+
+-- | Normal mode embedded into math mode via @\text{...}@.
+textToken :: Lexer (Located Token)
+textToken = word <|> symbol <|> begin <|> end <|> textEnd <|> mathBegin <|> alignBegin <|> opening' <|> closing' <|> ref <|> command
+  where
+    -- The closing brace of the @\text{...}@ group itself; only fires
+    -- when no inner group braces are open.
+    textEnd = lexeme do
+      0 <- gets textNesting -- Otherwise fail.
+      Char.char '}'
+      setMathMode
+      pure (EndEnv "text")
+
+    -- Like 'opening'/'closing', but unescaped braces adjust the
+    -- nesting counter so inner groups do not end the @\text{...}@ group.
+    opening' = lexeme (brace <|> group <|> paren <|> bracket)
+      where
+        brace = VisibleBraceL <$ lexeme (Char.string "\\{")
+        group = InvisibleBraceL <$ lexeme (Char.char '{') <* modify' incrNesting
+        paren = ParenL <$ lexeme (Char.char '(')
+        bracket = BracketL <$ lexeme (Char.char '[')
+
+    closing' = lexeme (brace <|> group <|> paren <|> bracket)
+      where
+        brace = VisibleBraceR <$ lexeme (Char.string "\\}")
+        group = InvisibleBraceR <$ lexeme (Char.char '}') <* modify' decrNesting
+        paren = ParenR <$ lexeme (Char.char ')')
+        bracket = BracketR <$ lexeme (Char.char ']')
+
+
+-- | Parses a single begin math token.
+-- All three delimiters (@\(@, @\[@, @$@) are normalized to
+-- @BeginEnv "math"@; display math is not distinguished from inline.
+mathBegin :: Lexer (Located Token)
+mathBegin = guardM isTextMode *> lexeme do
+  Char.string "\\(" <|> Char.string "\\[" <|> Char.string "$"
+  setMathMode
+  pure (BeginEnv "math")
+
+-- | Parses @\begin{align*}@ in text mode and switches to math mode.
+-- Tried before the generic 'begin' parser (see 'tok') so the
+-- alignment environment gets its dedicated token.
+alignBegin :: Lexer (Located Token)
+alignBegin = guardM isTextMode *> lexeme do
+  Char.string "\\begin{align*}"
+  setMathMode
+  pure (BeginEnv "align*")
+
+
+-- | Parses a single end math token; mirrors 'mathBegin'.
+mathEnd :: Lexer (Located Token)
+mathEnd = guardM isMathMode *> lexeme do
+  Char.string "\\)" <|> Char.string "\\]" <|> Char.string "$"
+  setTextMode
+  pure (EndEnv "math")
+
+-- | Parses @\end{align*}@ in math mode; mirrors 'alignBegin'.
+alignEnd :: Lexer (Located Token)
+alignEnd = guardM isMathMode *> lexeme do
+  Char.string "\\end{align*}"
+  setTextMode
+  pure (EndEnv "align*")
+
+
+-- | Parses a word: letters, apostrophes, and hyphens. Words are
+-- returned casefolded, since we want to ignore their case later on.
+word :: Lexer (Located Token)
+word = guardM isTextMode *> lexeme do
+  letters <- some (Char.letterChar <|> Char.char '\'' <|> Char.char '-')
+  pure (Word (Text.toCaseFold (Text.pack letters)))
+
+-- | Parses a decimal integer literal.
+number :: Lexer (Located Token)
+number = lexeme (Integer <$> Lexer.decimal)
+
+
+-- | Parses a variable in math mode: a Latin letter, a blackboard
+-- letter (@\mathbb{...}@), or a greek-letter command, optionally
+-- followed by a numeric subscript or prime ticks.
+var :: Lexer (Located Token)
+var = guardM isMathMode *> lexeme (fmap Variable var')
+  where
+    var' = do
+      alphabeticPart <- letter <|> bb <|> greek
+      variationPart <- subscriptNumber <|> ticked <|> pure ""
+      pure (alphabeticPart <> variationPart)
+
+    -- A subscript of the form @_123@; only digits are accepted.
+    subscriptNumber :: Lexer Text
+    subscriptNumber = do
+      Char.char '_'
+      n <- some Char.digitChar
+      pure (Text.pack n)
+
+    -- Temporary hack to fit the TPTP format: each prime tick becomes
+    -- the literal string "prime".
+    ticked :: Lexer Text
+    ticked = do
+      ticks <- some $ Char.char '\''
+      let ticks' = "prime" <$ ticks :: [Text]
+      pure (Text.concat ticks')
+
+    letter :: Lexer Text
+    letter = fmap Text.singleton Char.letterChar
+
+    -- A greek-letter command such as @\alpha@; the backslash is dropped.
+    greek :: Lexer Text
+    greek = try do
+      Char.char '\\'
+      l <- symbolParser greeks
+      notFollowedBy Char.letterChar
+      pure l
+
+    greeks :: [Text]
+    greeks =
+      [ "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta"
+      , "iota", "kappa", "lambda", "mu", "nu", "xi", "pi", "rho", "sigma"
+      , "tau", "upsilon", "phi", "chi", "psi", "omega"
+      , "Gamma", "Delta", "Theta", "Lambda", "Xi", "Pi", "Sigma", "Upsilon"
+      , "Phi", "Psi", "Omega"
+      ]
+
+    -- A blackboard letter @\mathbb{A}@ is encoded as @bbA@.
+    bb :: Lexer Text
+    bb = do
+      Char.string "\\mathbb{"
+      l <- symbolParser bbs
+      Char.char '}'
+      pure $ "bb" <> l
+
+    bbs :: [Text]
+    bbs = Text.singleton <$> ['A'..'Z']
+
+    -- Accept the first matching string from the candidate list.
+    symbolParser :: [Text] -> Lexer Text
+    symbolParser symbols = asum (fmap Char.string symbols)
+
+
+-- | Parses a maximal run of symbol characters as one 'Symbol' token.
+symbol :: Lexer (Located Token)
+symbol = lexeme do
+  run <- some (satisfy (`elem` symbolChars))
+  pure (Symbol (Text.pack run))
+  where
+    symbolChars :: [Char]
+    symbolChars = ".,:;!?@=≠+-/|^><≤≥*&≈⊂⊃⊆⊇∈“”‘’"
+
+-- | Parses a TEX-style command; the stored name omits the backslash.
+command :: Lexer (Located Token)
+command = lexeme do
+  _ <- Char.char '\\'
+  name <- some Char.letterChar
+  pure (Command (Text.pack name))
+
+-- | Parses the beginning of an environment.
+-- Commits only after having seen "\begin{".
+begin :: Lexer (Located Token)
+begin = lexeme do
+  _ <- Char.string "\\begin{"
+  envName <- some (Char.letterChar <|> Char.char '*')
+  _ <- Char.char '}'
+  pure (BeginEnv (Text.pack envName))
+
+-- | Parses a label command and extracts its marker.
+label :: Lexer (Located Token)
+label = lexeme do
+  _ <- Char.string "\\label{"
+  m <- marker
+  _ <- Char.char '}'
+  pure (Label m)
+
+-- | Parses a reference command and extracts its markers.
+-- Supports @\ref@, @\cref@, and @\hyperref@; for
+-- @\hyperref[markers]{title}@ the display title is read and discarded.
+ref :: Lexer (Located Token)
+ref = lexeme do
+  -- @\\cref@ is from @cleveref@ and @\\hyperref@ is from @hyperref@
+  cmd <- Char.string "\\ref{" <|> Char.string "\\cref{" <|> Char.string "\\hyperref["
+  ms <- NonEmpty.fromList <$> marker `sepBy1` Char.char ','
+  case cmd of
+    "\\hyperref[" -> Char.string "]{" *> some (satisfy (/= '}')) *> Char.char '}' *> pure (Ref ms)
+    _ -> Char.char '}' *> pure (Ref ms)
+
+-- | A cross-reference marker: a nonempty run of lowercase ASCII
+-- letters and underscores.
+marker :: Lexer Text
+marker = takeWhile1P Nothing (\x -> isAsciiLower x || x == '_')
+
+-- | Parses the end of an environment.
+-- Commits only after having seen "\end{".
+end :: Lexer (Located Token)
+end = lexeme do
+  _ <- Char.string "\\end{"
+  envName <- some (Char.letterChar <|> Char.char '*')
+  _ <- Char.char '}'
+  pure (EndEnv (Text.pack envName))
+
+-- | Parses an opening delimiter.
+-- NOTE(review): each alternative is wrapped in an inner 'lexeme' even
+-- though the whole parser is 'lexeme'-wrapped; the second whitespace
+-- skip is a no-op, so this looks redundant but harmless — confirm.
+opening :: Lexer (Located Token)
+opening = lexeme (paren <|> brace <|> group <|> bracket)
+  where
+    brace = VisibleBraceL <$ lexeme (Char.string "\\{")
+    group = InvisibleBraceL <$ lexeme (Char.char '{')
+    paren = ParenL <$ lexeme (Char.char '(')
+    bracket = BracketL <$ lexeme (Char.char '[')
+
+-- | Parses a closing delimiter; mirrors 'opening'.
+closing :: Lexer (Located Token)
+closing = lexeme (paren <|> brace <|> group <|> bracket)
+  where
+    brace = VisibleBraceR <$ lexeme (Char.string "\\}")
+    group = InvisibleBraceR <$ lexeme (Char.char '}')
+    paren = ParenR <$ lexeme (Char.char ')')
+    bracket = BracketR <$ lexeme (Char.char ']')
+
+-- | Turns a Lexer into one that tracks the source position of the token
+-- and consumes trailing whitespace.
+lexeme :: Lexer a -> Lexer (Located a)
+lexeme p = do
+  tokenStart <- getSourcePos
+  parsed <- p
+  whitespace
+  pure (Located tokenStart parsed)
+
+-- | A single unit of insignificant space: a literal space or newline,
+-- or one of TEX's spacing commands. (The original listed @\\;@ twice;
+-- the duplicate alternative was removed.)
+space :: Lexer ()
+space = void (Char.char ' ' <|> Char.char '\n')
+  <|> void (Char.string "\\ " <|> Char.string "\\\\" <|> Char.string "\\!" <|> Char.string "\\," <|> Char.string "\\:" <|> Char.string "\\;")
+
+-- | Skips runs of insignificant 'space' and line comments (@%@ … EOL).
+whitespace :: Lexer ()
+whitespace = Lexer.space (void (some space)) (Lexer.skipLineComment "%") empty