summaryrefslogtreecommitdiff
path: root/source/Syntax
diff options
context:
space:
mode:
authoradelon <22380201+adelon@users.noreply.github.com>2024-02-10 02:22:14 +0100
committeradelon <22380201+adelon@users.noreply.github.com>2024-02-10 02:22:14 +0100
commit442d732696ad431b84f6e5c72b6ee785be4fd968 (patch)
treeb476f395e7e91d67bacb6758bc84914b8711593f /source/Syntax
Initial commit
Diffstat (limited to 'source/Syntax')
-rw-r--r--source/Syntax/Abstract.hs468
-rw-r--r--source/Syntax/Adapt.hs382
-rw-r--r--source/Syntax/Chunk.hs46
-rw-r--r--source/Syntax/Concrete.hs657
-rw-r--r--source/Syntax/Concrete/Keywords.hs222
-rw-r--r--source/Syntax/Import.hs43
-rw-r--r--source/Syntax/Internal.hs612
-rw-r--r--source/Syntax/LexicalPhrase.hs93
-rw-r--r--source/Syntax/Lexicon.hs275
-rw-r--r--source/Syntax/Token.hs438
10 files changed, 3236 insertions, 0 deletions
diff --git a/source/Syntax/Abstract.hs b/source/Syntax/Abstract.hs
new file mode 100644
index 0000000..6f30612
--- /dev/null
+++ b/source/Syntax/Abstract.hs
@@ -0,0 +1,468 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE StandaloneDeriving #-}
+
+
+-- | Data types for the abstract syntax tree and helper functions
+-- for constructing the lexicon.
+--
+module Syntax.Abstract
+ ( module Syntax.Abstract
+ , module Syntax.LexicalPhrase
+ , module Syntax.Token
+ ) where
+
+
+import Base
+import Syntax.LexicalPhrase (LexicalPhrase, SgPl(..), unsafeReadPhraseSgPl, unsafeReadPhrase)
+import Syntax.Token (Token(..))
+
+import Text.Earley.Mixfix (Holey)
+import Data.Text qualified as Text
+import Text.Megaparsec.Pos (SourcePos)
+
+-- | Local "variable-like" symbols that can be captured by binders.
+data VarSymbol
+ = NamedVar Text -- ^ A named variable.
+ | FreshVar Int -- ^ A nameless (implicit) variable. Should only come from desugaring.
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- | String literals denote named variables ('NamedVar').
+instance IsString VarSymbol where
+ fromString v = NamedVar $ Text.pack v
+
+
+-- | Symbolic expressions (the contents of math environments).
+data Expr
+ = ExprVar VarSymbol
+ -- ^ A variable occurrence.
+ | ExprInteger Int
+ -- ^ An integer literal.
+ | ExprOp FunctionSymbol [Expr]
+ -- ^ A mixfix function symbol applied to its arguments.
+ | ExprStructOp StructSymbol (Maybe Expr)
+ -- ^ A structure operation with an optional argument expression
+ -- (NOTE(review): presumably the structure it is read from — confirm).
+ | ExprFiniteSet (NonEmpty Expr)
+ -- ^ A finite set literal; desugaring uses 'ConsSymbol'.
+ | ExprSep VarSymbol Expr Stmt
+ -- ^ Of the form /@{x ∈ X | P(x)}@/.
+ | ExprReplace Expr (NonEmpty (VarSymbol,Expr)) (Maybe Stmt)
+ -- ^ E.g.: /@{ f(x, y) | x ∈ X, y ∈ Y | P(x, y) }@/.
+ | ExprReplacePred VarSymbol VarSymbol Expr Stmt
+ -- ^ E.g.: /@{ y | \\exists x\\in X. P(x, y) }@/.
+ deriving (Show, Eq, Ord)
+
+
+-- | A mixfix pattern over tokens: 'Nothing' marks an argument slot.
+type FunctionSymbol = Holey Token
+
+-- | A single relator token, e.g. /@\in@/.
+type RelationSymbol = Token
+
+-- | Name of a structure operation (the text of its TEX command).
+newtype StructSymbol = StructSymbol { unStructSymbol :: Text } deriving newtype (Show, Eq, Ord, Hashable)
+
+-- | The predefined @cons@ function symbol used for desugaring finite set expressions.
+pattern ConsSymbol :: FunctionSymbol
+pattern ConsSymbol = [Just (Command "cons"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR, Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | The predefined @pair@ function symbol used for desugaring tuple notation.
+pattern PairSymbol :: FunctionSymbol
+pattern PairSymbol = [Just (Command "pair"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR, Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | Function application /@f(x)@/ desugars to /@\apply{f}{x}@/.
+pattern ApplySymbol :: FunctionSymbol
+pattern ApplySymbol = [Just (Command "apply"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR, Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | The domain operator /@\dom{...}@/ (one argument slot).
+pattern DomSymbol :: FunctionSymbol
+pattern DomSymbol = [Just (Command "dom"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR]
+
+-- | The carrier operation of a structure.
+pattern CarrierSymbol :: StructSymbol
+pattern CarrierSymbol = StructSymbol "carrier"
+
+-- | A constant is a nullary operator.
+pattern ExprConst :: Token -> Expr
+pattern ExprConst c = ExprOp [Just c] []
+
+-- | Function application via 'ApplySymbol'.
+pattern ExprApp :: Expr -> Expr -> Expr
+pattern ExprApp e1 e2 = ExprOp ApplySymbol [e1, e2]
+
+-- | Pair formation via 'PairSymbol'.
+pattern ExprPair :: Expr -> Expr -> Expr
+pattern ExprPair e1 e2 = ExprOp PairSymbol [e1, e2]
+
+-- | Tuples are interpreted as nested pairs:
+-- the triple /@(a, b, c)@/ is interpreted as
+-- /@(a, (b, c))@/.
+-- This means that the product operation should also
+-- be right associative, so that /@(a, b, c)@/ can
+-- form elements of /@A\times B\times C@/.
+makeTuple :: NonEmpty Expr -> Expr
+makeTuple = foldr1 ExprPair
+
+
+-- | A chain of relations, e.g. /@a, b < c = d@/.
+data Chain
+ = ChainBase (NonEmpty Expr) Sign Relation (NonEmpty Expr)
+ -- ^ A single relation between two comma-lists of expressions.
+ | ChainCons (NonEmpty Expr) Sign Relation Chain
+ -- ^ A comma-list related to the head of a longer chain.
+ deriving (Show, Eq, Ord)
+
+-- | What may stand between the sides of a relation.
+data Relation
+ = RelationSymbol RelationSymbol -- ^ E.g.: /@x \in X@/
+ | RelationExpr Expr -- ^ E.g.: /@x \mathrel{R} y@/
+ deriving (Show, Eq, Ord)
+
+-- | Polarity of a relation occurrence.
+data Sign = Positive | Negative deriving (Show, Eq, Ord)
+
+-- | Symbolic formulas: chains plus logical structure on top of them.
+data Formula
+ = FormulaChain Chain
+ | FormulaPredicate PrefixPredicate (NonEmpty Expr)
+ | Connected Connective Formula Formula
+ | FormulaNeg Formula
+ | FormulaQuantified Quantifier (NonEmpty VarSymbol) Bound Formula
+ | PropositionalConstant PropositionalConstant
+ deriving (Show, Eq, Ord)
+
+-- | The propositional constants ⊥ and ⊤.
+data PropositionalConstant = IsBottom | IsTop
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- | A prefix predicate symbol together with its arity.
+data PrefixPredicate
+ = PrefixPredicate Text Int
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+-- | Binary propositional connectives (negation is 'FormulaNeg').
+data Connective
+ = Conjunction
+ | Disjunction
+ | Implication
+ | Equivalence
+ | ExclusiveOr
+ | NegatedDisjunction
+ deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+
+-- | Interpret a mixfix connective pattern applied to its subformulas.
+-- Partial: calls 'error' on any pattern/arity combination not listed
+-- here, so this function must stay in sync with the lexicon's
+-- connective table.
+makeConnective :: Holey Token -> [Formula] -> Formula
+makeConnective [Nothing, Just (Command "implies"), Nothing] [f1, f2] = Connected Implication f1 f2
+makeConnective [Nothing, Just (Command "land"), Nothing] [f1, f2] = Connected Conjunction f1 f2
+makeConnective [Nothing, Just (Command "lor"), Nothing] [f1, f2] = Connected Disjunction f1 f2
+makeConnective [Nothing, Just (Command "iff"), Nothing] [f1, f2] = Connected Equivalence f1 f2
+makeConnective [Just(Command "lnot"), Nothing] [f1] = FormulaNeg f1
+makeConnective pat _ = error ("makeConnective does not handle the following connective correctly: " <> show pat)
+
+
+
+-- | The (singular/plural) lexical phrase naming a structure.
+type StructPhrase = SgPl LexicalPhrase
+
+-- | For example 'an integer' would be
+-- > Noun (unsafeReadPhrase "integer[/s]") []
+type Noun = NounOf Term
+data NounOf a
+ = Noun (SgPl LexicalPhrase) [a]
+ deriving (Show, Eq, Ord)
+
+
+
+
+type NounPhrase t = NounPhraseOf t Term
+-- NOTE: 'NounPhraseOf' is only used with arguments of type 'Term',
+-- but keeping the argument parameter @a@ allows the 'Show' and 'Eq'
+-- instances to remain decidable.
+-- The container @t@ holds the phrase's names: @Maybe@ for an optional
+-- name, @[]@ for a list of names, 'Nameless' for none.
+data NounPhraseOf t a
+ = NounPhrase [AdjLOf a] (NounOf a) (t VarSymbol) [AdjROf a] (Maybe Stmt)
+
+-- Hand-written instances so the constraints can mention @t VarSymbol@.
+instance (Show a, Show (t VarSymbol)) => Show (NounPhraseOf t a) where
+ show (NounPhrase ls n vs rs ms) =
+ "NounPhrase ("
+ <> show ls <> ") ("
+ <> show n <> ") ("
+ <> show vs <> ") ("
+ <> show rs <> ") ("
+ <> show ms <> ")"
+
+-- Field-wise equality, mirroring what a derived instance would do.
+instance (Eq a, Eq (t VarSymbol)) => Eq (NounPhraseOf t a) where
+ NounPhrase ls n vs rs ms == NounPhrase ls' n' vs' rs' ms' =
+ ls == ls' && n == n' && vs == vs' && rs == rs' && ms == ms'
+
+-- This is arbitrary and ugly, but it's useful to have a somewhat
+-- usable Ord instance for all raw syntax (e.g. for 'nubOrd').
+--
+-- Lexicographic comparison prioritizing the noun: later fields are
+-- only consulted when the earlier ones compare 'EQ'. (The previous
+-- version recursed on 'GT' instead of 'EQ', so phrases that 'Eq'
+-- considers distinct could compare 'EQ' — making 'nubOrd' drop them —
+-- and @compare x y@ and @compare y x@ could both be 'LT', violating
+-- antisymmetry and corrupting ordered containers.)
+instance (Ord a, Ord (t VarSymbol)) => Ord (NounPhraseOf t a) where
+ NounPhrase ls n vs rs ms `compare` NounPhrase ls' n' vs' rs' ms' =
+ compare n n' <> compare ls ls' <> compare rs rs' <> compare ms ms' <> compare vs vs'
+
+-- | @Nameless a@ is equivalent to @Const () a@ (from "Data.Functor.Const").
+-- It describes a container that is unwilling to actually contain something.
+-- @Nameless@ lets us treat nouns with no names, one name, or many names uniformly.
+-- Thus @NounPhraseOf Nameless a@ is a noun phrase without a name and with arguments
+-- of type @a@.
+data Nameless a = Nameless deriving (Show, Eq, Ord)
+
+
+-- | Left adjectives modify nouns from the left side,
+-- e.g. /@even@/, /@continuous@/, and /@σ-finite@/.
+type AdjL = AdjLOf Term
+data AdjLOf a
+ = AdjL LexicalPhrase [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | Right attributes consist of basic right adjectives, e.g.
+-- /@divisible by ?@/, or /@of finite type@/ and verb phrases
+-- marked with /@that@/, such as /@integer that divides n@/.
+-- In some cases these right attributes may be followed
+-- by an additional such-that phrase.
+type AdjR = AdjROf Term
+data AdjROf a
+ = AdjR LexicalPhrase [a]
+ | AttrRThat VerbPhrase
+ deriving (Show, Eq, Ord)
+
+
+-- | Adjectives for parts of the AST where adjectives are not used
+-- to modify nouns and the L/R distinction does not matter, such as
+-- when they are used together with a copula (like /@n is even@/).
+type Adj = AdjOf Term
+data AdjOf a
+ = Adj LexicalPhrase [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | A verb pattern together with its (non-subject) arguments.
+type Verb = VerbOf Term
+data VerbOf a
+ = Verb (SgPl LexicalPhrase) [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | A functional noun together with its arguments,
+-- e.g. /@the derivative of $f$@/.
+type Fun = FunOf Term
+data FunOf a
+ = Fun (SgPl LexicalPhrase) [a]
+ deriving (Show, Eq, Ord)
+
+
+-- | Predicative part of a statement, possibly negated.
+type VerbPhrase = VerbPhraseOf Term
+data VerbPhraseOf a
+ = VPVerb (VerbOf a)
+ | VPAdj (NonEmpty (AdjOf a)) -- ^ @x is foo@ / @x is foo and bar@
+ | VPVerbNot (VerbOf a)
+ | VPAdjNot (NonEmpty (AdjOf a)) -- ^ @x is not foo@ / @x is neither foo nor bar@
+ deriving (Show, Eq, Ord)
+
+
+data Quantifier
+ = Universally
+ | Existentially
+ | Nonexistentially
+ deriving (Show, Eq, Ord)
+
+-- | A quantifier together with the noun phrase it ranges over.
+data QuantPhrase = QuantPhrase Quantifier (NounPhrase []) deriving (Show, Eq, Ord)
+
+
+-- | Terms: the noun-like things statements talk about.
+data Term
+ = TermExpr Expr
+ -- ^ A symbolic expression.
+ | TermFun Fun
+ -- ^ Definite noun phrase, e.g. /@the derivative of $f$@/.
+ | TermIota VarSymbol Stmt
+ -- ^ Definite descriptor, e.g. /@an $x$ such that ...@/.
+ | TermQuantified Quantifier (NounPhrase Maybe)
+ -- ^ Indefinite quantified notion, e.g. /@every even integer that divides $k$ ...@/.
+ deriving (Show, Eq, Ord)
+
+
+-- | Statements: the bodies of axioms, lemmas, and definitions.
+data Stmt
+ = StmtFormula Formula -- ^ E.g.: /@We have \<Formula\>@/.
+ | StmtVerbPhrase (NonEmpty Term) VerbPhrase -- ^ E.g.: /@\<Term\> and \<Term\> \<verb\>@/.
+ | StmtNoun Term (NounPhrase Maybe) -- ^ E.g.: /@\<Term\> is a(n) \<NP\>@/.
+ | StmtStruct Term StructPhrase -- ^ E.g.: /@\<Term\> is a monoid@/ (cf. 'AsmLetStruct').
+ | StmtNeg Stmt -- ^ E.g.: /@It is not the case that \<Stmt\>@/.
+ | StmtExists (NounPhrase []) -- ^ E.g.: /@There exists a(n) \<NP\>@/.
+ | StmtConnected Connective Stmt Stmt
+ | StmtQuantPhrase QuantPhrase Stmt
+ | SymbolicQuantified Quantifier (NonEmpty VarSymbol) Bound (Maybe Stmt) Stmt
+ -- ^ Quantifier, bound variables, bound, optional such-that clause, body.
+ deriving (Show, Eq, Ord)
+
+-- | An optional bound on quantified variables, given by a (possibly
+-- negated) relation to an expression.
+data Bound = Unbounded | Bounded Sign Relation Expr deriving (Show, Eq, Ord)
+
+-- | Universal quantification with an optional such-that clause.
+pattern SymbolicForall :: NonEmpty VarSymbol -> Bound -> Maybe Stmt -> Stmt -> Stmt
+pattern SymbolicForall vs bound suchThat have = SymbolicQuantified Universally vs bound suchThat have
+
+-- | Existential quantification (never carries a such-that clause).
+pattern SymbolicExists :: NonEmpty VarSymbol -> Bound -> Stmt -> Stmt
+pattern SymbolicExists vs bound suchThat = SymbolicQuantified Existentially vs bound Nothing suchThat
+
+-- | Negated existential quantification.
+pattern SymbolicNotExists :: NonEmpty VarSymbol -> Bound -> Stmt -> Stmt
+pattern SymbolicNotExists vs bound suchThat = StmtNeg (SymbolicExists vs bound suchThat)
+
+-- | Assumptions preceding a statement or definition.
+data Asm
+ = AsmSuppose Stmt
+ | AsmLetNoun (NonEmpty VarSymbol) (NounPhrase Maybe) -- ^ E.g.: /@let k be an integer@/
+ | AsmLetIn (NonEmpty VarSymbol) Expr -- ^ E.g.: /@let $k\in\integers$@/
+ | AsmLetThe VarSymbol Fun -- ^ E.g.: /@let $g$ be the derivative of $f$@/
+ | AsmLetEq VarSymbol Expr -- ^ E.g.: /@let $m = n + k$@/
+ | AsmLetStruct VarSymbol StructPhrase -- ^ E.g.: /@let $A$ be a monoid@/
+ deriving (Show, Eq, Ord)
+
+-- | An axiom: assumptions and the axiomatized statement.
+data Axiom = Axiom [Asm] Stmt
+ deriving (Show, Eq, Ord)
+
+-- | A lemma (or theorem etc.): assumptions and the claimed statement.
+data Lemma = Lemma [Asm] Stmt
+ deriving (Show, Eq, Ord)
+
+-- | The head of the definition describes the part before the /@iff@/,
+-- i.e. the definiendum. An optional noun-phrase corresponds to an optional
+-- type annotation for the 'Term' of the head. The last part of the head
+-- is the lexical phrase that is defined.
+--
+-- > "A natural number $n$ divides $m$ iff ..."
+-- > ^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^ ^^^
+-- > type annotation variable verb definiens
+-- > (a noun phrase) (all args are vars) (a statement)
+--
+data DefnHead
+ = DefnAdj (Maybe (NounPhrase Maybe)) VarSymbol (AdjOf VarSymbol)
+ | DefnVerb (Maybe (NounPhrase Maybe)) VarSymbol (VerbOf VarSymbol)
+ | DefnNoun VarSymbol (NounOf VarSymbol)
+ | DefnSymbolicPredicate PrefixPredicate (NonEmpty VarSymbol)
+ | DefnRel VarSymbol RelationSymbol VarSymbol
+ -- ^ E.g.: /@$x \subseteq y$ iff [...@/
+ deriving (Show, Eq, Ord)
+
+-- | A definition block.
+data Defn
+ = Defn [Asm] DefnHead Stmt
+ | DefnFun [Asm] (FunOf VarSymbol) (Maybe Term) Term
+ -- ^ A 'DefnFun' consists of the functional noun (which must start with /@the@/)
+ -- and an optional specification of a symbolic equivalent. The symbolic equivalent
+ -- does not need to have the same variables as the full functional noun pattern.
+ --
+ -- > "The tensor product of $U$ and $V$ over $K$, $U\tensor V$, is ..."
+ -- > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^
+ -- > definiendum symbolic eqv. definiens
+ -- > (a functional noun) (an expression) (a term)
+ --
+ | DefnOp SymbolPattern Expr
+ -- ^ Symbolic operator definition: pattern applied to variables equals an expression.
+ deriving (Show, Eq, Ord)
+
+-- | Proof steps; most constructors carry the remainder of the proof.
+data Proof
+ = Omitted
+ -- ^ No proof given.
+ | Qed Justification
+ -- ^ Ends of a proof, leaving automation to discharge the current goal using the given justification.
+ | ByCase [Case]
+ -- ^ Case split.
+ | ByContradiction Proof
+ | BySetInduction (Maybe Term) Proof
+ -- ^ ∈-induction.
+ | ByOrdInduction Proof
+ -- ^ Transfinite induction for ordinals.
+ | Assume Stmt Proof
+ -- ^ Discharge the antecedent of an implication goal.
+ | FixSymbolic (NonEmpty VarSymbol) Bound Proof
+ -- ^ Fix variables (with an optional bound) for a universal goal.
+ | FixSuchThat (NonEmpty VarSymbol) Stmt Proof
+ -- ^ Fix variables constrained by a statement.
+ | Calc Calc Proof
+ -- ^ Simplify goals that are implications or disjunctions.
+ -- NOTE(review): this haddock looks misplaced — 'Calc' is a chain of
+ -- equalities/biconditionals; confirm the intended description.
+ | TakeVar (NonEmpty VarSymbol) Bound Stmt Justification Proof
+ -- ^ Obtain witnesses (symbolically) from an existential fact.
+ | TakeNoun (NounPhrase []) Justification Proof
+ -- ^ Obtain witnesses described by a noun phrase.
+ | Have (Maybe Stmt) Stmt Justification Proof
+ -- ^ /@Since \<stmt\>, we have \<stmt\> by \<ref\>.@/
+ | Suffices Stmt Justification Proof
+ -- ^ /@It suffices to show that [...]. [...]@/
+ | Subclaim Stmt Proof Proof
+ -- ^ A claim is a sublemma with its own proof:
+ -- /@Show \<goal stmt\>. \<steps\>. \<continue other proof\>.@/
+ | Define VarSymbol Expr Proof
+ -- ^ Local definition.
+ --
+ | DefineFunction VarSymbol VarSymbol Expr VarSymbol Expr Proof
+ -- ^ Local function definition, e.g. /@Let $f(x) = e$ for $x\\in d$@/.
+ -- The first 'VarSymbol' is the newly defined symbol, the second one is the argument.
+ -- The first 'Expr' is the value, the final variable and expr specify a bound (the domain of the function).
+ deriving (Show, Eq, Ord)
+
+-- | An inline justification.
+data Justification
+ = JustificationRef (NonEmpty Marker)
+ -- ^ Reference to labelled blocks.
+ | JustificationSetExt
+ -- ^ By set extensionality.
+ | JustificationEmpty
+ | JustificationLocal -- ^ Use only local assumptions
+ deriving (Show, Eq, Ord)
+
+
+-- | A case of a case split.
+data Case = Case
+ { caseOf :: Stmt
+ , caseProof :: Proof
+ } deriving (Show, Eq, Ord)
+
+-- | Calculational reasoning: chains of equalities or biconditionals.
+data Calc
+ = Equation Expr (NonEmpty (Expr, Justification))
+ -- ^ A chain of equalities. Each claimed equality has a (potentially empty) justification.
+ -- For example: @a &= b \\explanation{by \\cref{a_eq_b}} &= c@
+ -- would be (modulo expr constructors)
+ -- @Equation "a" [("b", JustificationRef "a_eq_b"), ("c", JustificationEmpty)]@.
+ | Biconditionals Formula (NonEmpty (Formula, Justification))
+ deriving (Show, Eq, Ord)
+
+
+-- | Abbreviations: notation that is expanded away rather than defined.
+data Abbreviation
+ = AbbreviationAdj VarSymbol (AdjOf VarSymbol) Stmt
+ | AbbreviationVerb VarSymbol (VerbOf VarSymbol) Stmt
+ | AbbreviationNoun VarSymbol (NounOf VarSymbol) Stmt
+ | AbbreviationRel VarSymbol RelationSymbol VarSymbol Stmt
+ | AbbreviationFun (FunOf VarSymbol) Term
+ | AbbreviationEq SymbolPattern Expr
+ deriving (Show, Eq, Ord)
+
+-- | Datatype declarations; currently only finite enumerations.
+data Datatype
+ = DatatypeFin (NounOf Term) (NonEmpty Text)
+ deriving (Show, Eq, Ord)
+
+-- | An inductively defined set.
+data Inductive = Inductive
+ { inductiveSymbolPattern :: SymbolPattern
+ , inductiveDomain :: Expr
+ , inductiveIntros :: NonEmpty IntroRule
+ }
+ deriving (Show, Eq, Ord)
+
+-- | One introduction rule of an inductive definition.
+data IntroRule = IntroRule
+ { introConditions :: [Formula] -- The inductively defined set may only appear as an argument of monotone operations on the rhs.
+ , introResult :: Formula -- TODO Refine.
+ }
+ deriving (Show, Eq, Ord)
+
+
+-- | A function symbol applied to its variable arguments.
+data SymbolPattern = SymbolPattern FunctionSymbol [VarSymbol]
+ deriving (Show, Eq, Ord)
+
+-- | Signatures introduce lexical items without a defining statement.
+data Signature
+ = SignatureAdj VarSymbol (AdjOf VarSymbol)
+ | SignatureVerb VarSymbol (VerbOf VarSymbol)
+ | SignatureNoun VarSymbol (NounOf VarSymbol)
+ | SignatureSymbolic SymbolPattern (NounPhrase Maybe)
+ -- ^ /@$\<symbol\>(\<vars\>)$ is a \<noun\>@/
+ deriving (Show, Eq, Ord)
+
+
+-- | Definition of a mathematical structure.
+data StructDefn = StructDefn
+ { structPhrase :: StructPhrase
+ -- ^ E.g.: @partial order@ or @abelian group@.
+ , structParents :: [StructPhrase]
+ -- ^ Structural parents
+ , structLabel :: VarSymbol
+ , structFixes :: [StructSymbol]
+ -- ^ List of text for commands representing constants not inherited from its parents,
+ -- e.g.: @\sqsubseteq@ or @\inv@.
+ , structAssumes :: [(Marker, Stmt)]
+ }
+ deriving (Show, Eq, Ord)
+
+-- | Label of a toplevel block, used for cross-references.
+newtype Marker = Marker Text deriving (Show, Eq, Ord, Generic)
+
+deriving newtype instance Hashable Marker
+
+instance IsString Marker where
+ fromString str = Marker (Text.pack str)
+
+-- | Toplevel blocks of a document, each with its source position.
+data Block
+ = BlockAxiom SourcePos Marker Axiom
+ | BlockLemma SourcePos Marker Lemma
+ | BlockProof SourcePos Proof
+ | BlockDefn SourcePos Marker Defn
+ | BlockAbbr SourcePos Marker Abbreviation
+ | BlockData SourcePos Datatype
+ | BlockInductive SourcePos Marker Inductive
+ | BlockSig SourcePos [Asm] Signature
+ | BlockStruct SourcePos Marker StructDefn
+ deriving (Show, Eq, Ord)
diff --git a/source/Syntax/Adapt.hs b/source/Syntax/Adapt.hs
new file mode 100644
index 0000000..0896c08
--- /dev/null
+++ b/source/Syntax/Adapt.hs
@@ -0,0 +1,382 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE ApplicativeDo #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Syntax.Adapt where
+
+import Base
+import Syntax.Abstract
+import Syntax.Lexicon
+import Syntax.Token
+
+import Data.Map.Strict qualified as Map
+import Data.Maybe (catMaybes)
+import Data.Set qualified as Set
+import Data.Sequence qualified as Seq
+import Data.HashSet qualified as HS
+import Data.HashMap.Strict qualified as HM
+import Data.Text qualified as Text
+import Text.Earley.Mixfix (Associativity(..))
+import Text.Regex.Applicative
+import Text.Megaparsec.Pos
+
+
+-- | Scan one token chunk (a toplevel environment) for the lexical items
+-- it introduces. Chunks that are not definition-like yield @[]@;
+-- definition-like chunks whose head does not match the expected shape
+-- abort with 'error', reporting the source position.
+scanChunk :: [Located Token] -> [ScannedLexicalItem]
+scanChunk ltoks =
+ let toks = unLocated <$> ltoks
+ matchOrErr re env pos = match re toks ?? error ("could not find lexical pattern in " <> env <> " at " <> sourcePosPretty pos)
+ in case ltoks of
+ Located pos (BeginEnv "definition") : _ -> matchOrErr definition "definition" pos
+ Located pos (BeginEnv "abbreviation") : _ -> matchOrErr abbreviation "abbreviation" pos
+ Located pos (BeginEnv "struct") :_ -> matchOrErr struct "struct definition" pos
+ Located pos (BeginEnv "inductive") :_ -> matchOrErr inductive "inductive definition" pos
+ _ -> []
+
+-- | Extend a lexicon with all lexical items scanned from the given chunks.
+adaptChunks :: [[Located Token]] -> Lexicon -> Lexicon
+adaptChunks = extendLexicon . concatMap scanChunk
+
+-- | A lexical item discovered while scanning, tagged with the marker
+-- of the block that introduced it.
+data ScannedLexicalItem
+ = ScanAdj LexicalPhrase Marker
+ | ScanFun LexicalPhrase Marker
+ | ScanNoun LexicalPhrase Marker
+ | ScanStructNoun LexicalPhrase Marker
+ | ScanVerb LexicalPhrase Marker
+ | ScanRelationSymbol RelationSymbol Marker
+ | ScanFunctionSymbol FunctionSymbol Marker
+ | ScanPrefixPredicate PrefixPredicate Marker
+ | ScanStructOp Text -- we can use the command text as export name.
+ deriving (Show, Eq)
+
+-- | Skip tokens up to the next lexical-item-introducing environment.
+-- NOTE(review): "inductive" environments are handled by 'scanChunk' but
+-- are missing from this list (and from 'notEndOfLexicalEnvToken') —
+-- confirm whether that is intentional; since 'chunkify' yields one
+-- environment per chunk it may never matter in practice.
+skipUntilNextLexicalEnv :: RE Token [Token]
+skipUntilNextLexicalEnv = many (psym otherToken)
+ where
+ otherToken tok = tok /= BeginEnv "definition" && tok /= BeginEnv "struct" && tok /= BeginEnv "abbreviation"
+
+-- | Match a single token that does not end a lexical environment.
+notEndOfLexicalEnvToken :: RE Token Token
+notEndOfLexicalEnvToken = psym innerToken
+ where
+ innerToken tok = tok /= EndEnv "definition" && tok /= EndEnv "struct" && tok /= EndEnv "abbreviation"
+
+-- | Match a definition environment, yielding the lexical item its head
+-- introduces, tagged with the environment's label.
+definition :: RE Token [ScannedLexicalItem]
+definition = do
+ sym (BeginEnv "definition")
+ few notEndOfLexicalEnvToken
+ m <- label
+ few anySym
+ lexicalItem <- head
+ few anySym
+ sym (EndEnv "definition")
+ skipUntilNextLexicalEnv
+ pure [lexicalItem m]
+
+-- | Like 'definition', but for abbreviation environments.
+-- NOTE(review): this uses @few anySym@ before the label where
+-- 'definition' uses @few notEndOfLexicalEnvToken@ — confirm the
+-- asymmetry is intended.
+abbreviation :: RE Token [ScannedLexicalItem]
+abbreviation = do
+ sym (BeginEnv "abbreviation")
+ few anySym
+ m <- label
+ few anySym
+ lexicalItem <- head
+ few anySym
+ sym (EndEnv "abbreviation")
+ skipUntilNextLexicalEnv
+ pure [lexicalItem m]
+
+label :: RE Token Marker
+label = msym \case
+ Label m -> Just (Marker m)
+ _ -> Nothing
+
+-- | 'RE' that matches the head of a definition.
+head :: RE Token (Marker -> ScannedLexicalItem)
+-- Note that @<|>@ is left biased for 'RE', so we can just
+-- place 'adj' before 'verb' and do not have to worry about
+-- overlapping patterns.
+head = ScanNoun <$> noun
+ <|> ScanAdj <$> adj
+ <|> ScanVerb <$> verb
+ <|> ScanFun <$> fun
+ <|> ScanRelationSymbol <$> relationSymbol
+ <|> ScanFunctionSymbol <$> functionSymbol
+ <|> ScanPrefixPredicate <$> prefixPredicate
+
+-- | Match an inductive-definition environment; its head introduces a
+-- function symbol (delimited by @\subseteq@, cf. 'functionSymbolInductive').
+inductive :: RE Token [ScannedLexicalItem]
+inductive = do
+ sym (BeginEnv "inductive")
+ few notEndOfLexicalEnvToken
+ m <- label
+ few anySym
+ lexicalItem <- functionSymbolInductive
+ few anySym
+ sym (EndEnv "inductive")
+ skipUntilNextLexicalEnv
+ pure [ScanFunctionSymbol lexicalItem m]
+
+-- | Match a struct environment: the struct noun itself plus any
+-- operations listed in an embedded enumerate environment.
+struct :: RE Token [ScannedLexicalItem]
+struct = do
+ sym (BeginEnv "struct")
+ few anySym
+ m <- label
+ few anySym
+ lexicalItem <- ScanStructNoun . toLexicalPhrase <$> (an *> structPat <* var)
+ few anySym
+ lexicalItems <- structOps <|> pure []
+ sym (EndEnv "struct")
+ skipUntilNextLexicalEnv
+ pure (lexicalItem m : lexicalItems)
+
+-- | The enumerate environment listing a struct's operations.
+structOps :: RE Token [ScannedLexicalItem]
+structOps = do
+ sym (BeginEnv "enumerate")
+ lexicalItems <- many structOp
+ sym (EndEnv "enumerate")
+ few anySym
+ pure lexicalItems
+
+-- | A single @\item@ introducing one struct operation (a math command).
+structOp :: RE Token ScannedLexicalItem
+structOp = do
+ sym (Command "item")
+ op <- math command
+ pure (ScanStructOp op)
+
+-- | Head of a noun definition: /@$x$ is a(n) \<pattern\> iff@/.
+noun :: RE Token LexicalPhrase
+noun = toLexicalPhrase <$> (var *> is *> an *> pat <* iff)
+
+-- | Head of an adjective definition: /@$x$ is \<pattern\> iff@/.
+adj :: RE Token LexicalPhrase
+adj = toLexicalPhrase <$> (var *> is *> pat <* iff)
+
+-- | Head of a verb definition: /@$x$ \<pattern\> iff@/.
+verb :: RE Token LexicalPhrase
+verb = toLexicalPhrase <$> (var *> pat <* iff)
+
+-- | Head of a functional-noun definition: /@the \<pattern\> is@/ (or a comma).
+fun :: RE Token LexicalPhrase
+fun = toLexicalPhrase <$> (the *> pat <* (is <|> comma))
+
+-- | Head of a relator definition: a math environment of the shape
+-- /@\<var\> \<relator\> \<var\>@/ followed by /@iff@/.
+relationSymbol :: RE Token RelationSymbol
+relationSymbol = math relator' <* iff
+ where
+ relator' = do
+ varSymbol
+ rel <- symbol
+ varSymbol
+ pure rel
+
+-- | Function symbol pattern of a symbolic definition: the tokens of a
+-- math environment up to (and excluding) the defining @=@.
+functionSymbol :: RE Token FunctionSymbol
+functionSymbol = functionSymbolUpTo (Symbol "=")
+
+-- | Function symbol pattern of an inductive definition, delimited by
+-- @\subseteq@ instead of @=@.
+functionSymbolInductive :: RE Token FunctionSymbol
+functionSymbolInductive = functionSymbolUpTo (Command "subseteq")
+
+-- | Shared worker for 'functionSymbol' and 'functionSymbolInductive'
+-- (previously two verbatim copies): lazily match the contents of a
+-- math environment up to the given delimiter token.
+functionSymbolUpTo :: Token -> RE Token FunctionSymbol
+functionSymbolUpTo delimiter = do
+ sym (BeginEnv "math")
+ toks <- few nonDefinitionKeyword
+ sym delimiter
+ pure (fromToken <$> toks)
+ where
+ fromToken = \case
+ Variable _ -> Nothing -- Variables become slots.
+ tok -> Just tok -- Everything else is part of the pattern.
+
+-- | Head of a prefix-predicate definition: a command applied to braced
+-- variable arguments; the arity is the number of braced arguments.
+prefixPredicate :: RE Token PrefixPredicate
+prefixPredicate = math prfx <* iff
+ where
+ prfx = do
+ r <- command
+ args <- many (sym InvisibleBraceL *> varSymbol <* sym InvisibleBraceR)
+ pure (PrefixPredicate r (length args))
+
+
+-- | Match any TEX command token, yielding its name.
+command :: RE Token Text
+command = msym \case
+ Command cmd -> Just cmd
+ _ -> Nothing
+
+-- | A single variable wrapped in a math environment.
+var :: RE Token Token
+var = math varSymbol
+
+-- | Match any variable token.
+varSymbol :: RE Token Token
+varSymbol = psym isVar
+
+
+-- | Any token except the keywords that delimit a symbolic definition.
+nonDefinitionKeyword :: RE Token Token
+nonDefinitionKeyword = psym (`notElem` keywords)
+ where
+ keywords =
+ [ Word "if"
+ , Word "iff"
+ , Symbol "="
+ , Command "iff"
+ ]
+
+
+-- | A definiendum pattern: lexical-phrase tokens and variable slots.
+pat :: RE Token [Token]
+pat = many (psym isLexicalPhraseToken <|> var)
+
+-- | Like 'pat' but without variable slots.
+structPat :: RE Token [Token]
+structPat = many (psym isLexicalPhraseToken)
+
+-- | Wrap a matcher in math-environment delimiters.
+math :: RE Token a -> RE Token a
+math re = sym (BeginEnv "math") *> re <* sym (EndEnv "math")
+
+-- | We allow /conditional perfection/: the first /@if@/ in a definition is interpreted as /@iff@/.
+-- Also accepts the other definitional copulas /@denote@/ and /@stand for@/.
+iff :: RE Token ()
+iff = void (sym (Word "if")) -- Using @void@ is faster (only requires recognition).
+ <|> void (sym (Word "iff"))
+ <|> void (string [Word "if", Word "and", Word "only", Word "if"])
+ <|> void (sym (Word "denote"))
+ <|> void (sym (Word "stand") *> sym (Word "for"))
+{-# INLINE iff #-}
+
+-- | Indefinite article.
+an :: RE Token ()
+an = void (sym (Word "a"))
+ <|> void (sym (Word "an"))
+{-# INLINE an #-}
+
+-- | Copula of a definition head.
+is :: RE Token ()
+is = void (sym (Word "is") <|> sym (Word "denotes"))
+{-# INLINE is #-}
+
+-- | Definite article.
+the :: RE Token ()
+the = void (sym (Word "the"))
+{-# INLINE the #-}
+
+-- | A comma token.
+comma :: RE Token ()
+comma = void (sym (Symbol ","))
+{-# INLINE comma #-}
+
+
+-- | Whether a token is a variable.
+isVar :: Token -> Bool
+isVar = \case
+ Variable _ -> True
+ _token -> False
+
+-- | Whether a token is a TEX command.
+isCommand :: Token -> Bool
+isCommand = \case
+ Command _ -> True
+ _token -> False
+
+-- | Whether a token does not delimit a definition environment.
+isNotDefnToken :: Token -> Bool
+isNotDefnToken = \case
+ BeginEnv "definition" -> False
+ EndEnv "definition" -> False
+ _token -> True
+
+-- | Which tokens may occur inside a lexical phrase (a definiendum pattern).
+isLexicalPhraseToken :: Token -> Bool
+isLexicalPhraseToken = \case
+ Word w -> w `Set.notMember` keywords
+ --
+ -- Simple commands (outside of math-mode) are allowed. This is useful
+ -- for defining lexical phrases containing symbolic expressions such as
+ -- `X is \Ttwo{}`, where `\Ttwo` is a macro that expands to `T_2`.
+ -- We also allow these macros to take arguments, hence the need to
+ -- allow grouping delimiters. They can also be used to escape the end
+ -- of the command for correct spacing, as in the above example.
+ --
+ Command _cmd -> True
+ InvisibleBraceL -> True
+ InvisibleBraceR -> True
+ --
+ -- No other tokens may occur in lexical phrases. In particular, no `_dot`
+ -- token may occur, limiting the lexical phrase to a single sentence.
+ -- Commas occurring in variable lists should be placed
+ -- within the math environment. Thus `$a,b$ are coprime iff`,
+ -- not `$a$,`$b$` are coprime iff`.
+ --
+ _token -> False
+ where
+ keywords = Set.fromList ["a", "an", "is", "are", "if", "iff", "denote", "stand", "let"]
+
+
+-- | Turn a token sequence into a lexical phrase: each variable token
+-- becomes an argument slot ('Nothing'); all other tokens are kept.
+toLexicalPhrase :: [Token] -> LexicalPhrase
+toLexicalPhrase = map slotify
+ where
+ slotify tok = case tok of
+ Variable _ -> Nothing
+ other -> Just other
+
+
+-- | Match a single command or symbol token, yielding the token itself.
+symbol :: RE Token Token
+symbol = msym \case
+ tok@(Command _) -> Just tok
+ tok@(Symbol _) -> Just tok
+ _tok -> Nothing
+
+
+-- | Basic paradigms for pluralizations of nominals.
+-- Heuristics: append \'s\' to the first word when a preposition follows;
+-- otherwise append \'s\' to the final word of phrases of up to three
+-- tokens; longer phrases are left unchanged.
+guessNounPlural :: LexicalPhrase -> SgPl LexicalPhrase
+guessNounPlural item = SgPl item (pluralize item)
+ where
+ pluralize :: LexicalPhrase -> LexicalPhrase
+ pluralize = \case
+ Just (Word w) : pat'@(Just w' : _) | isPreposition w' -> Just (Word (Text.snoc w 's')) : pat'
+ [Just (Word w)] -> [Just (Word (Text.snoc w 's'))]
+ [tok, Just (Word w)] -> [tok, Just (Word (Text.snoc w 's'))]
+ [tok, tok', Just (Word w)] -> [tok, tok', Just (Word (Text.snoc w 's'))]
+ pat' -> pat'
+
+-- | Guess the plural of a verb by stripping a trailing \'s\' from its
+-- first word; phrases not starting with an \'s\'-final word are unchanged.
+guessVerbPlural :: LexicalPhrase -> SgPl LexicalPhrase
+guessVerbPlural item = SgPl item itemPl
+ where
+ itemPl = case item of
+ Just (Word v) : rest -> case Text.unsnoc v of
+ Just (v', 's') -> Just (Word v') : rest
+ _ -> item
+ _ -> item
+
+-- | A lexical phrase counts as a right adjective when it mentions a
+-- preposition or has an argument slot.
+isAdjR :: LexicalPhrase -> Bool
+isAdjR item = containsPreposition item || containsSlot item
+ where
+ containsPreposition, containsSlot :: LexicalPhrase -> Bool
+ containsPreposition = any isPreposition . catMaybes
+ containsSlot = (Nothing `elem`)
+
+-- | Whether a token is a preposition word. Only 'Word' tokens can
+-- qualify, so match the constructor and look the text up directly;
+-- the previous version rebuilt @HS.map Word prepositions@ on every
+-- call, paying a full set construction per queried token.
+isPreposition :: Token -> Bool
+isPreposition = \case
+ Word w -> w `HS.member` prepositions
+ _token -> False
+
+-- | Right-biased set insertion, keeping the original set
+-- when inserting already present elements. @insertR@ is unfortunately
+-- a hidden function, even in @Data.Set.Internal@, so we approximate it
+-- here. In theory one could avoid the indirection of first forming the singleton
+-- set on the rhs.
+insertR' :: Hashable a => a -> HashSet a -> HashSet a
+insertR' x xs = xs `HS.union` HS.singleton x -- @union@ is left-biased.
+
+-- | Right-biased 'Map' insertion: an existing binding for @k@ wins.
+insertMapR :: Ord k => k -> a -> Map k a -> Map k a
+insertMapR k x xs = xs `Map.union` Map.singleton k x -- @union@ is left-biased.
+
+
+-- | Right-biased 'HashMap' insertion: an existing binding for @k@ wins.
+insertR :: Hashable k => k -> a -> HashMap k a -> HashMap k a
+insertR k x xs = xs `HM.union` HM.singleton k x -- @union@ is left-biased.
+
+-- | Takes the scanned lexical phrases and inserts them in the correct
+-- places in a lexicon. Insertions are right-biased ('insertR'), so
+-- existing entries win over rescanned ones.
+extendLexicon :: [ScannedLexicalItem] -> Lexicon -> Lexicon
+extendLexicon [] lexicon = lexicon
+-- Note that we only consider 'sg' in the 'Ord' instance of SgPl, so that
+-- known irregular plurals are preserved.
+-- NOTE(review): the lexicon fields below are 'HashMap's, so it is the
+-- 'Eq'/'Hashable' instances of 'SgPl' that matter here — confirm they
+-- likewise only consider 'sg'.
+extendLexicon (scan : scans) lexicon@Lexicon{..} = case scan of
+ ScanAdj item m -> if isAdjR item
+ then extendLexicon scans lexicon{lexiconAdjRs = insertR item m lexiconAdjRs}
+ else extendLexicon scans lexicon{lexiconAdjLs = insertR item m lexiconAdjLs}
+ ScanFun item m ->
+ extendLexicon scans lexicon{lexiconFuns = insertR (guessNounPlural item) m lexiconFuns}
+ ScanVerb item m ->
+ extendLexicon scans lexicon{lexiconVerbs = insertR (guessVerbPlural item) m lexiconVerbs}
+ ScanNoun item m ->
+ extendLexicon scans lexicon{lexiconNouns = insertR (guessNounPlural item) m lexiconNouns}
+ ScanStructNoun item m ->
+ extendLexicon scans lexicon{lexiconStructNouns = insertR (guessNounPlural item) m lexiconStructNouns}
+ ScanRelationSymbol item m ->
+ extendLexicon scans lexicon{lexiconRelationSymbols = insertR item m lexiconRelationSymbols}
+ -- Function symbols already registered at any precedence level are
+ -- skipped; new ones are registered at level 9 with no associativity.
+ ScanFunctionSymbol item m ->
+ if any (item `HM.member`) lexiconMixfix
+ then extendLexicon scans lexicon
+ else extendLexicon scans lexicon{lexiconMixfix = Seq.adjust (HM.insert item (NonAssoc, m)) 9 lexiconMixfix}
+ ScanStructOp op ->
+ extendLexicon scans lexicon{lexiconStructFun = insertR (StructSymbol op) (Marker op) lexiconStructFun}
+ ScanPrefixPredicate tok m ->
+ extendLexicon scans lexicon{lexiconPrefixPredicates = insertR tok m lexiconPrefixPredicates}
diff --git a/source/Syntax/Chunk.hs b/source/Syntax/Chunk.hs
new file mode 100644
index 0000000..604ccf6
--- /dev/null
+++ b/source/Syntax/Chunk.hs
@@ -0,0 +1,46 @@
+module Syntax.Chunk where
+
+import Base
+import Syntax.Token
+
+import Data.List qualified as List
+
+
+-- LATER This is just a naïve implementation of token chunks.
+-- It and the current tokenizer should probably be replaced by
+-- a more efficient implementation that does chunking
+-- directly while tokenizing.
+
+-- | Split a token stream into chunks, one per toplevel environment
+-- (including its begin/end tokens), discarding everything in between.
+chunkify :: [Located Token] -> [[Located Token]]
+chunkify [] = []
+chunkify (tok : toks) = case unLocated tok of
+ BeginEnv env | isTopLevelEnv env ->
+ let (axiomToks, otherToks) = envBlock env toks
+ in (tok : axiomToks) : chunkify otherToks
+ --
+ -- Skip all tokens outside of toplevel environments.
+ _ -> chunkify toks
+
+-- | Environments that form their own chunk (cf. 'chunkify').
+isTopLevelEnv :: Text -> Bool
+isTopLevelEnv env = env `elem`
+ [ "abbreviation"
+ , "axiom"
+ , "claim"
+ , "corollary"
+ , "datatype"
+ , "definition"
+ , "inductive"
+ , "lemma"
+ , "proof"
+ , "proposition"
+ , "signature"
+ , "struct"
+ , "theorem"
+ ]
+
+-- | Split off one environment block: everything up to and including
+-- the matching @EndEnv@ token, paired with the remaining tokens.
+-- When the end token is missing, the whole input becomes the block.
+envBlock :: Text -> [Located Token] -> ([Located Token], [Located Token])
+envBlock env toks =
+ case List.break (\tok -> unLocated tok == EndEnv env) toks of
+ (body, []) -> (body, [])
+ (body, endTok : rest) -> (body ++ [endTok], rest)
diff --git a/source/Syntax/Concrete.hs b/source/Syntax/Concrete.hs
new file mode 100644
index 0000000..8c6962d
--- /dev/null
+++ b/source/Syntax/Concrete.hs
@@ -0,0 +1,657 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE RecursiveDo #-}
+
+-- | Concrete syntax of the surface language.
+module Syntax.Concrete where
+
+import Base
+import Syntax.Abstract
+import Syntax.Concrete.Keywords
+import Syntax.Lexicon (Lexicon(..), lexiconAdjs, splitOnVariableSlot)
+import Syntax.Token
+
+import Data.HashSet qualified as HS
+import Data.List.NonEmpty qualified as NonEmpty
+import Data.HashMap.Strict qualified as HM
+import Text.Earley (Grammar, Prod, (<?>), rule, satisfy, terminal)
+import Text.Earley.Mixfix
+import Text.Megaparsec.Pos (SourcePos)
+
+
+-- | The complete Earley grammar of the surface language, parameterized
+-- by the 'Lexicon' of known phrases and symbols. The starting category
+-- is a toplevel 'Block'. All rules are mutually recursive, hence the
+-- 'mdo' block.
+grammar :: Lexicon -> Grammar r (Prod r Text (Located Token) Block)
+grammar lexicon@Lexicon{..} = mdo
+  -- Operator and connective tables for 'mixfixExpression', built by
+  -- lifting the lexicon's token patterns into productions.
+  let makeOp :: ([Maybe Token], Associativity) -> ([Maybe (Prod r Text (Located Token) Token)], Associativity)
+      makeOp (pat, assoc) = (map (fmap token) pat, assoc)
+      ops = map (map makeOp) (toList (HM.toList . HM.map fst <$> lexiconMixfix))
+      conns = map (map makeOp) lexiconConnectives
+
+  -- Terminal productions for single tokens.
+  integer <- rule (terminal maybeIntToken <?> "integer")
+  relator <- rule $ unLocated <$> (satisfy (\ltok -> unLocated ltok `HM.member` lexiconRelationSymbols) <?> "relator")
+  varSymbol <- rule (terminal maybeVarToken <?> "variable")
+  varSymbols <- rule (commaList varSymbol)
+  cmd <- rule (terminal maybeCmdToken <?> "TEX command")
+--
+-- Formulas have three levels:
+--
+-- + Expressions: atoms or operators applied to atoms.
+-- + Chains: comma-lists of expressions, separated by relators.
+-- + Formulas: chains or connectives applied to chains.
+--
+-- For example, the formula @x, y < z \implies x, y < z + 1@ consists of the
+-- connective @\implies@ applied to two chains @x, y < z@ and @x, y < z + 1@.
+-- In turn, the chain @x, y < z + 1@ consists of three expressions,
+-- @x@, @y@, and @z + 1@. Finally, @z + 1@ consists of the operator @+@
+-- applied to two atoms, the variable @z@ and the number literal @1@.
+--
+-- This split is due to the different behaviour of relators compared to
+-- operators and connectives. Relators can chain (@x < y < z@) and allow
+-- lists as arguments, as in the above example. Operators and connectives
+-- instead have precedence and fixity. The only syntactic difference between
+-- an operator and a connective is the relative precedence compared to relators.
+--
+  replaceBound <- rule $ (,) <$> varSymbol <* _in <*> expr
+  replaceBounds <- rule $ commaList replaceBound
+  replaceFun <- rule $ ExprReplace <$> expr <* _pipe <*> replaceBounds <*> optional (_pipe *> comprStmt)
+  comprStmt <- rule $ (StmtFormula <$> formula) <|> text stmt
+
+  replacePredSymbolic <- rule $ ExprReplacePred <$> varSymbol <* _pipe <*> (command "exists" *> varSymbol) <* _in <*> expr <* _dot <*> (StmtFormula <$> formula)
+  replacePredText <- rule $ ExprReplacePred <$> varSymbol <* _pipe <*> (begin "text" *> _exists *> beginMath *> varSymbol <* _in) <*> expr <* endMath <* _suchThat <*> stmt <* end "text"
+  replacePred <- rule $ replacePredSymbolic <|> replacePredText
+
+  -- One alternative per structure operation known to the lexicon.
+  let exprStructOpOf ann = HS.foldr alg empty (HM.keysSet lexiconStructFun)
+        where
+        alg s prod = prod <|> (ExprStructOp <$> structSymbol s <*> ann)
+
+  exprStructOp <- rule (exprStructOpOf (optional (bracket expr)))
+
+  let bracedArgs1 ar arg = count1 ar $ group arg
+  let prefixPredicateOf f arg symb@(PrefixPredicate c ar) = f <$> pure symb <* command c <*> bracedArgs1 ar arg
+
+
+  exprParen <- rule $ paren expr
+  exprInteger <- rule $ ExprInteger <$> integer
+  exprVar <- rule $ ExprVar <$> varSymbol
+  exprTuple <- rule $ makeTuple <$> paren (commaList2 expr)
+  exprSep <- rule $ brace $ ExprSep <$> varSymbol <* _in <*> expr <* _pipe <*> comprStmt
+  exprReplace <- rule $ brace $ (replaceFun <|> replacePred)
+  exprFinSet <- rule $ brace $ ExprFiniteSet <$> exprs
+  exprBase <- rule $ asum [exprVar, exprInteger, exprStructOp, exprParen, exprTuple, exprSep, exprReplace, exprFinSet]
+  exprApp <- rule $ ExprApp <$> exprBase <*> (paren expr <|> exprTuple)
+  expr <- mixfixExpression ops (exprBase <|> exprApp) ExprOp
+  exprs <- rule $ commaList expr
+
+  relationSign <- rule $ pure Positive <|> (Negative <$ command "not")
+  relationExpr <- rule $ RelationExpr <$> (command "mathrel" *> group expr)
+  relation <- rule $ (RelationSymbol <$> relator) <|> relationExpr
+  chainBase <- rule $ ChainBase <$> exprs <*> relationSign <*> relation <*> exprs
+  chainCons <- rule $ ChainCons <$> exprs <*> relationSign <*> relation <*> chain
+  chain <- rule $ chainCons <|> chainBase
+
+  formulaPredicate <- rule $ asum $ prefixPredicateOf FormulaPredicate expr <$> HM.keys lexiconPrefixPredicates
+  formulaChain <- rule $ FormulaChain <$> chain
+  formulaBottom <- rule $ PropositionalConstant IsBottom <$ command "bot" <?> "\"\\bot\""
+  formulaTop <- rule $ PropositionalConstant IsTop <$ command "top" <?> "\"\\top\""
+  formulaExists <- rule $ FormulaQuantified Existentially <$> (command "exists" *> varSymbols) <*> maybeBounded <* _dot <*> formula
+  formulaAll <- rule $ FormulaQuantified Universally <$> (command "forall" *> varSymbols) <*> maybeBounded <* _dot <*> formula
+  formulaQuantified <- rule $ formulaExists <|> formulaAll
+  formulaBase <- rule $ asum [formulaChain, formulaPredicate, formulaBottom, formulaTop, paren formula]
+  formulaConn <- mixfixExpression conns formulaBase makeConnective
+  formula <- rule $ formulaQuantified <|> formulaConn
+
+-- These are asymmetric formulas (only variables are allowed on one side).
+-- They express judgements.
+--
+  assignment <- rule $ (,) <$> varSymbol <* (_eq <|> _defeq) <*> expr
+  typing <- rule $ (,) <$> varSymbols <* (_in <|> _colon) <*> expr
+
+  -- Lexical phrases instantiated from the lexicon.
+  adjL <- rule $ adjLOf lexicon term
+  adjR <- rule $ adjROf lexicon term
+  adj <- rule $ adjOf lexicon term
+  adjVar <- rule $ adjOf lexicon var
+
+  var <- rule $ math varSymbol
+  vars <- rule $ math varSymbols
+
+  verb <- rule $ verbOf lexicon sg term
+  verbPl <- rule $ verbOf lexicon pl term
+  verbVar <- rule $ verbOf lexicon sg var
+
+  noun <- rule $ nounOf lexicon sg term nounName -- Noun with optional variable name.
+  nounList <- rule $ nounOf lexicon sg term nounNames -- Noun with a list of names.
+  nounVar <- rule $ fst <$> nounOf lexicon sg var (pure Nameless) -- No names in defined nouns.
+  nounPl <- rule $ nounOf lexicon pl term nounNames
+  nounPlMay <- rule $ nounOf lexicon pl term nounName
+
+  structNoun <- rule $ structNounOf lexicon sg var var
+  structNounNameless <- rule $ fst <$> structNounOf lexicon sg var (pure Nameless)
+
+
+  fun <- rule $ funOf lexicon sg term
+  funVar <- rule $ funOf lexicon sg var
+
+  -- Right attributes: trailing adjectives and "that"-clauses.
+  attrRThat <- rule $ AttrRThat <$> thatVerbPhrase
+  attrRThats <- rule $ ((:[]) <$> attrRThat) <|> ((\a a' -> [a,a']) <$> attrRThat <* _and <*> attrRThat) <|> pure []
+  attrRs <- rule $ ((:[]) <$> adjR) <|> ((\a a' -> [a,a']) <$> adjR <* _and <*> adjR) <|> pure []
+  attrRight <- rule $ (<>) <$> attrRs <*> attrRThats
+
+  verbPhraseVerbSg <- rule $ VPVerb <$> verb
+  verbPhraseVerbNotSg <- rule $ VPVerbNot <$> (_does *> _not *> verbPl)
+  verbPhraseAdjSg <- rule $ VPAdj . (:|[]) <$> (_is *> adj)
+  verbPhraseAdjAnd <- rule do {_is; a1 <- adj; _and; a2 <- adj; pure (VPAdj (a1 :| [a2]))}
+  verbPhraseAdjNotSg <- rule $ VPAdjNot . (:|[]) <$> (_is *> _not *> adj)
+  verbPhraseNotSg <- rule $ verbPhraseVerbNotSg <|> verbPhraseAdjNotSg
+  verbPhraseSg <- rule $ verbPhraseVerbSg <|> verbPhraseAdjSg <|> verbPhraseAdjAnd <|> verbPhraseNotSg
+
+  -- LATER can cause technical ambiguities? verbPhraseVerbPl <- rule $ VPVerb <$> verbPl
+  verbPhraseVerbNotPl <- rule $ VPVerbNot <$> (_do *> _not *> verbPl)
+  verbPhraseAdjPl <- rule $ VPAdj . (:|[]) <$> (_are *> adj)
+  verbPhraseAdjNotPl <- rule $ VPAdjNot . (:|[]) <$> (_are *> _not *> adj)
+  verbPhraseNotPl <- rule $ verbPhraseVerbNotPl <|> verbPhraseAdjNotPl
+  verbPhrasePl <- rule $ verbPhraseAdjPl <|> verbPhraseNotPl -- LATER <|> verbPhraseVerbPl
+
+
+
+  thatVerbPhrase <- rule $ _that *> verbPhraseSg
+
+  nounName <- rule $ optional (math varSymbol)
+  nounNames <- rule $ math (commaList_ varSymbol) <|> pure []
+  nounPhrase <- rule $ makeNounPhrase <$> many adjL <*> noun <*> attrRight <*> optional suchStmt
+  nounPhrase' <- rule $ makeNounPhrase <$> many adjL <*> nounList <*> attrRight <*> optional suchStmt
+  nounPhrasePl <- rule $ makeNounPhrase <$> many adjL <*> nounPl <*> attrRight <*> optional suchStmt
+  nounPhrasePlMay <- rule $ makeNounPhrase <$> many adjL <*> nounPlMay <*> attrRight <*> optional suchStmt
+  nounPhraseMay <- rule $ makeNounPhrase <$> many adjL <*> noun <*> attrRight <*> optional suchStmt
+
+  -- Quantification phrases for quantification and indefinite terms.
+  quantAll <- rule $ QuantPhrase Universally <$> (_forEvery *> nounPhrase' <|> _forAll *> nounPhrasePl)
+  quantSome <- rule $ QuantPhrase Existentially <$> (_some *> (nounPhrase' <|> nounPhrasePl))
+  quantNone <- rule $ QuantPhrase Nonexistentially <$> (_no *> (nounPhrase' <|> nounPhrasePl))
+  quant <- rule $ quantAll <|> quantSome <|> quantNone -- <|> quantUniq
+
+
+  termExpr <- rule $ TermExpr <$> math expr
+  termFun <- rule $ TermFun <$> (optional _the *> fun)
+  termIota <- rule $ TermIota <$> (_the *> var) <* _suchThat <*> stmt
+  termAll <- rule $ TermQuantified Universally <$> (_every *> nounPhraseMay)
+  termSome <- rule $ TermQuantified Existentially <$> (_some *> nounPhraseMay)
+  termNo <- rule $ TermQuantified Nonexistentially <$> (_no *> nounPhraseMay)
+  termQuantified <- rule $ termAll <|> termSome <|> termNo
+  term <- rule $ termExpr <|> termFun <|> termQuantified <|> termIota
+
+-- Basic statements @stmt'@ are statements without any conjunctions or quantifiers.
+--
+  stmtVerbSg <- rule $ StmtVerbPhrase <$> ((:| []) <$> term) <*> verbPhraseSg
+  stmtVerbPl <-rule $ StmtVerbPhrase <$> andList1 term <*> verbPhrasePl
+  stmtVerb <- rule $ stmtVerbSg <|> stmtVerbPl
+  stmtNounIs <- rule $ StmtNoun <$> term <* _is <* _an <*> nounPhrase
+  stmtNounIsNot <- rule $ StmtNeg <$> (StmtNoun <$> term <* _is <* _not <* _an <*> nounPhrase)
+  stmtNoun <- rule $ stmtNounIs <|> stmtNounIsNot
+  stmtStruct <- rule $ StmtStruct <$> (term <* _is <* _an) <*> structNounNameless
+  stmtExists <- rule $ StmtExists <$> (_exists *> _an *> nounPhrase')
+  stmtExist <- rule $ StmtExists <$> (_exist *> nounPhrasePl)
+  stmtExistsNot <- rule $ StmtNeg . StmtExists <$> (_exists *> _no *> nounPhrase')
+  stmtFormula <- rule $ StmtFormula <$> math formula
+  stmtBot <- rule $ StmtFormula (PropositionalConstant IsBottom) <$ _contradiction
+  stmt' <- rule $ stmtVerb <|> stmtNoun <|> stmtStruct <|> stmtFormula <|> stmtBot
+  stmtOr <- rule $ stmt' <|> (StmtConnected Disjunction <$> stmt' <* _or <*> stmt)
+  stmtAnd <- rule $ stmtOr <|> (StmtConnected Conjunction <$> stmtOr <* _and <*> stmt)
+  stmtIff <- rule $ stmtAnd <|> (StmtConnected Equivalence <$> stmtAnd <* _iff <*> stmt)
+  stmtIf <- rule $ StmtConnected Implication <$> (_if *> stmt) <* optional _comma <* _then <*> stmt
+  stmtXor <- rule $ StmtConnected ExclusiveOr <$> (_either *> stmt) <* _or <*> stmt
+  stmtNor <- rule $ StmtConnected NegatedDisjunction <$> (_neither *> stmt) <* _nor <*> stmt
+  stmtNeg <- rule $ StmtNeg <$> (_itIsWrong *> stmt)
+
+  stmtQuantPhrase <- rule $ StmtQuantPhrase <$> (_for *> quant) <* optional _comma <* optional _have <*> stmt
+
+  suchStmt <- rule $ _suchThat *> stmt <* optional _comma
+
+
+  -- Symbolic quantifications with or without generalized bounds.
+  symbolicForall <- rule $ SymbolicForall
+    <$> ((_forAll <|> _forEvery) *> beginMath *> varSymbols)
+    <*> maybeBounded <* endMath
+    <*> optional suchStmt
+    <* optional _have <*> stmt
+  symbolicExists <- rule $ SymbolicExists
+    <$> ((_exists <|> _exist) *> beginMath *> varSymbols)
+    <*> maybeBounded <* endMath
+    <*> ((_suchThat *> stmt) <|> pure (StmtFormula (PropositionalConstant IsTop)))
+  symbolicNotExists <- rule $ SymbolicNotExists
+    <$> (_exists *> _no *> beginMath *> varSymbols)
+    <*> maybeBounded <* endMath
+    <* _suchThat <*> stmt
+  symbolicBound <- rule $ Bounded <$> relationSign <*> relation <*> expr
+  maybeBounded <- rule (pure Unbounded <|> symbolicBound)
+
+  symbolicQuantified <- rule $ symbolicForall <|> symbolicExists <|> symbolicNotExists
+
+  stmt <- rule $ asum [stmtNeg, stmtIf, stmtXor, stmtNor, stmtExists, stmtExist, stmtExistsNot, stmtQuantPhrase, stmtIff, symbolicQuantified] <?> "a statement"
+
+  -- Assumptions ("let ...", "suppose ...").
+  asmLetIn <- rule $ uncurry AsmLetIn <$> (_let *> math typing)
+  asmLetNoun <- rule $ AsmLetNoun <$> (_let *> fmap pure var <* (_be <|> _denote) <* _an) <*> nounPhrase
+  asmLetNouns <- rule $ AsmLetNoun <$> (_let *> vars <* (_be <|> _denote)) <*> nounPhrasePlMay
+  asmLetEq <- rule $ uncurry AsmLetEq <$> (_let *> math assignment)
+  asmLetThe <- rule $ AsmLetThe <$> (_let *> var <* _be <* _the) <*> fun
+  asmLetStruct <- rule $ AsmLetStruct <$> (_let *> var <* _be <* _an) <*> structNounNameless
+  asmLet <- rule $ asmLetNoun <|> asmLetNouns <|> asmLetIn <|> asmLetEq <|> asmLetThe <|> asmLetStruct
+  asmSuppose <- rule $ AsmSuppose <$> (_suppose *> stmt)
+  asm <- rule $ assumptionList (asmLet <|> asmSuppose) <* _dot
+  asms <- rule $ concat <$> many asm
+
+  axiom <- rule $ Axiom <$> asms <* optional _then <*> stmt <* _dot
+
+  lemma <- rule $ Lemma <$> asms <* optional _then <*> stmt <* _dot
+
+  -- Definitions.
+  defnAdj <- rule $ DefnAdj <$> optional (_an *> nounPhrase) <*> var <* _is <*> adjVar
+  defnVerb <- rule $ DefnVerb <$> optional (_an *> nounPhrase) <*> var <*> verbVar
+  defnNoun <- rule $ DefnNoun <$> var <* _is <* _an <*> nounVar
+  defnRel <- rule $ DefnRel <$> (beginMath *> varSymbol) <*> relator <*> varSymbol <* endMath
+  defnSymbolicPredicate <- rule $ math $ asum $ prefixPredicateOf DefnSymbolicPredicate varSymbol <$> HM.keys lexiconPrefixPredicates
+  defnHead <- rule $ optional _write *> asum [defnAdj, defnVerb, defnNoun, defnRel, defnSymbolicPredicate]
+
+  defnIf <- rule $ Defn <$> asms <*> defnHead <* (_iff <|> _if) <*> stmt <* _dot
+  defnFunSymb <- rule $ _comma *> termExpr <* _comma -- ^ Optional symbolic equivalent.
+  defnFun <- rule $ DefnFun <$> asms <*> (optional _the *> funVar) <*> optional defnFunSymb <* _is <*> term <* _dot
+
+  symbolicPatternEqTerm <- rule do
+    pat <- beginMath *> symbolicPattern <* _eq
+    e <- expr <* endMath <* _dot
+    pure (pat, e)
+  defnOp <- rule $ uncurry DefnOp <$> symbolicPatternEqTerm
+
+  defn <- rule $ defnIf <|> defnFun <|> defnOp
+
+  abbreviationVerb <- rule $ AbbreviationVerb <$> var <*> verbVar <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationAdj <- rule $ AbbreviationAdj <$> var <* _is <*> adjVar <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationNoun <- rule $ AbbreviationNoun <$> var <* _is <* _an <*> nounVar <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationRel <- rule $ AbbreviationRel <$> (beginMath *> varSymbol) <*> relator <*> varSymbol <* endMath <* (_iff <|> _if) <*> stmt <* _dot
+  abbreviationFun <- rule $ AbbreviationFun <$> (_the *> funVar) <* (_is <|> _denotes) <*> term <* _dot
+  abbreviationEq <- rule $ uncurry AbbreviationEq <$> symbolicPatternEqTerm
+  abbreviation <- rule $ (abbreviationVerb <|> abbreviationAdj <|> abbreviationNoun <|> abbreviationRel <|> abbreviationFun <|> abbreviationEq)
+
+  datatypeFin <- rule $ DatatypeFin <$> fmap fst (_an *> noun) <*> (_is *> _oneOf *> orList2 (math cmd) <* _dot)
+  datatype <- rule datatypeFin
+
+  unconditionalIntro <- rule $ IntroRule [] <$> math formula
+  conditionalIntro <- rule $ IntroRule <$> (_if *> andList1_ (math formula)) <* _comma <* _then <*> math formula
+  inductiveIntro <- rule $ (unconditionalIntro <|> conditionalIntro) <* _dot
+  inductiveDomain <- rule $ math $ (,) <$> symbolicPattern <* _subseteq <*> expr
+  inductiveHead <- rule $ _define *> inductiveDomain <* optional _inductively <* optional _asFollows <* _dot
+  inductive <- rule $ uncurry Inductive <$> inductiveHead <*> enumerated1 inductiveIntro
+
+  signatureAdj <- rule $ SignatureAdj <$> var <* _can <* _be <*> adjOf lexicon var
+  symbolicPattern <- symbolicPatternOf ops varSymbol
+  signatureSymbolic <- rule $ SignatureSymbolic <$> math symbolicPattern <* _is <* _an <*> nounPhrase
+  signature <- rule $ (,) <$> asms <* optional _then <*> (signatureAdj <|> signatureSymbolic) <* _dot
+
+  structFix <- rule do
+    beginMath
+    rawCmd <- cmd
+    endMath
+    pure (StructSymbol rawCmd)
+  structDefn <- rule $ do
+    _an
+    ~(structPhrase, structLabel) <- structNoun
+    _extends
+    structParents <- andList1_ (_an *> structNounNameless)
+    maybeFixes <- optional (_equipped *> enumerated structFix)
+    structAssumes <- (_suchThat *> enumeratedMarked (stmt <* _dot)) <|> ([] <$ _dot)
+    pure StructDefn
+      { structPhrase = structPhrase
+      , structLabel = structLabel
+      , structParents = structParents
+      , structFixes = maybeFixes ?? []
+      , structAssumes = structAssumes
+      }
+
+  -- Justifications for proof steps.
+  justificationSet <- rule $ JustificationSetExt <$ _bySetExt
+  justificationRef <- rule $ JustificationRef <$> (_by *> ref)
+  justificationLocal <- rule $ JustificationLocal <$ (_by *> (_assumption <|> _definition))
+  justification <- rule (justificationSet <|> justificationRef <|> justificationLocal <|> pure JustificationEmpty)
+
+  trivial <- rule $ Qed JustificationEmpty <$ _trivial <* _dot
+  omitted <- rule $ Omitted <$ _omitted <* _dot
+  qedJustified <- rule $ Qed <$> (_follows *> justification <* _dot)
+  qed <- rule $ qedJustified <|> trivial <|> omitted <|> pure (Qed JustificationEmpty)
+
+  let alignedEq = symbol "&=" <?> "\"&=\""
+  explanation <- rule $ (text justification) <|> pure JustificationEmpty
+  equationItem <- rule $ (,) <$> (alignedEq *> expr) <*> explanation
+  equations <- rule $ Equation <$> expr <*> (many1 equationItem)
+
+  let alignedIff = symbol "&" *> command "iff" <?> "\"&\\iff\""
+  biconditionalItem <- rule $ (,) <$> (alignedIff *> formula) <*> explanation
+  biconditionals <- rule $ Biconditionals <$> formula <*> (many1 biconditionalItem)
+
+  calc <- rule $ Calc <$> align (equations <|> biconditionals) <*> proof
+
+  caseOf <- rule $ command "caseOf" *> token InvisibleBraceL *> stmt <* _dot <* token InvisibleBraceR
+  byCases <- rule $ ByCase <$> env_ "byCase" (many1_ (Case <$> caseOf <*> proof))
+  byContradiction <- rule $ ByContradiction <$ _suppose <* _not <* _dot <*> proof
+  bySetInduction <- rule $ BySetInduction <$> proofBy (_in *> word "-induction" *> optional (word "on" *> term)) <*> proof
+  byOrdInduction <- rule $ ByOrdInduction <$> proofBy (word "transfinite" *> word "induction" *> proof)
+  assume <- rule $ Assume <$> (_suppose *> stmt <* _dot) <*> proof
+
+  fixSymbolic <- rule $ FixSymbolic <$> (_fix *> beginMath *> varSymbols) <*> maybeBounded <* endMath <* _dot <*> proof
+  fixSuchThat <- rule $ FixSuchThat <$> (_fix *> math varSymbols) <* _suchThat <*> stmt <* _dot <*> proof
+  fix <- rule $ fixSymbolic <|> fixSuchThat
+
+  takeVar <- rule $ TakeVar <$> (_take *> beginMath *> varSymbols) <*> maybeBounded <* endMath <* _suchThat <*> stmt <*> justification <* _dot <*> proof
+  takeNoun <- rule $ TakeNoun <$> (_take *> _an *> (nounPhrase' <|> nounPhrasePl)) <*> justification <* _dot <*> proof
+  take <- rule $ takeVar <|> takeNoun
+  suffices <- rule $ Suffices <$> (_sufficesThat *> stmt) <*> (justification <* _dot) <*> proof
+  subclaim <- rule $ Subclaim <$> (_show *> stmt <* _dot) <*> env_ "subproof" proof <*> proof
+  have <- rule $ Have <$> optional (_since *> stmt <* _comma <* _have) <* optional _haveIntro <*> stmt <*> justification <* _dot <*> proof
+
+  define <- rule $ Define <$> (_let *> beginMath *> varSymbol <* _eq) <*> expr <* endMath <* _dot <*> proof
+  defineFunction <- rule $ DefineFunction <$> (_let *> beginMath *> varSymbol) <*> paren varSymbol <* _eq <*> expr <* endMath <* _for <* beginMath <*> varSymbol <* _in <*> expr <* endMath <* _dot <*> proof
+
+  proof <- rule $ asum [byContradiction, byCases, bySetInduction, byOrdInduction, calc, subclaim, assume, fix, take, have, suffices, define, defineFunction, qed]
+
+
+  -- Toplevel blocks, each delimited by its LaTeX environment.
+  blockAxiom <- rule $ uncurry3 BlockAxiom <$> envPos "axiom" axiom
+  blockLemma <- rule $ uncurry3 BlockLemma <$> lemmaEnv lemma
+  blockProof <- rule $ uncurry BlockProof <$> envPos_ "proof" proof
+  blockDefn <- rule $ uncurry3 BlockDefn <$> envPos "definition" defn
+  blockAbbr <- rule $ uncurry3 BlockAbbr <$> envPos "abbreviation" abbreviation
+  blockData <- rule $ uncurry BlockData <$> envPos_ "datatype" datatype
+  blockInd <- rule $ uncurry3 BlockInductive <$> envPos "inductive" inductive
+  blockSig <- rule $ (\(p, (a, s)) -> BlockSig p a s) <$> envPos_ "signature" signature
+  blockStruct <- rule $ uncurry3 BlockStruct <$> envPos "struct" structDefn
+  block <- rule $ asum [blockAxiom, blockLemma, blockDefn, blockAbbr, blockData, blockInd, blockSig, blockStruct, blockProof]
+
+  -- Starting category.
+  pure block
+
+
+-- | A bracketed proof-method annotation of the form @[proof by ...]@.
+proofBy :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+proofBy method = bracket (word "proof" *> word "by" *> method)
+
+
+-- | A lemma-like environment: any of the usual theorem-kind names.
+lemmaEnv :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, Marker, a)
+lemmaEnv content = asum [envPos kind content | kind <- kinds]
+  where
+    kinds = ["theorem", "lemma", "corollary", "claim", "proposition"]
+
+
+-- | A disjunctive list:
+-- * 'a or b'
+-- * 'a, b, or c'
+-- * 'a, b, c, or d'
+--
+-- NOTE(review): despite the name suggesting at least two items, the first
+-- alternative uses 'many', so a single item with no 'or' at all also
+-- parses — confirm whether two items should be enforced (cf. 'commaList2').
+--
+orList2 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+orList2 item = ((:|) <$> item <*> many (_commaOr *> item))
+    <|> ((\i j -> i:|[j]) <$> item <* _or <*> item)
+
+
+-- | Nonempty textual lists of the form "a, b, c, and d".
+-- The final comma is mandatory, 'and' is not.
+-- Also allows "a and b". Should therefore be avoided in contexts where
+-- a logical conjunction would also be possible.
+-- Currently also allows additional 'and's after each comma...
+--
+andList1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+andList1 item = commaSeparated <|> plainPair
+  where
+    commaSeparated = (:|) <$> item <*> many (_commaAnd *> item)
+    plainPair = (\i j -> i :| [j]) <$> item <* _and <*> item
+
+-- | Like 'andList1', but drops the information about nonemptiness.
+andList1_ :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+andList1_ = fmap toList . andList1
+
+
+-- | Nonempty list of items separated by commas.
+commaList :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+commaList item = (:|) <$> item <*> tailItems
+  where
+    tailItems = many (_comma *> item)
+
+-- | Like 'commaList', but drops the information about nonemptiness.
+commaList_ :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+commaList_ = fmap NonEmpty.toList . commaList
+
+-- | Like 'commaList', but requires at least two items (and hence at least one comma).
+commaList2 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+commaList2 item = (:|) <$> (item <* _comma) <*> commaList_ item
+
+
+-- | A textual 'and'-separated list of assumptions.
+assumptionList :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+assumptionList = fmap NonEmpty.toList . andList1
+
+-- | Like 'enumerated1', but drops the information about nonemptiness.
+enumerated :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a]
+enumerated = fmap NonEmpty.toList . enumerated1
+
+-- | A LaTeX @enumerate@ environment with at least one @\item@.
+enumerated1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a)
+enumerated1 p = begin "enumerate" *> items <* end "enumerate" <?> "\"\\begin{enumerate} ...\""
+  where
+    items = many1 (command "item" *> p)
+
+
+-- | Like 'enumeratedMarked1', but drops the information about nonemptiness.
+enumeratedMarked :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [(Marker, a)]
+enumeratedMarked = fmap NonEmpty.toList . enumeratedMarked1
+
+-- | An @enumerate@ environment whose items each carry a @\label@ marker.
+enumeratedMarked1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty (Marker, a))
+enumeratedMarked1 p = begin "enumerate" *> items <* end "enumerate" <?> "\"\\begin{enumerate}\\item\\label{...}...\""
+  where
+    items = many1 ((,) <$> (command "item" *> label) <*> p)
+
+
+
+
+-- This function could be rewritten, so that it can be used directly in the grammar,
+-- instead of with specialized variants.
+--
+-- | Parse any of the lexical phrases obtained from the lexicon via
+-- @selector@, filling each variable slot of the phrase with @arg@.
+phraseOf
+  :: (pat -> [a] -> b)
+  -> Lexicon
+  -> (Lexicon -> HashSet pat)
+  -> (pat -> LexicalPhrase)
+  -> Prod r Text (Located Token) a
+  -> Prod r Text (Located Token) b
+phraseOf constr lexicon selector proj arg =
+  asum [constr pat <$> slots (proj pat) | pat <- HS.toList (selector lexicon)]
+  where
+    -- A 'Just' component is a literal token; a 'Nothing' component is
+    -- a slot to be filled by the argument production.
+    slots = \case
+      Just w : ws -> token w *> slots ws
+      Nothing : ws -> (:) <$> arg <*> slots ws
+      [] -> pure []
+
+-- | Left adjective phrase from the lexicon (used before the noun).
+adjLOf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjLOf arg)
+adjLOf lexicon arg = phraseOf AdjL lexicon (HM.keysSet . lexiconAdjLs) id arg <?> "a left adjective"
+
+-- | Right adjective phrase from the lexicon (used after the noun).
+adjROf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjROf arg)
+adjROf lexicon arg = phraseOf AdjR lexicon (HM.keysSet . lexiconAdjRs) id arg <?> "a right adjective"
+
+-- | Adjective phrase from the lexicon.
+adjOf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjOf arg)
+adjOf lexicon arg = phraseOf Adj lexicon (HM.keysSet . lexiconAdjs) id arg <?> "an adjective"
+
+-- | Verb phrase from the lexicon; @proj@ selects the singular or
+-- plural form (callers pass 'sg' or 'pl').
+verbOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) a
+  -> Prod r Text (Located Token) (VerbOf a)
+verbOf lexicon proj arg = phraseOf Verb lexicon (HM.keysSet . lexiconVerbs) proj arg
+
+-- | Functional phrase from the lexicon; @proj@ selects the singular or
+-- plural form.
+funOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) a
+  -> Prod r Text (Located Token) (FunOf a)
+funOf lexicon proj arg = phraseOf Fun lexicon (HM.keysSet . lexiconFuns) proj arg <?> "functional phrase"
+
+
+-- | A noun with a @t VarSymbol@ as name(s). Each phrase is split at its
+-- variable slot so that the name production can be placed in between.
+nounOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) arg
+  -> Prod r Text (Located Token) (t VarSymbol)
+  -> Prod r Text (Located Token) (NounOf arg, t VarSymbol)
+nounOf lexicon proj arg vars =
+  asum (map make (HM.keys (lexiconNouns lexicon))) <?> "a noun"
+  where
+    make pat =
+      let (before, after) = splitOnVariableSlot (proj pat)
+      in (\argsB xs argsA -> (Noun pat (argsB <> argsA), xs))
+           <$> walk before <*> vars <*> walk after
+    walk = \case
+      Just w : ws -> token w *> walk ws
+      Nothing : ws -> (:) <$> arg <*> walk ws
+      [] -> pure []
+
+-- | A structure noun together with a name production placed at the
+-- phrase's variable slot. The slot arguments themselves are discarded;
+-- only the phrase and the parsed name are returned.
+structNounOf
+  :: Lexicon
+  -> (SgPl LexicalPhrase -> LexicalPhrase)
+  -> Prod r Text (Located Token) arg
+  -> Prod r Text (Located Token) name
+  -> Prod r Text (Located Token) (StructPhrase, name)
+structNounOf lexicon proj arg name =
+  asum (map make (HM.keys (lexiconStructNouns lexicon))) <?> "a structure noun"
+  where
+    make pat =
+      let (before, after) = splitOnVariableSlot (proj pat)
+      in (\_argsB xs _argsA -> (pat, xs)) <$> walk before <*> name <*> walk after
+    walk = \case
+      Just w : ws -> token w *> walk ws
+      Nothing : ws -> (:) <$> arg <*> walk ws
+      [] -> pure []
+
+
+-- | Build a production for symbolic definition patterns from the mixfix
+-- operator table: every literal token of an operator is matched as-is
+-- and every hole is filled with a variable symbol.
+symbolicPatternOf
+  :: forall r. [[(Holey (Prod r Text (Located Token) Token), Associativity)]]
+  -> Prod r Text (Located Token) VarSymbol
+  -> Grammar r (Prod r Text (Located Token) SymbolPattern)
+symbolicPatternOf ops varSymbol = rule $ asum
+  [ go op
+  | ops' <- ops
+  , (op, _assoc) <- ops'  -- associativity is irrelevant for patterns
+  ] <?> "a symbolic pattern"
+  where
+    go :: Holey (Prod r Text (Located Token) Token) -> Prod r Text (Located Token) SymbolPattern
+    go [] = pure $ SymbolPattern [] []
+    go (head : tail) = case head of
+      Just symb -> (\s (SymbolPattern op vs) -> SymbolPattern (Just s : op) vs) <$> symb <*> go tail
+      Nothing -> (\v (SymbolPattern op vs) -> SymbolPattern (Nothing : op) (v : vs)) <$> varSymbol <*> go tail
+
+
+-- | Assemble a 'NounPhrase' from its parsed components.
+makeNounPhrase
+  :: [AdjL]
+  -> (Noun, t VarSymbol)
+  -> [AdjR]
+  -> Maybe Stmt
+  -> NounPhrase t
+makeNounPhrase adjsL (noun, names) adjsR suchThat =
+  NounPhrase adjsL noun names adjsR suchThat
+
+
+
+
+-- | Start/end delimiter of a LaTeX environment of the given kind,
+-- returning the delimiter's source position.
+begin, end :: Text -> Prod r Text (Located Token) SourcePos
+begin kind = tokenPos (BeginEnv kind) <?> expected
+  where expected = "\"\\begin{" <> kind <> "}\""
+end kind = tokenPos (EndEnv kind) <?> expected
+  where expected = "\"\\end{" <> kind <> "}\""
+
+-- | Surround a production rule @body@ with an environment of a certain @kind@,
+-- requiring a marker specified in a @\\label@. An optional bracketed title
+-- after the beginning of the environment is ignored.
+envPos :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, Marker, a)
+envPos kind body =
+  (\p _title m a -> (p, m, a))
+    <$> (begin kind <?> ("start of a \"" <> kind <> "\" environment"))
+    <*> optional title
+    <*> label
+    <*> (body <* end kind)
+  where
+    title :: Prod r Text (Located Token) [Token]
+    title = bracket (many (unLocated <$> satisfy (\ltok -> unLocated ltok /= BracketR)))
+
+-- | Like 'envPos', but without allowing titles (and without a label
+-- requirement: the label is optional and discarded).
+envPos_ :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, a)
+envPos_ kind body = (,) <$> begin kind <*> (optional label *> body <* end kind)
+
+-- | Like 'envPos_', but discarding the source position as well.
+env_ :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+env_ kind body = begin kind *> (optional label *> body <* end kind)
+
+-- | A label specifying a marker for referencing via /@\\label{...}@/.
+-- Returns the marker.
+label :: Prod r Text (Located Token) Marker
+label = terminal fromLabel <?> "\"\\label{...}\""
+  where
+    fromLabel ltok = case unLocated ltok of
+      Label m -> Just (Marker m)
+      _tok -> Nothing
+
+-- | A reference via /@\\ref{...}@/. Returns the referenced markers.
+ref :: Prod r Text (Located Token) (NonEmpty Marker)
+ref = terminal fromRef
+  where
+    fromRef ltok = case unLocated ltok of
+      Ref ms -> Just (Marker <$> ms)
+      _tok -> Nothing
+
+-- | A production surrounded by math-mode delimiters.
+math :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+math body = beginMath *> body <* endMath
+
+-- | A production surrounded by a @\text{...}@ environment.
+text :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+text body = begin "text" *> body <* end "text" <?> "\"\\text{...}\""
+
+-- | Delimiters of math mode (tokenized as a \"math\" environment).
+beginMath, endMath :: Prod r Text (Located Token) SourcePos
+beginMath = begin "math" <?> "start of a formula, e.g. \"$\""
+endMath = end "math" <?> "end of a formula, e.g. \"$\""
+
+-- | A production surrounded by parentheses.
+paren :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+paren body = token ParenL *> body <* token ParenR <?> "\"(...)\""
+
+-- | A production surrounded by square brackets.
+bracket :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+bracket body = token BracketL *> body <* token BracketR <?> "\"[...]\""
+
+-- | A production surrounded by visible (escaped) braces.
+brace :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+brace body = token VisibleBraceL *> body <* token VisibleBraceR <?> "\"\\{...\\}\""
+
+-- | A production surrounded by invisible (TEX grouping) braces.
+group :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+group body = token InvisibleBraceL *> body <* token InvisibleBraceR <?> "\"{...}\""
+
+-- | A production surrounded by an @align*@ environment.
+align :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a
+align body = begin "align*" *> body <* end "align*"
+
+
+-- | Extract a variable symbol from a token, if it is one.
+maybeVarToken :: Located Token -> Maybe VarSymbol
+maybeVarToken ltok
+  | Variable x <- unLocated ltok = Just (NamedVar x)
+  | otherwise = Nothing
+
+-- | Extract the text of a word token, if it is one.
+maybeWordToken :: Located Token -> Maybe Text
+maybeWordToken ltok
+  | Word n <- unLocated ltok = Just n
+  | otherwise = Nothing
+
+-- | Extract the value of an integer token, if it is one.
+maybeIntToken :: Located Token -> Maybe Int
+maybeIntToken ltok
+  | Integer n <- unLocated ltok = Just n
+  | otherwise = Nothing
+
+-- | Extract the name of a TEX command token, if it is one.
+maybeCmdToken :: Located Token -> Maybe Text
+maybeCmdToken ltok
+  | Command n <- unLocated ltok = Just n
+  | otherwise = Nothing
+
+-- | Match the TEX command belonging to the given structure symbol.
+structSymbol :: StructSymbol -> Prod r Text (Located Token) StructSymbol
+structSymbol s@(StructSymbol c) = terminal matchCmd
+  where
+    matchCmd ltok = case unLocated ltok of
+      Command c' | c' == c -> Just s
+      _ -> Nothing
+
+-- | Tokens that are allowed to appear in labels of environments.
+-- NOTE(review): a hyphen is mapped to the empty text (i.e. dropped from
+-- the tag) while an apostrophe is kept — confirm this asymmetry is intended.
+maybeTagToken :: Located Token -> Maybe Text
+maybeTagToken ltok
+  | Symbol "'" <- tok = Just "'"
+  | Symbol "-" <- tok = Just ""
+  | otherwise = maybeWordToken ltok
+  where
+    tok = unLocated ltok
+
+
+-- | Match exactly the given token, returning it.
+token :: Token -> Prod r Text (Located Token) Token
+token tok = terminal match <?> tokToText tok
+  where
+    match ltok
+      | unLocated ltok == tok = Just tok
+      | otherwise = Nothing
+
+-- | Match exactly the given token, returning its source position.
+tokenPos :: Token -> Prod r Text (Located Token) SourcePos
+tokenPos tok = terminal match <?> tokToText tok
+  where
+    match ltok
+      | unLocated ltok == tok = Just (startPos ltok)
+      | otherwise = Nothing
diff --git a/source/Syntax/Concrete/Keywords.hs b/source/Syntax/Concrete/Keywords.hs
new file mode 100644
index 0000000..e0f577e
--- /dev/null
+++ b/source/Syntax/Concrete/Keywords.hs
@@ -0,0 +1,222 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-|
+This module defines lots of keywords and various filler
+phrases. The prefix underscore indicates that we do not
+care about the parse result (analogous to discarding
+like @...; _ <- action; ...@ in do-notation). Moreover,
+this convention allows the use of short names that would
+otherwise be Haskell keywords or clash with other definitions.
+Care should be taken when introducing too many variants of
+a keyword, lest the grammar become needlessly ambiguous!
+
+The names are chosen using the following criteria:
+
+ * As short as possible (e.g.: @_since@ over @_because@).
+
+ * Sound like a keyword (e.g.: @_show@).
+
+This module also defines symbols that have special uses
+(such as @_colon@ for its use in type signatures).
+-}
+module Syntax.Concrete.Keywords where
+
+
+import Base
+import Syntax.Token
+
+import Text.Earley (Prod, (<?>), terminal)
+import Text.Megaparsec (SourcePos)
+
+infixr 0 ?
+-- | Variant of '<?>' for annotating literal tokens: wraps the expected
+-- text in double quotes before attaching it as an error label.
+(?) :: Prod r Text t a -> Text -> Prod r Text t a
+prod ? expected = prod <?> quoted
+  where
+    quoted = "\"" <> expected <> "\""
+
+-- | Match the given word token and return its source position.
+word :: Text -> Prod r Text (Located Token) SourcePos
+word w = terminal matchWord
+  where
+    matchWord ltok
+      | Word w' <- unLocated ltok, w' == w = Just (startPos ltok)
+      | otherwise = Nothing
+
+-- | Match the given symbol token and return its source position.
+symbol :: Text -> Prod r Text (Located Token) SourcePos
+symbol s = terminal matchSymbol
+  where
+    matchSymbol ltok
+      | Symbol s' <- unLocated ltok, s' == s = Just (startPos ltok)
+      | otherwise = Nothing
+
+-- | Match the given TeX command token and return its source position.
+command :: Text -> Prod r Text (Located Token) SourcePos
+command cmd = terminal matchCommand
+  where
+    matchCommand ltok
+      | Command cmd' <- unLocated ltok, cmd' == cmd = Just (startPos ltok)
+      | otherwise = Nothing
+
+-- | Parse an arity word (\"unary\" through \"denary\") to its numeric
+-- value. (The error label previously read @\"binary\'@ with a
+-- mismatched closing quote.)
+_arity :: Prod r Text (Located Token) Int
+_arity = asum
+  [ 1 <$ word "unary"
+  , 2 <$ word "binary"
+  , 3 <$ word "ternary"
+  , 4 <$ word "quaternary"
+  , 5 <$ word "quinary"
+  , 6 <$ word "senary"
+  , 7 <$ word "septenary"
+  , 8 <$ word "octonary"
+  , 9 <$ word "nonary"
+  , 10 <$ word "denary"
+  ] <?> "\"unary\", \"binary\", ..."
+
+-- * Keywords
+
+-- Each keyword parser yields the source position of the first token it
+-- consumes (@p <* q@ keeps the result of @p@).
+_an :: Prod r Text (Located Token) SourcePos
+_an = word "a" <|> word "an" <?> "indefinite article"
+_and :: Prod r Text (Located Token) SourcePos
+_and = word "and" ? "and"
+_are :: Prod r Text (Located Token) SourcePos
+_are = word "are" ? "are"
+_asFollows :: Prod r Text (Located Token) SourcePos
+_asFollows = word "as" <* word "follows" ? "as follows"
+_assumption :: Prod r Text (Located Token) SourcePos
+_assumption = word "assumption" ? "assumption"
+_be :: Prod r Text (Located Token) SourcePos
+_be = word "be" ? "be"
+_by :: Prod r Text (Located Token) SourcePos
+_by = word "by" ? "by"
+_bySetExt :: Prod r Text (Located Token) SourcePos
+_bySetExt = word "by" <* ((word "set" ? "set") <* word "extensionality") ? "by set extensionality"
+_can :: Prod r Text (Located Token) SourcePos
+_can = word "can" ? "can"
+_consistsOf :: Prod r Text (Located Token) SourcePos
+_consistsOf = word "consists" <* word "of" ? "consists of"
+_contradiction :: Prod r Text (Located Token) SourcePos
+_contradiction = optional (word "a") *> word "contradiction" ? "a contradiction"
+_define :: Prod r Text (Located Token) SourcePos
+_define = word "define" ? "define"
+_definition :: Prod r Text (Located Token) SourcePos
+_definition = word "definition" ? "definition"
+_denote :: Prod r Text (Located Token) SourcePos
+_denote = word "denote" <|> (word "stand" <* word "for") ? "denote"
+_denotes :: Prod r Text (Located Token) SourcePos
+_denotes = word "denotes" ? "denotes"
+_do :: Prod r Text (Located Token) SourcePos
+_do = word "do" ? "do"
+_does :: Prod r Text (Located Token) SourcePos
+_does = word "does" ? "does"
+_either :: Prod r Text (Located Token) SourcePos
+_either = word "either" ? "either"
+_equipped :: Prod r Text (Located Token) SourcePos
+_equipped = (word "equipped" <|> word "together") <* word "with" ? "equipped with"
+_every :: Prod r Text (Located Token) SourcePos
+_every = (word "every") ? "every"
+_exist :: Prod r Text (Located Token) SourcePos
+_exist = word "there" <* word "exist" ? "there exist"
+_exists :: Prod r Text (Located Token) SourcePos
+_exists = word "there" <* word "exists" ? "there exists"
+-- NOTE(review): '_extends' also accepts a bare "is", yet its error
+-- label mentions only "consists of" — confirm the label is intended.
+_extends :: Prod r Text (Located Token) SourcePos
+_extends = (_is) <|> (word "consists" <* word "of") ? "consists of"
+_fix :: Prod r Text (Located Token) SourcePos
+_fix = word "fix" ? "fix"
+_follows :: Prod r Text (Located Token) SourcePos
+_follows = word "follows" ? "follows"
+_for :: Prod r Text (Located Token) SourcePos
+_for = word "for" ? "for"
+_forAll :: Prod r Text (Located Token) SourcePos
+_forAll = (word "for" <* word "all") <|> word "all" ? "all"
+_forEvery :: Prod r Text (Located Token) SourcePos
+_forEvery = (word "for" <* word "every") <|> (word "every") ? "for every"
+_have :: Prod r Text (Located Token) SourcePos
+_have = word "we" <* word "have" <* optional (word "that") ? "we have"
+_if :: Prod r Text (Located Token) SourcePos
+_if = word "if" ? "if"
+_iff :: Prod r Text (Located Token) SourcePos
+_iff = word "iff" <|> (word "if" <* word "and" <* word "only" <* word "if") ? "iff"
+_inductively :: Prod r Text (Located Token) SourcePos
+_inductively = word "inductively" ? "inductively"
+_is :: Prod r Text (Located Token) SourcePos
+_is = word "is" ? "is"
+_itIsWrong :: Prod r Text (Located Token) SourcePos
+_itIsWrong = word "it" <* word "is" <* (word "not" <* word "the" <* word "case" <|> word "wrong") <* word "that" ? "it is wrong that"
+_let :: Prod r Text (Located Token) SourcePos
+_let = word "let" ? "let"
+_neither :: Prod r Text (Located Token) SourcePos
+_neither = word "neither" ? "neither"
+_no :: Prod r Text (Located Token) SourcePos
+_no = word "no" ? "no"
+_nor :: Prod r Text (Located Token) SourcePos
+_nor = word "nor" ? "nor"
+_not :: Prod r Text (Located Token) SourcePos
+_not = word "not" ? "not"
+_omitted :: Prod r Text (Located Token) SourcePos
+_omitted = word "omitted" ? "omitted"
+_on :: Prod r Text (Located Token) SourcePos
+_on = word "on" ? "on"
+_oneOf :: Prod r Text (Located Token) SourcePos
+_oneOf = word "one" <* word "of" ? "one of"
+_or :: Prod r Text (Located Token) SourcePos
+_or = word "or" ? "or"
+-- Note that '_particularly' also consumes the trailing comma.
+_particularly :: Prod r Text (Located Token) SourcePos
+_particularly = (word "particularly" <|> (word "in" *> word "particular")) <* _comma ? "particularly"
+_relation :: Prod r Text (Located Token) SourcePos
+_relation = word "relation" ? "relation"
+_satisfying :: Prod r Text (Located Token) SourcePos
+_satisfying = _suchThat <|> word "satisfying" ? "satisfying"
+_setOf :: Prod r Text (Located Token) SourcePos
+_setOf = word "set" <* word "of" ? "set of"
+-- | Phrases introducing a goal, e.g. /@we now show that@/.
+-- Labelled like the sibling keywords (the label was previously missing).
+_show :: Prod r Text (Located Token) SourcePos
+_show = optional (word "first" <|> word "finally" <|> word "next" <|> word "now") *> optional (word "we") *> word "show" <* optional (word "that") ? "show"
+_since :: Prod r Text (Located Token) SourcePos
+_since = word "since" <|> word "because" ? "since"
+_some :: Prod r Text (Located Token) SourcePos
+_some = word "some" ? "some"
+_suchThat :: Prod r Text (Located Token) SourcePos
+_suchThat = ((word "such" <* word "that") <|> (word "s" <* _dot <* word "t" <* _dot)) ? "such that"
+_sufficesThat :: Prod r Text (Located Token) SourcePos
+_sufficesThat = word "it" <* word "suffices" <* word "to" <* word "show" <* word "that" ? "it suffices to show"
+_suppose :: Prod r Text (Located Token) SourcePos
+_suppose = (word "suppose" <|> word "assume") <* optional (word "that") ? "assume"
+_take :: Prod r Text (Located Token) SourcePos
+_take = word "take" <|> word "consider" ? "take"
+_that :: Prod r Text (Located Token) SourcePos
+_that = word "that" ? "that"
+_the :: Prod r Text (Located Token) SourcePos
+_the = word "the" ? "the"
+_then :: Prod r Text (Located Token) SourcePos
+_then = word "then" ? "then"
+_throughout :: Prod r Text (Located Token) SourcePos
+_throughout = word "throughout" <* optional (word "this" <* word "section") <* optional _comma <|> (word "in" <* word "the" <* word "sequel") ? "throughout"
+-- NOTE(review): '_thus' also accepts "then" and "now", overlapping with
+-- '_then' and the optional prefixes of '_show' — confirm the grammar
+-- tolerates this ambiguity.
+_thus :: Prod r Text (Located Token) SourcePos
+_thus = word "thus" <|> word "then" <|> word "hence" <|> word "now" <|> word "finally" <|> word "therefore" ? "thus"
+_trivial :: Prod r Text (Located Token) SourcePos
+_trivial = word "straightforward" <|> word "trivial" ? "trivial"
+_unique :: Prod r Text (Located Token) SourcePos
+_unique = word "unique" ? "unique"
+_write :: Prod r Text (Located Token) SourcePos
+_write = (optional (word "we") *> word "say" <* optional (word "that")) <|> (optional (word "we") *> word "write") ? "write"
+
+-- | Introducing plain claims in proofs.
+_haveIntro :: Prod r Text (Located Token) SourcePos
+_haveIntro = _thus <|> _particularly <|> _have
+
+-- * Symbols
+
+-- Symbol parsers, like the keyword parsers above, return the source
+-- position of the first token they consume.
+_colon :: Prod r Text (Located Token) SourcePos
+_colon = symbol ":" ? ":"
+_pipe :: Prod r Text (Located Token) SourcePos
+_pipe = symbol "|" <|> command "mid" ? "\\mid"
+_comma :: Prod r Text (Located Token) SourcePos
+_comma = symbol "," ? ","
+_commaAnd :: Prod r Text (Located Token) SourcePos
+_commaAnd = symbol "," <* optional (word "and") ? ", and"
+_commaOr :: Prod r Text (Located Token) SourcePos
+_commaOr = symbol "," <* optional (word "or") ? ", or"
+_defeq :: Prod r Text (Located Token) SourcePos
+_defeq = symbol ":=" ? ":=" -- Should use `\coloneq` from unicode-math as display.
+_dot :: Prod r Text (Located Token) SourcePos
+_dot = symbol "." ? "."
+_eq :: Prod r Text (Located Token) SourcePos
+_eq = symbol "=" ? "="
+_in :: Prod r Text (Located Token) SourcePos
+_in = symbol "∈" <|> command "in" ? "\\in"
+_subseteq :: Prod r Text (Located Token) SourcePos
+_subseteq = command "subseteq" ? "\\subseteq" -- Label previously read ":" (copy-paste from '_colon').
diff --git a/source/Syntax/Import.hs b/source/Syntax/Import.hs
new file mode 100644
index 0000000..33082f1
--- /dev/null
+++ b/source/Syntax/Import.hs
@@ -0,0 +1,43 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE ApplicativeDo #-}
+
+
+module Syntax.Import where
+
+
+import Base
+
+import Text.Regex.Applicative.Text
+import Data.CharSet qualified as CharSet
+import Data.Char (isAlphaNum)
+
+-- | Collect the paths of all imports in the import header of a
+-- document, returning the empty list when the header does not match.
+gatherImports :: Text -> [FilePath]
+gatherImports s = maybe [] fst (findFirstPrefix imps s)
+
+
+-- | Match the import header of a document: an optional leading import,
+-- further imports each preceded by a newline, and finally the first
+-- @\begin{...}@. Requiring the newline makes it possible to comment
+-- imports out.
+-- NOTE(review): this relies on ApplicativeDo — the statement order of
+-- the regex actions is significant and must not be rearranged.
+imps :: RE Char [FilePath]
+imps = do
+  mpath <- optional imp
+  paths <- many (few anySym *> string "\n" *> imp)
+  few anySym
+  begin
+  pure case mpath of
+    Nothing -> paths
+    Just path -> path : paths
+
+
+-- | Match a single @\import{path}@ statement. The preceding-newline
+-- requirement (which makes it possible to comment imports out) lives
+-- in 'imps'.
+imp :: RE Char FilePath
+imp = do
+  string "\\import{"
+  path <- few (psym isTheoryNameChar)
+  sym '}'
+  pure path
+
+-- | Characters permitted in theory names: alphanumerics plus the
+-- punctuation @.@, @-@, @_@ and @/@ (allowing relative paths).
+isTheoryNameChar :: Char -> Bool
+isTheoryNameChar c = isAlphaNum c || CharSet.member c (CharSet.fromList ".-_/")
+
+-- | Match the start of the first environment, which terminates the
+-- import header.
+begin :: RE Char Text
+begin = string "\\begin{"
diff --git a/source/Syntax/Internal.hs b/source/Syntax/Internal.hs
new file mode 100644
index 0000000..44603ad
--- /dev/null
+++ b/source/Syntax/Internal.hs
@@ -0,0 +1,612 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+-- | Data types for the internal (semantic) syntax tree.
+module Syntax.Internal
+ ( module Syntax.Internal
+ , module Syntax.Abstract
+ , module Syntax.LexicalPhrase
+ , module Syntax.Token
+ ) where
+
+
+import Base
+import Syntax.Lexicon (pattern PairSymbol, pattern ConsSymbol)
+import Syntax.LexicalPhrase (LexicalPhrase, SgPl(..), unsafeReadPhrase, unsafeReadPhraseSgPl)
+import Syntax.Token (Token(..))
+
+import Syntax.Abstract
+ ( Chain(..)
+ , Connective(..)
+ , VarSymbol(..)
+ , FunctionSymbol
+ , RelationSymbol
+ , StructSymbol (..)
+ , Relation
+ , VarSymbol(..)
+ , PropositionalConstant(..)
+ , StructPhrase
+ , Justification(..)
+ , Marker(..)
+ , pattern CarrierSymbol, pattern ConsSymbol
+ )
+
+import Bound
+import Bound.Scope
+import Data.Deriving (deriveShow1, deriveEq1, deriveOrd1)
+import Data.Hashable.Lifted
+import Data.HashMap.Strict qualified as HM
+import Data.Maybe
+import Data.Set qualified as Set
+import Data.List qualified as List
+import Data.List.NonEmpty qualified as NonEmpty
+import Text.Megaparsec.Pos (SourcePos)
+
+-- | 'Symbol's can be used as function and relation symbols.
+data Symbol
+  = SymbolMixfix FunctionSymbol -- ^ A mixfix operator symbol.
+  | SymbolFun (SgPl LexicalPhrase) -- ^ A function named by a lexical phrase.
+  | SymbolInteger Int -- ^ An integer constant.
+  | SymbolPredicate Predicate -- ^ A predicate used in symbol position.
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+-- | The different grammatical shapes a predicate can take.
+data Predicate
+  = PredicateAdj LexicalPhrase -- ^ Adjectival phrase.
+  | PredicateVerb (SgPl LexicalPhrase) -- ^ Verb phrase.
+  | PredicateNoun (SgPl LexicalPhrase) -- ^ /@\<...\> is a \<...\>@/.
+  | PredicateRelation RelationSymbol -- ^ Symbolic relation, e.g. @=@ or @\in@.
+  | PredicateSymbol Text -- ^ Predicate named by raw text.
+  | PredicateNounStruct (SgPl LexicalPhrase) -- ^ /@\<...\> is a \<...\>@/.
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+
+-- | The two flavours of 'Quantified'.
+data Quantifier
+  = Universally
+  | Existentially
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- Formulas, terms and expressions share one representation ('ExprOf');
+-- the aliases merely document intent.
+type Formula = Term
+type Term = Expr
+type Expr = ExprOf VarSymbol
+
+
+-- | Internal higher-order expressions. Terms and formulas share this
+-- single representation.
+data ExprOf a
+  = TermVar a
+  -- ^ A variable occurrence. Fresh constants disjoint from all
+  -- user-named identifiers can be used to eliminate higher-order
+  -- constructs.
+  --
+  | TermSymbol Symbol [ExprOf a]
+  -- ^ Application of a symbol (including function and predicate symbols).
+  | TermSymbolStruct StructSymbol (Maybe (ExprOf a))
+  -- ^ A structure operation, optionally annotated with the structure it
+  -- belongs to ('annotateWith' fills in a missing annotation).
+  | Apply (ExprOf a) (NonEmpty (ExprOf a))
+  -- ^ Higher-order application.
+  --
+  | TermSep VarSymbol (ExprOf a) (Scope () ExprOf a)
+  -- ^ Set comprehension using separation, e.g.: /@{ x ∈ X | P(x) }@/.
+  --
+  | Iota VarSymbol (Scope () ExprOf a)
+  -- ^ Definite descriptor.
+  --
+  | ReplacePred VarSymbol VarSymbol (ExprOf a) (Scope ReplacementVar ExprOf a)
+  -- ^ Replacement for single-valued predicates. The concrete syntax for these
+  -- syntactically requires a bounded existential quantifier in the condition:
+  --
+  -- /@$\\{ y | \\exists x\\in A. P(x,y) \\}$@/
+  --
+  -- In definitions the single-valuedness of @P@ becomes a proof obligation.
+  -- In other cases we could instead add it as constraint
+  --
+  -- /@$b\\in \\{ y | \\exists x\\in A. P(x,y) \\}$@/
+  -- /@iff@/
+  -- /@$\\exists x\\in A. P(x,y)$ and $P$ is single valued@/
+  --
+  --
+  | ReplaceFun (NonEmpty (VarSymbol, ExprOf a)) (Scope VarSymbol ExprOf a) (Scope VarSymbol ExprOf a)
+  -- ^ Set comprehension using functional replacement,
+  -- e.g.: /@{ f(x, y) | x ∈ X; y ∈ Y; P(x, y) }@/.
+  -- The list of pairs gives the domains. The first scope is the lhs, the
+  -- second scope can be used for additional constraints on the variables
+  -- (i.e. implicit separation over the product of the domains).
+  -- NOTE(review): an earlier version of this comment spoke of integer
+  -- list indices and an "optional" scope, but both scopes bind
+  -- 'VarSymbol's and neither is optional — confirm which description
+  -- is current.
+  --
+  | Connected Connective (ExprOf a) (ExprOf a)
+  | Lambda (Scope VarSymbol ExprOf a)
+  | Quantified Quantifier (Scope VarSymbol ExprOf a)
+  | PropositionalConstant PropositionalConstant
+  | Not (ExprOf a)
+  deriving (Functor, Foldable, Traversable)
+
+-- | Marker for the two variables bound by 'ReplacePred'.
+data ReplacementVar = ReplacementDomVar | ReplacementRangeVar deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- 'makeBound' generates the substitution ('Applicative'/'Monad')
+-- instances for 'ExprOf'; the lifted Show1/Eq1/Ord1 instances support
+-- the standalone instances below and uses of 'ExprOf' under 'Scope'.
+makeBound ''ExprOf
+
+deriveShow1 ''ExprOf
+deriveEq1 ''ExprOf
+deriveOrd1 ''ExprOf
+
+deriving instance Show a => Show (ExprOf a)
+deriving instance Eq a => Eq (ExprOf a)
+deriving instance Ord a => Ord (ExprOf a)
+
+deriving instance Generic (ExprOf a)
+deriving instance Generic1 ExprOf
+
+deriving instance Hashable1 ExprOf
+
+deriving instance Hashable a => Hashable (ExprOf a)
+
+
+-- | Bind exactly the given variable, leaving all others free.
+abstractVarSymbol :: VarSymbol -> ExprOf VarSymbol -> Scope VarSymbol ExprOf VarSymbol
+abstractVarSymbol x = abstract match
+  where
+    match y = if y == x then Just y else Nothing
+
+-- | Bind every variable contained in the given collection.
+abstractVarSymbols :: Foldable t => t VarSymbol -> ExprOf VarSymbol -> Scope VarSymbol ExprOf VarSymbol
+abstractVarSymbols xs = abstract match
+  where
+    match y
+      | y `elem` xs = Just y
+      | otherwise = Nothing
+
+-- | Use the given set of in-scope structure labels to cast them to
+-- their carriers when occurring on the rhs of the element relation.
+-- Use the given map to annotate (unannotated) structure operations
+-- with the most recent in-scope appropriate label.
+annotateWith :: Set VarSymbol -> HashMap StructSymbol VarSymbol -> Formula -> Formula
+annotateWith = go
+  where
+    -- Going under a binder wraps each ambient variable in 'F' (free),
+    -- so the environments are lifted with 'Set.map F' / 'fmap F'.
+    go :: (Ord a) => Set a -> HashMap StructSymbol a -> ExprOf a -> ExprOf a
+    go labels ops = \case
+      TermSymbolStruct symb Nothing ->
+        TermSymbolStruct symb (TermVar <$> HM.lookup symb ops)
+      e@TermSymbolStruct{} ->
+        e
+      a `IsElementOf` TermVar x | x `Set.member` labels ->
+        go labels ops a `IsElementOf` TermSymbolStruct CarrierSymbol (Just (TermVar x))
+      Not a ->
+        Not (go labels ops a)
+      Connected conn a b ->
+        Connected conn (go labels ops a) (go labels ops b)
+      Quantified quant body ->
+        Quantified quant (toScope (go (Set.map F labels) (F <$> ops) (fromScope body)))
+      e@TermVar{} -> e
+      TermSymbol symb args ->
+        TermSymbol symb (go labels ops <$> args)
+      Apply e1 args ->
+        Apply (go labels ops e1) (go labels ops <$> args)
+      TermSep vs e scope ->
+        TermSep vs (go labels ops e) (toScope (go (Set.map F labels) (F <$> ops) (fromScope scope)))
+      Iota x body ->
+        Iota x (toScope (go (Set.map F labels) (F <$> ops) (fromScope body)))
+      ReplacePred y x xB scope ->
+        ReplacePred y x (go labels ops xB) (toScope (go (Set.map F labels) (F <$> ops) (fromScope scope)))
+      ReplaceFun bounds ap cond ->
+        ReplaceFun
+          (fmap (\(x, e) -> (x, go labels ops e)) bounds)
+          (toScope (go (Set.map F labels) (F <$> ops) (fromScope ap)))
+          (toScope (go (Set.map F labels) (F <$> ops) (fromScope cond)))
+      Lambda body ->
+        Lambda (toScope (go (Set.map F labels) (F <$> ops) (fromScope body)))
+      e@PropositionalConstant{} -> e
+
+-- | Does the expression contain a construct with no direct first-order
+-- counterpart (separation, description, replacement, lambda)?
+containsHigherOrderConstructs :: ExprOf a -> Bool
+containsHigherOrderConstructs = \case
+  TermSep {} -> True
+  Iota{} -> True
+  ReplacePred{}-> True
+  ReplaceFun{}-> True
+  Lambda{} -> True
+  Apply{} -> False -- FIXME: this is a lie in general; we need to add sortchecking to determine this.
+  TermVar{} -> False
+  PropositionalConstant{} -> False
+  TermSymbol _ es -> any containsHigherOrderConstructs es
+  Not e -> containsHigherOrderConstructs e
+  Connected _ e1 e2 -> containsHigherOrderConstructs e1 || containsHigherOrderConstructs e2
+  Quantified _ scope -> containsHigherOrderConstructs (fromScope scope)
+  TermSymbolStruct _ _ -> False
+
+-- | Application of a mixfix operator.
+pattern TermOp :: FunctionSymbol -> [ExprOf a] -> ExprOf a
+pattern TermOp op es = TermSymbol (SymbolMixfix op) es
+
+-- | A nullary operator consisting of a single token.
+pattern TermConst :: Token -> ExprOf a
+pattern TermConst c = TermOp [Just c] []
+
+-- | An ordered pair.
+pattern TermPair :: ExprOf a -> ExprOf a -> ExprOf a
+pattern TermPair e1 e2 = TermOp PairSymbol [e1, e2]
+
+-- | An atomic formula: a predicate applied to its arguments.
+pattern Atomic :: Predicate -> [ExprOf a] -> ExprOf a
+pattern Atomic symbol args = TermSymbol (SymbolPredicate symbol) args
+
+
+-- | Adjective applied to a subject and extra arguments.
+pattern FormulaAdj :: ExprOf a -> LexicalPhrase -> [ExprOf a] -> ExprOf a
+pattern FormulaAdj e adj es = Atomic (PredicateAdj adj) (e:es)
+
+-- | Verb applied to a subject and extra arguments.
+pattern FormulaVerb :: ExprOf a -> SgPl LexicalPhrase -> [ExprOf a] -> ExprOf a
+pattern FormulaVerb e verb es = Atomic (PredicateVerb verb) (e:es)
+
+-- | Noun predicate applied to a subject and extra arguments.
+pattern FormulaNoun :: ExprOf a -> SgPl LexicalPhrase -> [ExprOf a] -> ExprOf a
+pattern FormulaNoun e noun es = Atomic (PredicateNoun noun) (e:es)
+
+-- | /@\<arg\> is a relation@/.
+relationNoun :: Expr -> Formula
+relationNoun arg = FormulaNoun arg (unsafeReadPhraseSgPl "relation[/s]") []
+
+-- | /@\<arg\> is right-unique@/.
+rightUniqueAdj :: Expr -> Formula
+rightUniqueAdj arg = FormulaAdj arg (unsafeReadPhrase "right-unique") []
+
+-- | Untyped quantification.
+pattern Forall, Exists :: Scope VarSymbol ExprOf a -> ExprOf a
+pattern Forall scope = Quantified Universally scope
+pattern Exists scope = Quantified Existentially scope
+
+-- | Quantify over the given variables, binding their free occurrences.
+makeForall, makeExists :: Foldable t => t VarSymbol -> Formula -> Formula
+makeForall xs e = Quantified Universally (abstractVarSymbols xs e)
+makeExists xs e = Quantified Existentially (abstractVarSymbols xs e)
+
+-- | Re-scope, freeing exactly the bound variables listed in @xs@
+-- (they become 'F' occurrences) while every other bound variable
+-- stays bound and free variables stay free.
+instantiateSome :: NonEmpty VarSymbol -> Scope VarSymbol ExprOf VarSymbol -> Scope VarSymbol ExprOf VarSymbol
+instantiateSome xs scope = toScope (instantiateEither inst scope)
+  where
+    inst (Left x) | x `elem` xs = TermVar (F x)
+    inst (Left b) = TermVar (B b)
+    inst (Right fv) = TermVar (F fv)
+
+-- | Bind all free variables not occurring in the given set universally.
+-- A closed formula is returned unchanged (no vacuous quantifier).
+forallClosure :: Set VarSymbol -> Formula -> Formula
+forallClosure xs phi = if isClosed phi
+  then phi
+  else Quantified Universally (abstract isNamedVar phi)
+  where
+    isNamedVar :: VarSymbol -> Maybe VarSymbol
+    isNamedVar x = if x `Set.member` xs then Nothing else Just x
+
+-- | The set of free variables of an expression.
+freeVars :: ExprOf VarSymbol -> Set VarSymbol
+freeVars e = Set.fromList (toList e)
+
+-- Shorthands for formulas built from the binary connectives.
+pattern And :: ExprOf a -> ExprOf a -> ExprOf a
+pattern And e1 e2 = Connected Conjunction e1 e2
+
+pattern Or :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Or e1 e2 = Connected Disjunction e1 e2
+
+pattern Implies :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Implies e1 e2 = Connected Implication e1 e2
+
+pattern Iff :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Iff e1 e2 = Connected Equivalence e1 e2
+
+pattern Xor :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Xor e1 e2 = Connected ExclusiveOr e1 e2
+
+
+-- | Propositional falsity.
+pattern Bottom :: ExprOf a
+pattern Bottom = PropositionalConstant IsBottom
+
+-- | Propositional truth.
+pattern Top :: ExprOf a
+pattern Top = PropositionalConstant IsTop
+
+
+-- | A symbolic relation applied to its arguments.
+pattern Relation :: RelationSymbol -> [ExprOf a] -> ExprOf a
+pattern Relation rel es = Atomic (PredicateRelation rel) es
+
+-- | Membership.
+pattern IsElementOf :: ExprOf a -> ExprOf a -> ExprOf a
+pattern IsElementOf e1 e2 = Atomic (PredicateRelation (Command "in")) (e1 : [e2])
+
+-- | Non-membership.
+pattern IsNotElementOf :: ExprOf a -> ExprOf a -> ExprOf a
+pattern IsNotElementOf e1 e2 = Not (IsElementOf e1 e2)
+
+-- | Subset relation (non-strict).
+pattern IsSubsetOf :: ExprOf a -> ExprOf a -> ExprOf a
+pattern IsSubsetOf e1 e2 = Atomic (PredicateRelation (Command "subseteq")) (e1 : [e2])
+
+-- | Ordinal predicate.
+pattern IsOrd :: ExprOf a -> ExprOf a
+pattern IsOrd e1 = Atomic (PredicateNoun (SgPl [Just "ordinal"] [Just "ordinals"])) [e1]
+
+-- | Equality.
+pattern Equals :: ExprOf a -> ExprOf a -> ExprOf a
+pattern Equals e1 e2 = Atomic (PredicateRelation (Symbol "=")) (e1 : [e2])
+
+-- | Disequality.
+pattern NotEquals :: ExprOf a -> ExprOf a -> ExprOf a
+pattern NotEquals e1 e2 = Atomic (PredicateRelation (Command "neq")) (e1 : [e2])
+
+-- | The empty-set constant.
+pattern EmptySet :: ExprOf a
+pattern EmptySet = TermSymbol (SymbolMixfix [Just (Command "emptyset")]) []
+
+-- | Left-nested conjunction of a list of formulas;
+-- the empty conjunction is 'Top'.
+makeConjunction :: [ExprOf a] -> ExprOf a
+makeConjunction [] = Top
+makeConjunction es = List.foldl1' And es
+
+-- | Left-nested disjunction of a list of formulas;
+-- the empty disjunction is 'Bottom'.
+makeDisjunction :: [ExprOf a] -> ExprOf a
+makeDisjunction [] = Bottom
+makeDisjunction es = List.foldl1' Or es
+
+-- | The term denoting a finite set of the given elements, built by
+-- folding cons-applications over the empty set.
+finiteSet :: NonEmpty (ExprOf a) -> ExprOf a
+finiteSet = foldr cons EmptySet
+  where
+    cons x y = TermSymbol (SymbolMixfix ConsSymbol) [x, y]
+
+-- | A formula is positive iff it is not an outermost negation.
+isPositive :: ExprOf a -> Bool
+isPositive (Not _) = False
+isPositive _ = True
+
+-- | Toggle an outermost negation.
+dual :: ExprOf a -> ExprOf a
+dual (Not f) = f
+dual f = Not f
+
+
+
+-- | Local assumptions.
+data Asm
+  = Asm Formula -- ^ Assume a plain formula.
+  | AsmStruct VarSymbol StructPhrase -- ^ Assume the labelled variable is a structure of the given kind.
+
+
+deriving instance Show Asm
+deriving instance Eq Asm
+deriving instance Ord Asm
+
+-- | Assumption that a variable carries a structure.
+data StructAsm
+  = StructAsm VarSymbol StructPhrase
+
+
+
+-- | An axiom: local assumptions and the asserted formula.
+data Axiom = Axiom [Asm] Formula
+
+deriving instance Show Axiom
+deriving instance Eq Axiom
+deriving instance Ord Axiom
+
+
+-- | A lemma: local assumptions and the claimed formula.
+data Lemma = Lemma [Asm] Formula
+
+deriving instance Show Lemma
+deriving instance Eq Lemma
+deriving instance Ord Lemma
+
+
+-- | Definitions of predicates, phrase functions and operators.
+data Defn
+  = DefnPredicate [Asm] Predicate (NonEmpty VarSymbol) Formula
+  | DefnFun [Asm] (SgPl LexicalPhrase) [VarSymbol] Term
+  | DefnOp FunctionSymbol [VarSymbol] Term
+
+deriving instance Show Defn
+deriving instance Eq Defn
+deriving instance Ord Defn
+
+-- | An inductive definition of a set.
+data Inductive = Inductive
+  { inductiveSymbol :: FunctionSymbol
+  , inductiveParams :: [VarSymbol]
+  , inductiveDomain :: Expr
+  , inductiveIntros :: NonEmpty IntroRule
+  }
+  deriving (Show, Eq, Ord)
+
+-- | An introduction rule of an inductive definition.
+data IntroRule = IntroRule
+  { introConditions :: [Formula] -- The inductively defined set may only appear as an argument of monotone operations on the rhs.
+  , introResult :: Formula -- TODO Refine.
+  }
+  deriving (Show, Eq, Ord)
+
+
+data Proof
+  = Omitted
+  -- ^ Ends a proof without further verification.
+  -- This results in a “gap” in the formalization.
+  | Qed Justification
+  -- ^ Ends of a proof, leaving automation to discharge the current goal using the given justification.
+  | ByContradiction Proof
+  -- ^ Take the dual of the current goal as an assumption and
+  -- set the goal to absurdity.
+  | BySetInduction (Maybe Term) Proof
+  -- ^ ∈-induction.
+  | ByOrdInduction Proof
+  -- ^ Transfinite induction for ordinals.
+  | Assume Formula Proof
+  -- ^ Simplify goals that are implications or disjunctions.
+  | Fix (NonEmpty VarSymbol) Formula Proof
+  -- ^ Simplify universal goals (with an optional bound or such that statement)
+  | Take (NonEmpty VarSymbol) Formula Justification Proof
+  -- ^ Use existential assumptions.
+  | Suffices Formula Justification Proof
+  -- ^ NOTE(review): presumably replaces the current goal by the given
+  -- sufficient statement (justified as stated) — confirm in the checker.
+  | ByCase [Case]
+  -- ^ Proof by case. Disjunction of the case hypotheses 'Case'
+  -- must hold for this step to succeed. Each case starts a subproof,
+  -- keeping the same goal but adding the case hypothesis as an assumption.
+  -- Often this will be a classical split between /@P@/ and /@not P@/, in
+  -- which case the proof that /@P or not P@/ holds is easy.
+  --
+  | Have Formula Justification Proof
+  -- ^ An affirmation, e.g.: /@We have \<stmt\> by \<ref\>@/.
+  --
+  | Calc Calc Proof
+  -- ^ An equational or biconditional calculation; see 'Calc'.
+  | Subclaim Formula Proof Proof
+  -- ^ A claim is a sublemma with its own proof:
+  --
+  -- /@Show \<goal stmt\>. \<steps\>. \<continue other proof\>.@/
+  --
+  -- A successful first proof adds the claimed formula as an assumption
+  -- for the remaining proof.
+  --
+  | Define VarSymbol Term Proof
+  -- ^ Introduce a local definition of a variable for the rest of the proof.
+  | DefineFunction VarSymbol VarSymbol Term Term Proof
+  -- ^ Introduce a local function definition for the rest of the proof.
+
+deriving instance Show Proof
+deriving instance Eq Proof
+deriving instance Ord Proof
+
+
+
+-- | A case of a case split.
+data Case = Case
+  { caseOf :: Formula -- ^ The case hypothesis.
+  , caseProof :: Proof -- ^ Subproof carried out under that hypothesis.
+  }
+
+deriving instance Show Case
+deriving instance Eq Case
+deriving instance Ord Case
+
+-- | See 'Syntax.Abstract.Calc'. A chain of equations or biconditionals
+-- starting from an initial term, each step carrying its justification.
+data Calc
+  = Equation Term (NonEmpty (Term, Justification))
+  | Biconditionals Term (NonEmpty (Term, Justification))
+
+deriving instance Show Calc
+deriving instance Eq Calc
+deriving instance Ord Calc
+
+-- | The overall claim established by a calculation:
+-- the first term related to the last.
+calcResult :: Calc -> ExprOf VarSymbol
+calcResult = \case
+  Equation e eqns -> e `Equals` fst (NonEmpty.last eqns)
+  Biconditionals phi phis -> phi `Iff` fst (NonEmpty.last phis)
+
+-- | All single steps of a calculation, each paired with the
+-- justification of its right-hand side.
+calculation :: Calc -> [(ExprOf VarSymbol, Justification)]
+calculation = \case
+  Equation e1 eqns@((e2, jst) :| _) -> (e1 `Equals` e2, jst) : collectEquations (toList eqns)
+  Biconditionals p1 ps@((p2, jst) :| _) -> (p1 `Iff` p2, jst) : collectBiconditionals (toList ps)
+
+
+-- | Turn adjacent calculation steps into equations, keeping the
+-- justification of the later step.
+collectEquations :: [(ExprOf a, b)] -> [(ExprOf a, b)]
+collectEquations = \case
+  (e1, _) : eqns'@((e2, jst) : _) -> (e1 `Equals` e2, jst) : collectEquations eqns'
+  _ -> []
+
+-- | Turn adjacent calculation steps into biconditionals, keeping the
+-- justification of the later step.
+-- (The recursion previously went through 'collectEquations', which
+-- incorrectly turned every step after the first into an equation.)
+collectBiconditionals :: [(ExprOf a, b)] -> [(ExprOf a, b)]
+collectBiconditionals = \case
+  (p1, _) : ps@((p2, jst) : _) -> (p1 `Iff` p2, jst) : collectBiconditionals ps
+  _ -> []
+
+
+-- | A datatype given by a finite enumeration of constructors.
+newtype Datatype
+  = DatatypeFin (NonEmpty Text)
+  deriving (Show, Eq, Ord)
+
+
+-- | A signature declaration, introducing a symbol without a definition.
+data Signature
+  = SignaturePredicate Predicate (NonEmpty VarSymbol)
+  | SignatureFormula Formula -- TODO: Reconsider, this is pretty lossy.
+
+deriving instance Show Signature
+deriving instance Eq Signature
+deriving instance Ord Signature
+
+data StructDefn = StructDefn
+  { structPhrase :: StructPhrase
+  -- ^ The noun phrase naming the structure, e.g.: @partial order@ or @abelian group@.
+  , structParents :: Set StructPhrase
+  -- ^ Structures this structure extends.
+  , structDefnLabel :: VarSymbol
+  -- ^ The label naming the structure in its own definition.
+  , structDefnFixes :: Set StructSymbol
+  -- ^ List of commands representing operations,
+  -- e.g.: @\\contained@ or @\\inv@. These are used as default operation names
+  -- in instantiations such as @Let $G$ be a group@.
+  -- The commands should be set up to handle an optional struct label
+  -- which would typically be rendered as a sub- or superscript, e.g.:
+  -- @\\contained[A]@ could render as ”⊑ᴬ“.
+  , structDefnAssumes :: [(Marker, Formula)]
+  -- ^ The assumptions or axioms of the structure.
+  -- To be instantiated with the @structFixes@ of a given structure.
+  }
+
+deriving instance Show StructDefn
+deriving instance Eq StructDefn
+deriving instance Ord StructDefn
+
+
+-- | An abbreviation for a symbol. The scope is closed ('Void' means no
+-- free variables); its bound 'Int's presumably index the abbreviation's
+-- argument positions — confirm at the expansion site.
+data Abbreviation
+  = Abbreviation Symbol (Scope Int ExprOf Void)
+  deriving (Show, Eq, Ord)
+
+-- | Top-level blocks of a document, each carrying its source position.
+data Block
+  = BlockAxiom SourcePos Marker Axiom
+  | BlockLemma SourcePos Marker Lemma
+  | BlockProof SourcePos Proof
+  | BlockDefn SourcePos Marker Defn
+  | BlockAbbr SourcePos Marker Abbreviation
+  | BlockStruct SourcePos Marker StructDefn
+  | BlockInductive SourcePos Marker Inductive
+  | BlockSig SourcePos [Asm] Signature
+  deriving (Show, Eq, Ord)
+
+
+-- | A single proof task: a labelled conjecture together with its
+-- hypotheses; see 'Directness' for the ATP-facing proof mode.
+data Task = Task
+  { taskDirectness :: Directness
+  , taskHypotheses :: [(Marker, Formula)] -- ^ No guarantees on order.
+  , taskConjectureLabel :: Marker
+  , taskConjecture :: Formula
+  } deriving (Show, Eq, Generic, Hashable)
+
+
+-- | Indicates whether a given proof is direct or indirect.
+-- An indirect proof (i.e. a proof by contradiction) may
+-- cause an ATP to emit a warning about contradictory axioms.
+-- When we know that the proof is indirect, we want to ignore
+-- this warning. For relevance filtering we also want to know
+-- what our actual goal is, so we keep the original conjecture.
+-- See 'isIndirect' for the corresponding predicate on 'Task'.
+data Directness
+  = Indirect Formula -- ^ The former conjecture.
+  | Direct
+  deriving (Show, Eq, Generic, Hashable)
+
+-- | Is the task's proof indirect (by contradiction)?
+isIndirect :: Task -> Bool
+isIndirect = isInd . taskDirectness
+  where
+    isInd = \case
+      Indirect _ -> True
+      Direct -> False
+
+
+-- | Boolean contraction of a task: contract the conjecture and every
+-- hypothesis, dropping hypotheses that contract to 'Top'.
+contractionTask :: Task -> Task
+contractionTask task = task
+  { taskHypotheses = mapMaybe contract (taskHypotheses task)
+  , taskConjecture = contraction (taskConjecture task)
+  }
+
+-- | Contract a labelled hypothesis, dropping it entirely when it
+-- contracts to 'Top'.
+contract :: (Marker, Formula) -> Maybe (Marker, Formula)
+contract (m, phi) =
+  case contraction phi of
+    Top -> Nothing
+    psi -> Just (m, psi)
+
+
+
+-- | Full boolean contraction: recursively eliminate the constants
+-- 'Top' and 'Bottom' via 'atomicContraction'.
+-- NOTE(review): a top-level @Not Top@ is not contracted (only
+-- 'Connected' and 'Quantified' nodes are passed through
+-- 'atomicContraction') — confirm this is intended.
+contraction :: ExprOf a -> ExprOf a
+contraction = \case
+  Connected conn f1 f2 -> atomicContraction (Connected conn (contraction f1) (contraction f2))
+  Quantified quant scope -> atomicContraction (Quantified quant (hoistScope contraction scope))
+  Not f -> Not (contraction f)
+  f -> f
+
+
+-- | Atomic boolean contraction: simplify a connective or quantifier
+-- whose immediate arguments are the constants 'Top' or 'Bottom'.
+atomicContraction :: ExprOf a -> ExprOf a
+atomicContraction = \case
+  Top `Iff` f -> f
+  Bottom `Iff` f -> Not f
+  f `Iff` Top -> f
+  f `Iff` Bottom -> Not f
+
+  Top `Implies` f -> f
+  Bottom `Implies` _ -> Top
+  _ `Implies` Top -> Top
+  f `Implies` Bottom -> Not f
+
+  Top `And` f -> f
+  Bottom `And` _ -> Bottom
+  f `And` Top -> f
+  _ `And` Bottom -> Bottom
+
+  -- Disjunction was previously never contracted, unlike the other
+  -- connectives; these are the dual identities of the 'And' cases.
+  Top `Or` _ -> Top
+  Bottom `Or` f -> f
+  _ `Or` Top -> Top
+  f `Or` Bottom -> f
+
+  phi@(Quantified _quant scope) -> case unscope scope of
+    Top -> Top
+    Bottom -> Bottom
+    _ -> phi
+
+  Not Top -> Bottom
+  Not Bottom -> Top
+
+  f -> f
diff --git a/source/Syntax/LexicalPhrase.hs b/source/Syntax/LexicalPhrase.hs
new file mode 100644
index 0000000..1743255
--- /dev/null
+++ b/source/Syntax/LexicalPhrase.hs
@@ -0,0 +1,93 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Syntax.LexicalPhrase where
+
+
+import Base
+import Syntax.Token (Token(..))
+
+import Data.Char (isAlpha)
+import Data.Text qualified as Text
+import Text.Earley.Mixfix (Holey)
+import Text.Earley (Grammar, Prod, (<?>), fullParses, parser, rule, token, satisfy)
+
+
+
+-- | 'LexicalPhrase's should be nonempty lists with at least one proper word token.
+-- Hyphens and quotes in words are treated as letters.
+-- Thus /@manifold-with-boundary@/ is a singleton lexical phrase (one word).
+--
+type LexicalPhrase = Holey Token
+
+-- MAYBE Add this instance by making LexicalPhrase a proper Type?
+-- Until then we can use the default instance for lists of prettyprintable things.
+--
+-- instance Pretty LexicalPhrase where
+-- pretty components = hsep (prettyComponent <$> components)
+-- where
+-- prettyComponent = \case
+-- Nothing -> "_"
+-- Just tok -> pretty tok
+
+
+
+-- | Split data by grammatical number (singular/plural).
+-- The 'Eq' and 'Ord' instances only consider the singular
+-- form so that we can prefer known irregular plurals over
+-- guessed irregular plurals when inserting items into
+-- the 'Lexicon'.
+data SgPl a
+  = SgPl {sg :: a, pl :: a}
+  deriving (Show, Functor, Generic)
+
+instance Eq a => Eq (SgPl a) where (==) = (==) `on` sg
+instance Ord a => Ord (SgPl a) where compare = compare `on` sg
+
+-- Hash only the singular form: the generic derived instance would hash
+-- both fields, breaking the law that values which compare equal
+-- (equality here is on 'sg' alone) must have equal hashes.
+instance Hashable a => Hashable (SgPl a) where
+  hashWithSalt salt = hashWithSalt salt . sg
+
+
+-- | Parse a lexical phrase specification such as @"equal to ?"@.
+-- Calls 'error' when the specification does not parse.
+unsafeReadPhrase :: String -> LexicalPhrase
+unsafeReadPhrase spec =
+  case fst (fullParses (parser lexicalPhraseSpec) spec) of
+    [] -> error "unsafeReadPhrase failed"
+    pat : _ -> pat
+
+-- | Parse a phrase specification with singular/plural variants,
+-- e.g. @"equal[s/] ?"@. Calls 'error' when the specification does not parse.
+unsafeReadPhraseSgPl :: String -> SgPl LexicalPhrase
+unsafeReadPhraseSgPl spec =
+  case fst (fullParses (parser lexicalPhraseSpecSgPl) spec) of
+    [] -> error "unsafeReadPhraseSgPl failed"
+    pat : _ -> pat
+
+
+-- | Grammar for phrase specifications: segments are either holes (@?@)
+-- or words (letters and hyphens), separated by single spaces.
+lexicalPhraseSpec :: Grammar r (Prod r String Char LexicalPhrase)
+lexicalPhraseSpec = do
+  hole <- rule $ Nothing <$ token '?' <?> "hole"
+  word <- rule $ Just <$> many (satisfy (\c -> isAlpha c || c == '-'))
+  space <- rule $ Just . (:[]) <$> token ' '
+  segment <- rule $ hole <|> word
+  rule $ (\seg segs -> fmap makeWord (seg : segs)) <$> segment <*> many (space *> segment)
+
+
+-- | Grammar for phrase specifications with singular/plural variants.
+-- A bracketed chunk @[sg/pl]@ inside a word contributes both forms:
+-- @"equal[s/] ?"@ yields singular @equals ?@ and plural @equal ?@.
+lexicalPhraseSpecSgPl :: Grammar r (Prod r String Char (SgPl LexicalPhrase))
+lexicalPhraseSpecSgPl = do
+  space <- rule $ Just . (:[]) <$> token ' '
+  hole <- rule $ (Nothing, Nothing) <$ token '?'<?> "hole"
+
+  word <- rule (many (satisfy isAlpha) <?> "word")
+  wordSgPl <- rule $ (,) <$> (token '[' *> word) <* token '/' <*> word <* token ']'
+  complexWord <- rule $ (\(a,b) -> (Just a, Just b)) . fuse <$>
+    many ((<>) <$> (dup <$> word) <*> wordSgPl) <?> "word"
+  segment <- rule (hole <|> (dup . Just <$> word) <|> complexWord )
+  rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment)
+  where
+    dup x = (x,x)
+    -- Concatenate adjacent (singular, plural) chunks of one word.
+    fuse = \case
+      (a, b) : (c, d) : rest -> fuse ((a <> c, b <> d) : rest)
+      [(a, b)] -> (a, b)
+      -- Error message now names the module this code actually lives in
+      -- (it previously said "Syntax.Abstract", a leftover from a move).
+      _ -> error "Syntax.LexicalPhrase.fuse"
+
+    makePhrase :: [(Maybe String, Maybe String)] -> SgPl LexicalPhrase
+    makePhrase = (\(patSg, patPl) -> SgPl (fmap makeWord patSg) (fmap makeWord patPl)) . unzip
+
+-- | Lift a phrase segment into token form: holes stay 'Nothing',
+-- words become 'Word' tokens.
+makeWord :: Maybe String -> Maybe Token
+makeWord segment = fmap (\w -> Word (Text.pack w)) segment
diff --git a/source/Syntax/Lexicon.hs b/source/Syntax/Lexicon.hs
new file mode 100644
index 0000000..805b875
--- /dev/null
+++ b/source/Syntax/Lexicon.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | The 'Lexicon' describes the part of the grammar that is extensible/dynamic.
+--
+-- The items of the 'Lexicon' are organized by their meaning and their
+-- syntactic behaviour. They are typically represented as some kind of
+-- pattern data which is then used to generate various production rules
+-- for the concrete grammar. This representation makes inspection and
+-- extension easier.
+--
+
+module Syntax.Lexicon
+ ( module Syntax.Lexicon
+ , pattern ConsSymbol
+ , pattern PairSymbol
+ , pattern CarrierSymbol
+ , pattern ApplySymbol
+ , pattern DomSymbol
+ ) where
+
+
+import Base
+import Syntax.Abstract
+
+import Data.List qualified as List
+import Data.Sequence qualified as Seq
+import Data.HashSet qualified as Set
+import Data.HashMap.Strict qualified as HM
+import Data.Text qualified as Text
+import Text.Earley.Mixfix (Holey, Associativity(..))
+
+
+-- | The dynamic part of the grammar, organized by syntactic category.
+data Lexicon = Lexicon
+  { lexiconMixfix :: Seq (HashMap (Holey Token) (Associativity, Marker)) -- ^ Mixfix function symbols, one table per precedence level.
+  , lexiconConnectives :: [[(Holey Token, Associativity)]] -- ^ Logical connectives, grouped by precedence level.
+  , lexiconPrefixPredicates :: LexicalItems PrefixPredicate -- ^ Prefix predicate symbols (e.g. @Cong@, @Betw@).
+  , lexiconStructFun :: LexicalItems StructSymbol -- ^ Structure operations (e.g. the carrier).
+  , lexiconRelationSymbols :: LexicalItems RelationSymbol -- ^ Binary relation symbols (e.g. @=@, @\in@).
+  , lexiconVerbs :: LexicalItems (SgPl LexicalPhrase) -- ^ Verb phrases with singular/plural forms.
+  , lexiconAdjLs :: LexicalItems LexicalPhrase -- ^ Adjective phrases; presumably left attributes — confirm.
+  , lexiconAdjRs :: LexicalItems LexicalPhrase -- ^ Adjective phrases; presumably right attributes — confirm.
+  , lexiconNouns :: LexicalItems (SgPl LexicalPhrase) -- ^ Noun phrases with singular/plural forms.
+  , lexiconStructNouns :: LexicalItems (SgPl LexicalPhrase) -- ^ Noun phrases denoting structures.
+  , lexiconFuns :: LexicalItems (SgPl LexicalPhrase) -- ^ Function noun phrases.
+  } deriving (Show, Eq)
+
+-- | Projection returning the union of both left and right attributes.
+-- (Left-biased, matching the '<>' union on 'HashMap'.)
+lexiconAdjs :: Lexicon -> HashMap LexicalPhrase Marker
+lexiconAdjs lexicon = HM.union (lexiconAdjLs lexicon) (lexiconAdjRs lexicon)
+
+-- | Look up the marker of a function symbol across all precedence
+-- levels; 'Left' carries the shown symbol on failure.
+lookupOp :: FunctionSymbol -> Seq (HashMap FunctionSymbol (assoc, Marker)) -> Either String Marker
+lookupOp f ops =
+  case firstJust (HM.lookup f) ops of
+    Just (_assoc, m) -> Right m
+    Nothing -> Left (show f)
+
+-- | Look up the marker of a lexical item; 'Left' carries the shown
+-- item on failure.
+lookupLexicalItem :: (Hashable a, Show a) => a -> LexicalItems a -> Either String Marker
+lookupLexicalItem a items =
+  case HM.lookup a items of
+    Nothing -> Left (show a)
+    Just m -> Right m
+
+-- | Lexical items map each pattern to its canonical marker.
+type LexicalItems a = HashMap a Marker
+
+-- | The built-in lexicon every document starts from.
+-- Fields are listed in the same order as the 'Lexicon' declaration.
+builtins :: Lexicon
+builtins = Lexicon
+  { lexiconMixfix = builtinMixfix
+  , lexiconConnectives = builtinConnectives
+  , lexiconPrefixPredicates = builtinPrefixPredicates
+  , lexiconStructFun = builtinStructOps
+  , lexiconRelationSymbols = builtinRelationSymbols
+  , lexiconVerbs = builtinVerbs
+  , lexiconAdjLs = mempty
+  , lexiconAdjRs = builtinAdjRs
+  , lexiconNouns = builtinNouns
+  , lexiconStructNouns = builtinStructNouns
+  , lexiconFuns = mempty
+  }
+
+-- INVARIANT: 10 precedence levels for now.
+-- One inner list per precedence level; all operators within a level
+-- share the same binding strength.
+-- NOTE(review): whether earlier levels bind looser or tighter is not
+-- visible here — confirm against the mixfix grammar generation.
+builtinMixfix :: Seq (HashMap FunctionSymbol (Associativity, Marker))
+builtinMixfix = Seq.fromList $ (HM.fromList <$>)
+  [ []
+  , [binOp (Symbol "+") LeftAssoc "add", binOp (Command "union") LeftAssoc "union", binOp (Command "monus") LeftAssoc "monus"]
+  , [binOp (Command "relcomp") LeftAssoc "relcomp"]
+  , [binOp (Command "circ") LeftAssoc "circ"]
+  , [binOp (Command "mul") LeftAssoc "mul", binOp (Command "inter") LeftAssoc "inter"]
+  , [binOp (Command "setminus") LeftAssoc "setminus"]
+  , [binOp (Command "times") RightAssoc "times"]
+  , []
+  , prefixOps
+  , builtinIdentifiers
+  ]
+  where
+    -- Nullary constants written as bare TEX commands.
+    builtinIdentifiers :: [(FunctionSymbol, (Associativity, Marker))]
+    builtinIdentifiers = identifier <$>
+      [ "emptyset"
+      , "naturals"
+      , "rationals"
+      , "reals"
+      , "unit"
+      , "zero"
+      ]
+
+-- | Prefix operators written as @\cmd{...}@: the brace-delimited
+-- argument is represented as a hole between invisible braces.
+prefixOps :: [(FunctionSymbol, (Associativity, Marker))]
+prefixOps =
+  [ bracedPrefix "unions" "unions"
+  , bracedPrefix "cumul" "cumul"
+  , bracedPrefix "fst" "fst"
+  , bracedPrefix "snd" "snd"
+  , bracedPrefix "pow" "pow"
+  , (ConsSymbol, (NonAssoc, "cons"))
+  , (PairSymbol, (NonAssoc, "pair"))
+  -- NOTE Is now defined and hence no longer necessary , (ApplySymbol, (NonAssoc, "apply"))
+  ]
+  where
+    -- A command taking one braced argument, e.g. @\pow{...}@.
+    bracedPrefix cmd m = ([Just (Command cmd), Just InvisibleBraceL, Nothing, Just InvisibleBraceR], (NonAssoc, m))
+
+
+-- | Built-in structure operations.
+builtinStructOps :: LexicalItems StructSymbol
+builtinStructOps = HM.singleton CarrierSymbol "carrier"
+
+-- | A nullary operator written as a bare TEX command, e.g. @\emptyset@;
+-- the command name doubles as its marker.
+identifier :: Text -> (Holey Token, (Associativity, Marker))
+identifier name = ([Just (Command name)], (NonAssoc, Marker name))
+
+
+-- | Built-in binary relation symbols and their markers.
+builtinRelationSymbols :: LexicalItems RelationSymbol
+builtinRelationSymbols = HM.fromList
+  [ (Symbol "=", "eq")
+  , (Command "neq", "neq")
+  , (Command "in", "elem")
+  , (Command "notin", "notelem") -- Alternative to @\not\in@.
+  ]
+
+-- | Built-in prefix predicates. The integer is presumably the
+-- predicate's arity (congruence @Cong@ is 4-ary and betweenness
+-- @Betw@ 3-ary in Tarski-style geometry) — NOTE(review): confirm
+-- against the 'PrefixPredicate' definition.
+builtinPrefixPredicates :: LexicalItems PrefixPredicate
+builtinPrefixPredicates = HM.fromList
+  [ (PrefixPredicate "Cong" 4, "cong")
+  , (PrefixPredicate "Betw" 3, "betw")
+  ]
+
+
+-- | Built-in logical connectives, grouped by precedence level.
+-- All are infix except @\lnot@, which is prefix (hole on the right only).
+builtinConnectives :: [[(Holey Token, Associativity)]]
+builtinConnectives =
+  [ [binOp' (Command "iff") NonAssoc]
+  , [binOp' (Command "implies") RightAssoc]
+  , [binOp' (Command "lor") LeftAssoc]
+  , [binOp' (Command "land") LeftAssoc]
+  , [([Just (Command "lnot"), Nothing], NonAssoc)]
+  ]
+
+
+-- | A binary infix operator pattern @_ tok _@ with a marker.
+binOp :: Token -> Associativity -> Marker -> (Holey Token, (Associativity, Marker))
+binOp t assoc marker = ([Nothing, Just t, Nothing], (assoc, marker))
+
+-- | A binary infix connective pattern @_ tok _@ without a marker.
+binOp' :: Token -> Associativity -> (Holey Token, Associativity)
+binOp' t assoc = ([Nothing, Just t, Nothing], assoc)
+
+-- | Built-in right-attribute adjective phrases.
+builtinAdjRs :: LexicalItems LexicalPhrase
+builtinAdjRs = HM.singleton (unsafeReadPhrase "equal to ?") "eq"
+
+-- | Built-in verb phrases.
+builtinVerbs :: LexicalItems (SgPl LexicalPhrase)
+builtinVerbs = HM.singleton (unsafeReadPhraseSgPl "equal[s/] ?") "eq"
+
+
+-- Some of these do/should correspond to mathlib structures,
+-- e.g.: lattice, complete lattice, ring, etc.
+--
+builtinNouns :: LexicalItems (SgPl LexicalPhrase)
+builtinNouns = HM.fromList
+  -- Nullary
+  [ (unsafeReadPhraseSgPl "set[/s]", "set")
+  , (unsafeReadPhraseSgPl "point[/s]", "point")
+  , (unsafeReadPhraseSgPl "element[/s] of ?", "elem")
+  ]
+
+-- | The noun phrase for the primitive one-sorted structure.
+_Onesorted :: SgPl LexicalPhrase
+_Onesorted = unsafeReadPhraseSgPl "onesorted structure[/s]"
+
+-- | Built-in structure nouns.
+builtinStructNouns :: LexicalItems (SgPl LexicalPhrase)
+builtinStructNouns = HM.singleton _Onesorted "onesorted_structure"
+
+
+-- | Naïve splitting of lexical phrases to insert a variable slot for names in noun phrases,
+-- as in /@there exists a linear form $h$ on $E$@/, where the underlying pattern is
+-- /@linear form on ?@/. In this case we would get:
+--
+-- > splitOnVariableSlot (sg (unsafeReadPhraseSgPl "linear form[/s] on ?"))
+-- > ==
+-- > (unsafeReadPhrase "linear form", unsafeReadPhrase "on ?")
+--
+splitOnVariableSlot :: LexicalPhrase -> (LexicalPhrase, LexicalPhrase)
+splitOnVariableSlot phrase = case prepositionIndices <> nonhyphenatedSlotIndices of
+  [] -> (phrase, []) -- Place variable slot at the end.
+  is -> List.splitAt (minimum is) phrase
+  where
+    prepositionIndices, slotIndices, nonhyphenatedSlotIndices :: [Int] -- Ascending.
+    prepositionIndices = List.findIndices isPreposition phrase
+    slotIndices = List.findIndices isNothing phrase
+    -- A slot only counts when the component after it does not start with
+    -- a hyphen (so hyphen-attached slots stay with their word).
+    nonhyphenatedSlotIndices = [i | i <- slotIndices, noHyphen (nth (i + 1) phrase)]
+
+    isPreposition :: Maybe Token -> Bool
+    isPreposition = \case
+      Just (Word w) -> w `Set.member` prepositions
+      _ -> False
+
+    noHyphen :: Maybe (Maybe Token) -> Bool
+    noHyphen = \case
+      -- NOTE(review): 'Text.head' is partial; this assumes 'Word' tokens
+      -- are never empty — the lexer builds them with 'some', but confirm
+      -- for other construction sites.
+      Just (Just (Word w)) -> Text.head w /= '-'
+      -- If we arrive here, either the pattern is over (`Nothing`) or the next
+      -- part of the pattern is not a word that starts with a hyphen.
+      _ -> True
+
+
+-- Prepositions are a closed class, but this list is not yet exhaustive.
+-- It can and should be extended when needed. The following list is a
+-- selection of the prepositions found at
+-- https://en.wikipedia.org/wiki/List_of_English_prepositions.
+--
+-- All entries are lowercase: the lexer case-folds words, so membership
+-- tests against this set are effectively case-insensitive.
+prepositions :: HashSet Text
+prepositions = Set.fromList
+  [ "about"
+  , "above"
+  , "across"
+  , "after"
+  , "against"
+  , "along", "alongside"
+  , "amid", "amidst"
+  , "among"
+  , "around"
+  , "as"
+  , "at"
+  , "atop"
+  , "before"
+  , "behind"
+  , "below"
+  , "beneath"
+  , "beside", "besides"
+  , "between"
+  , "beyond"
+  , "but"
+  , "by"
+  , "except"
+  , "for"
+  , "from"
+  , "in", "inside", "into"
+  , "like"
+  , "modulo", "mod"
+  , "near"
+  , "next"
+  , "of"
+  , "off"
+  , "on"
+  , "onto"
+  , "opposite"
+  , "out"
+  , "over"
+  , "past"
+  , "per"
+  , "sans"
+  , "till"
+  , "to"
+  , "under"
+  , "underneath"
+  , "unlike"
+  , "unto"
+  , "up", "upon"
+  , "versus"
+  , "via"
+  , "with"
+  , "within"
+  , "without"
+  ]
diff --git a/source/Syntax/Token.hs b/source/Syntax/Token.hs
new file mode 100644
index 0000000..7c8606c
--- /dev/null
+++ b/source/Syntax/Token.hs
@@ -0,0 +1,438 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- |
+-- This module defines the lexer and its associated data types.
+-- The lexer takes `Text` as input and produces a stream of tokens
+-- annotated with positional information. This information is bundled
+-- together with the original raw input for producing error messages.
+--
+-- The lexer performs some normalizations to make describing the grammar easier.
+-- Words outside of math environments are case-folded. Some commands are analysed
+-- as variable tokens and are equivalent to their respective unicode variants
+-- (α, β, γ, ..., 𝔸, 𝔹, ℂ, ...). Similarly, @\\begin{...}@ and @\\end{...}@ commands
+-- are each parsed as single tokens.
+--
+module Syntax.Token
+ ( Token(..)
+ , tokToString
+ , tokToText
+ , TokStream(..)
+ , Located(..)
+ , runLexer
+ ) where
+
+
+import Base hiding (many)
+
+import Control.Monad.Combinators
+import Control.Monad.State.Strict
+import Data.Char (isAsciiLower)
+import Data.List.NonEmpty qualified as NonEmpty
+import Data.Text qualified as Text
+import Prettyprinter (Pretty(..))
+import Text.Megaparsec hiding (Token, Label, label)
+import Text.Megaparsec.Char qualified as Char
+import Text.Megaparsec.Char.Lexer qualified as Lexer
+
+
+-- | Run the lexer on raw input. The 'String' is the source file name,
+-- used for error reporting only.
+runLexer :: String -> Text -> Either (ParseErrorBundle Text Void) [Located Token]
+runLexer file = runParser (evalStateT toks initLexerState) file
+
+
+type Lexer = StateT LexerState (Parsec Void Text)
+
+
+-- | Mutable lexer context threaded through tokenization.
+data LexerState = LexerState
+  { textNesting :: !Int
+  -- ^ Represents nesting of braces inside of the @\text{...}@
+  -- command. When we encounter @\text@ the token mode switches
+  -- to text tokens. In order to switch back to math mode correctly
+  -- we need to count the braces.
+  , mode :: !Mode
+  -- ^ The current tokenizing mode (text vs. math).
+  } deriving (Show, Eq)
+
+-- | Start in text mode with no pending @\text@ braces.
+initLexerState :: LexerState
+initLexerState = LexerState{textNesting = 0, mode = TextMode}
+
+-- | Adjust the brace-nesting counter used inside @\text{...}@.
+incrNesting, decrNesting :: LexerState -> LexerState
+incrNesting st = st{textNesting = succ (textNesting st)}
+decrNesting st = st{textNesting = pred (textNesting st)}
+
+-- | The two tokenizing modes: prose ('TextMode') and formulas ('MathMode').
+data Mode = TextMode | MathMode deriving (Show, Eq)
+
+-- | Query the current tokenizing mode.
+isTextMode, isMathMode :: Lexer Bool
+isTextMode = (TextMode ==) <$> gets mode
+isMathMode = (MathMode ==) <$> gets mode
+
+-- | Switch the current tokenizing mode.
+setTextMode, setMathMode :: Lexer ()
+setTextMode = modify (\st -> st{mode = TextMode})
+setMathMode = modify (\st -> st{mode = MathMode})
+
+-- |
+-- A token stream as input stream for a parser. Contains the raw input
+-- before tokenization as 'Text' for showing error messages.
+--
+data TokStream = TokStream
+  { rawInput :: !Text
+  , unTokStream :: ![Located Token]
+  } deriving (Show, Eq)
+
+-- Streams concatenate pointwise: raw text with raw text, tokens with tokens.
+instance Semigroup TokStream where
+  TokStream raw1 toks1 <> TokStream raw2 toks2 = TokStream (raw1 <> raw2) (toks1 <> toks2)
+
+instance Monoid TokStream where
+  mempty = TokStream mempty mempty
+
+-- | A LaTeX token.
+-- Invisible delimiters 'InvisibleBraceL' and 'InvisibleBraceR' are
+-- unescaped braces used for grouping in TEX (@{@),
+-- visible braces are escaped braces (@\\{@).
+data Token
+  = Word !Text -- ^ A prose word (case-folded by the lexer).
+  | Variable !Text -- ^ A math-mode variable.
+  | Symbol !Text -- ^ A run of symbol characters (e.g. @+@, @=@).
+  | Integer !Int -- ^ A decimal integer literal.
+  | Command !Text -- ^ A TEX command, stored without the backslash.
+  | Label Text -- ^ A /@\\label{...}@/ command (case-sensitive).
+  | Ref (NonEmpty Text) -- ^ A /@\\ref{...}@/ command (case-sensitive).
+  | BeginEnv !Text -- ^ Environment begin; @"math"@ covers @$@, @\(@ and @\[@.
+  | EndEnv !Text -- ^ Environment end.
+  | ParenL | ParenR
+  | BracketL | BracketR
+  | VisibleBraceL | VisibleBraceR
+  | InvisibleBraceL | InvisibleBraceR
+  deriving (Show, Eq, Ord, Generic, Hashable)
+
+-- Convenient for writing word tokens as string literals.
+instance IsString Token where
+  fromString w = Word (Text.pack w)
+
+-- | Render a token back to a textual approximation of its concrete syntax.
+tokToText :: Token -> Text
+tokToText = \case
+  Word w -> w
+  Variable v -> v
+  Symbol s -> s
+  Integer n -> Text.pack (show n)
+  Command cmd -> Text.cons '\\' cmd
+  Label m -> "\\label{" <> m <> "}"
+  Ref ms -> "\\ref{" <> Text.intercalate ", " (toList ms) <> "}"
+  -- The math environment is normalized to @$@ delimiters; these two
+  -- cases must precede the generic 'BeginEnv'/'EndEnv' cases.
+  BeginEnv "math" -> "$"
+  EndEnv "math" -> "$"
+  BeginEnv env -> "\\begin{" <> env <> "}"
+  EndEnv env -> "\\end{" <> env <> "}"
+  ParenL -> "("
+  ParenR -> ")"
+  BracketL -> "["
+  BracketR -> "]"
+  VisibleBraceL -> "\\{"
+  VisibleBraceR -> "\\}"
+  InvisibleBraceL -> "{"
+  InvisibleBraceR -> "}"
+
+-- | Render a token as a 'String'; see 'tokToText'.
+tokToString :: Token -> String
+tokToString t = Text.unpack (tokToText t)
+
+-- Pretty-printing mirrors 'tokToText', except that math environments
+-- keep their \begin{...}/\end{...} form instead of being normalized to
+-- @$@ — NOTE(review): presumably intentional for display; confirm.
+instance Pretty Token where
+  pretty = \case
+    Word w -> pretty w
+    Variable v -> pretty v
+    Symbol s -> pretty s
+    Integer n -> pretty n
+    Command cmd -> "\\" <> pretty cmd
+    Label m -> "\\label{" <> pretty m <> "}"
+    Ref m -> "\\ref{" <> pretty m <> "}"
+    BeginEnv env -> "\\begin{" <> pretty env <> "}"
+    EndEnv env -> "\\end{" <> pretty env <> "}"
+    ParenL -> "("
+    ParenR -> ")"
+    BracketL -> "["
+    BracketR -> "]"
+    VisibleBraceL -> "\\{"
+    VisibleBraceR -> "\\}"
+    InvisibleBraceL -> "{"
+    InvisibleBraceR -> "}"
+
+
+-- | A value annotated with the source position where it starts.
+data Located a = Located
+  { startPos :: !SourcePos
+  , unLocated :: !a
+  } deriving (Show)
+
+-- Positions are ignored for comparison: located values compare by payload.
+instance Eq a => Eq (Located a) where (==) = (==) `on` unLocated
+instance Ord a => Ord (Located a) where compare = compare `on` unLocated
+
+
+-- | Parses tokens, switching tokenizing mode when encountering math environments.
+toks :: Lexer [Located Token]
+toks = whitespace *> goNormal id <* eof
+  where
+    -- Each worker accumulates tokens in a difference list @f@.
+    -- goNormal: top-level text.
+    goNormal f = do
+      r <- optional tok
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (BeginEnv "math")) -> goMath (f . (t:))
+        Just t@(Located _ (BeginEnv "align*")) -> goMath (f . (t:))
+        Just t -> goNormal (f . (t:))
+    -- goText: text embedded in math via @\text{...}@.
+    goText f = do
+      r <- optional textToken
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (BeginEnv "math")) -> goMathInText (f . (t:))
+        Just t@(Located _ (EndEnv "text")) -> goMath (f . (t:))
+        -- NOTE(review): 'beginExplanation' emits @BeginEnv "text"@ and
+        -- 'textToken' only emits @EndEnv "text"@, so the "explanation"
+        -- branches here and in goMath look unreachable — confirm.
+        Just t@(Located _ (EndEnv "explanation")) -> goMath (f . (t:))
+        Just t -> goText (f . (t:))
+    -- goMath: inside a math environment.
+    goMath f = do
+      r <- optional mathToken
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (EndEnv "math")) -> goNormal (f . (t:))
+        Just t@(Located _ (EndEnv "align*")) -> goNormal (f . (t:))
+        Just t@(Located _ (BeginEnv "text")) -> goText (f . (t:))
+        Just t@(Located _ (BeginEnv "explanation")) -> goText (f . (t:))
+        Just t -> goMath (f . (t:))
+    -- goMathInText: math nested inside a @\text{...}@ group.
+    goMathInText f = do
+      r <- optional mathToken
+      case r of
+        Nothing -> pure (f [])
+        Just t@(Located _ (EndEnv "math")) -> goText (f . (t:))
+        Just t@(Located _ (BeginEnv "text")) -> goText (f . (t:))
+        Just t -> goMathInText (f . (t:))
+{-# INLINE toks #-}
+
+-- | Parses a single normal mode token. Alternatives are tried in
+-- order, so e.g. 'alignBegin' precedes the generic 'begin'.
+tok :: Lexer (Located Token)
+tok = asum
+  [ word, var, symbol, mathBegin, alignBegin, begin, end
+  , opening, closing, label, ref, command
+  ]
+
+-- | Parses a single math mode token. Alternatives are tried in order,
+-- so e.g. 'alignEnd' precedes the generic 'end'.
+mathToken :: Lexer (Located Token)
+mathToken = asum
+  [ var, symbol, number, begin, alignEnd, end, opening, closing
+  , beginText, beginExplanation, mathEnd, command
+  ]
+
+-- | Parses @\text{@ in math mode and switches to text tokens.
+-- The matching closing brace is recognized by 'textToken' using the
+-- 'textNesting' counter.
+beginText :: Lexer (Located Token)
+beginText = lexeme do
+  Char.string "\\text{"
+  setTextMode
+  pure (BeginEnv "text")
+
+-- | Same as text modulo spacing, so we treat it synonymously:
+-- @\explanation{@ also emits @BeginEnv "text"@, meaning downstream
+-- code never sees a distinct "explanation" environment token.
+beginExplanation :: Lexer (Located Token)
+beginExplanation = lexeme do
+  Char.string "\\explanation{"
+  setTextMode
+  pure (BeginEnv "text")
+
+-- | Normal mode embedded into math mode via @\text{...}@.
+textToken :: Lexer (Located Token)
+textToken = word <|> symbol <|> begin <|> end <|> textEnd <|> mathBegin <|> alignBegin <|> opening' <|> closing' <|> ref <|> command
+  where
+    -- The closing brace of the @\text{...}@ group itself; only fires
+    -- when no inner group braces are open.
+    textEnd = lexeme do
+      0 <- gets textNesting -- Otherwise fail.
+      Char.char '}'
+      setMathMode
+      pure (EndEnv "text")
+
+    -- Like 'opening'/'closing', but unescaped braces adjust the
+    -- nesting counter so inner groups do not end the @\text{...}@ group.
+    opening' = lexeme (brace <|> group <|> paren <|> bracket)
+      where
+        brace = VisibleBraceL <$ lexeme (Char.string "\\{")
+        group = InvisibleBraceL <$ lexeme (Char.char '{') <* modify' incrNesting
+        paren = ParenL <$ lexeme (Char.char '(')
+        bracket = BracketL <$ lexeme (Char.char '[')
+
+    closing' = lexeme (brace <|> group <|> paren <|> bracket)
+      where
+        brace = VisibleBraceR <$ lexeme (Char.string "\\}")
+        group = InvisibleBraceR <$ lexeme (Char.char '}') <* modify' decrNesting
+        paren = ParenR <$ lexeme (Char.char ')')
+        bracket = BracketR <$ lexeme (Char.char ']')
+
+
+-- | Parses a single begin math token.
+-- All three delimiters (@\(@, @\[@, @$@) are normalized to
+-- @BeginEnv "math"@; display math is not distinguished from inline.
+mathBegin :: Lexer (Located Token)
+mathBegin = guardM isTextMode *> lexeme do
+  Char.string "\\(" <|> Char.string "\\[" <|> Char.string "$"
+  setMathMode
+  pure (BeginEnv "math")
+
+-- | Parses @\begin{align*}@ in text mode and switches to math mode.
+-- Tried before the generic 'begin' parser (see 'tok') so the
+-- alignment environment gets its dedicated token.
+alignBegin :: Lexer (Located Token)
+alignBegin = guardM isTextMode *> lexeme do
+  Char.string "\\begin{align*}"
+  setMathMode
+  pure (BeginEnv "align*")
+
+
+-- | Parses a single end math token; mirrors 'mathBegin'.
+mathEnd :: Lexer (Located Token)
+mathEnd = guardM isMathMode *> lexeme do
+  Char.string "\\)" <|> Char.string "\\]" <|> Char.string "$"
+  setTextMode
+  pure (EndEnv "math")
+
+-- | Parses @\end{align*}@ in math mode; mirrors 'alignBegin'.
+alignEnd :: Lexer (Located Token)
+alignEnd = guardM isMathMode *> lexeme do
+  Char.string "\\end{align*}"
+  setTextMode
+  pure (EndEnv "align*")
+
+
+-- | Parses a word: letters, apostrophes, and hyphens. Words are
+-- returned casefolded, since we want to ignore their case later on.
+word :: Lexer (Located Token)
+word = guardM isTextMode *> lexeme do
+  letters <- some (Char.letterChar <|> Char.char '\'' <|> Char.char '-')
+  pure (Word (Text.toCaseFold (Text.pack letters)))
+
+-- | Parses a decimal integer literal.
+number :: Lexer (Located Token)
+number = lexeme (Integer <$> Lexer.decimal)
+
+
+-- | Parses a variable in math mode: a Latin letter, a blackboard
+-- letter (@\mathbb{...}@), or a greek-letter command, optionally
+-- followed by a numeric subscript or prime ticks.
+var :: Lexer (Located Token)
+var = guardM isMathMode *> lexeme (fmap Variable var')
+  where
+    var' = do
+      alphabeticPart <- letter <|> bb <|> greek
+      variationPart <- subscriptNumber <|> ticked <|> pure ""
+      pure (alphabeticPart <> variationPart)
+
+    -- A subscript of the form @_123@; only digits are accepted.
+    subscriptNumber :: Lexer Text
+    subscriptNumber = do
+      Char.char '_'
+      n <- some Char.digitChar
+      pure (Text.pack n)
+
+    -- Temporary hack to fit the TPTP format: each prime tick becomes
+    -- the literal string "prime".
+    ticked :: Lexer Text
+    ticked = do
+      ticks <- some $ Char.char '\''
+      let ticks' = "prime" <$ ticks :: [Text]
+      pure (Text.concat ticks')
+
+    letter :: Lexer Text
+    letter = fmap Text.singleton Char.letterChar
+
+    -- A greek-letter command such as @\alpha@; the backslash is dropped.
+    greek :: Lexer Text
+    greek = try do
+      Char.char '\\'
+      l <- symbolParser greeks
+      notFollowedBy Char.letterChar
+      pure l
+
+    greeks :: [Text]
+    greeks =
+      [ "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta"
+      , "iota", "kappa", "lambda", "mu", "nu", "xi", "pi", "rho", "sigma"
+      , "tau", "upsilon", "phi", "chi", "psi", "omega"
+      , "Gamma", "Delta", "Theta", "Lambda", "Xi", "Pi", "Sigma", "Upsilon"
+      , "Phi", "Psi", "Omega"
+      ]
+
+    -- A blackboard letter @\mathbb{A}@ is encoded as @bbA@.
+    bb :: Lexer Text
+    bb = do
+      Char.string "\\mathbb{"
+      l <- symbolParser bbs
+      Char.char '}'
+      pure $ "bb" <> l
+
+    bbs :: [Text]
+    bbs = Text.singleton <$> ['A'..'Z']
+
+    -- Accept the first matching string from the candidate list.
+    symbolParser :: [Text] -> Lexer Text
+    symbolParser symbols = asum (fmap Char.string symbols)
+
+
+-- | Parses a maximal run of symbol characters as one 'Symbol' token.
+symbol :: Lexer (Located Token)
+symbol = lexeme do
+  run <- some (satisfy (`elem` symbolChars))
+  pure (Symbol (Text.pack run))
+  where
+    symbolChars :: [Char]
+    symbolChars = ".,:;!?@=≠+-/|^><≤≥*&≈⊂⊃⊆⊇∈“”‘’"
+
+-- | Parses a TEX-style command; the stored name omits the backslash.
+command :: Lexer (Located Token)
+command = lexeme do
+  _ <- Char.char '\\'
+  name <- some Char.letterChar
+  pure (Command (Text.pack name))
+
+-- | Parses the beginning of an environment.
+-- Commits only after having seen "\begin{".
+begin :: Lexer (Located Token)
+begin = lexeme do
+  _ <- Char.string "\\begin{"
+  envName <- some (Char.letterChar <|> Char.char '*')
+  _ <- Char.char '}'
+  pure (BeginEnv (Text.pack envName))
+
+-- | Parses a label command and extracts its marker.
+label :: Lexer (Located Token)
+label = lexeme do
+  _ <- Char.string "\\label{"
+  m <- marker
+  _ <- Char.char '}'
+  pure (Label m)
+
+-- | Parses a reference command and extracts its markers.
+-- Supports @\ref@, @\cref@, and @\hyperref@; for
+-- @\hyperref[markers]{title}@ the display title is read and discarded.
+ref :: Lexer (Located Token)
+ref = lexeme do
+  -- @\\cref@ is from @cleveref@ and @\\hyperref@ is from @hyperref@
+  cmd <- Char.string "\\ref{" <|> Char.string "\\cref{" <|> Char.string "\\hyperref["
+  ms <- NonEmpty.fromList <$> marker `sepBy1` Char.char ','
+  case cmd of
+    "\\hyperref[" -> Char.string "]{" *> some (satisfy (/= '}')) *> Char.char '}' *> pure (Ref ms)
+    _ -> Char.char '}' *> pure (Ref ms)
+
+-- | A cross-reference marker: a nonempty run of lowercase ASCII
+-- letters and underscores.
+marker :: Lexer Text
+marker = takeWhile1P Nothing (\x -> isAsciiLower x || x == '_')
+
+-- | Parses the end of an environment.
+-- Commits only after having seen "\end{".
+end :: Lexer (Located Token)
+end = lexeme do
+  _ <- Char.string "\\end{"
+  envName <- some (Char.letterChar <|> Char.char '*')
+  _ <- Char.char '}'
+  pure (EndEnv (Text.pack envName))
+
+-- | Parses an opening delimiter.
+-- NOTE(review): each alternative is wrapped in an inner 'lexeme' even
+-- though the whole parser is 'lexeme'-wrapped; the second whitespace
+-- skip is a no-op, so this looks redundant but harmless — confirm.
+opening :: Lexer (Located Token)
+opening = lexeme (paren <|> brace <|> group <|> bracket)
+  where
+    brace = VisibleBraceL <$ lexeme (Char.string "\\{")
+    group = InvisibleBraceL <$ lexeme (Char.char '{')
+    paren = ParenL <$ lexeme (Char.char '(')
+    bracket = BracketL <$ lexeme (Char.char '[')
+
+-- | Parses a closing delimiter; mirrors 'opening'.
+closing :: Lexer (Located Token)
+closing = lexeme (paren <|> brace <|> group <|> bracket)
+  where
+    brace = VisibleBraceR <$ lexeme (Char.string "\\}")
+    group = InvisibleBraceR <$ lexeme (Char.char '}')
+    paren = ParenR <$ lexeme (Char.char ')')
+    bracket = BracketR <$ lexeme (Char.char ']')
+
+-- | Turns a Lexer into one that tracks the source position of the token
+-- and consumes trailing whitespace.
+lexeme :: Lexer a -> Lexer (Located a)
+lexeme p = do
+  tokenStart <- getSourcePos
+  parsed <- p
+  whitespace
+  pure (Located tokenStart parsed)
+
+-- | A single unit of insignificant space: a literal space or newline,
+-- or one of TEX's spacing commands. (The original listed @\\;@ twice;
+-- the duplicate alternative was removed.)
+space :: Lexer ()
+space = void (Char.char ' ' <|> Char.char '\n')
+  <|> void (Char.string "\\ " <|> Char.string "\\\\" <|> Char.string "\\!" <|> Char.string "\\," <|> Char.string "\\:" <|> Char.string "\\;")
+
+-- | Skips runs of insignificant 'space' and line comments (@%@ … EOL).
+whitespace :: Lexer ()
+whitespace = Lexer.space (void (some space)) (Lexer.skipLineComment "%") empty