From 442d732696ad431b84f6e5c72b6ee785be4fd968 Mon Sep 17 00:00:00 2001 From: adelon <22380201+adelon@users.noreply.github.com> Date: Sat, 10 Feb 2024 02:22:14 +0100 Subject: Initial commit --- source/Syntax/Concrete.hs | 657 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 657 insertions(+) create mode 100644 source/Syntax/Concrete.hs (limited to 'source/Syntax/Concrete.hs') diff --git a/source/Syntax/Concrete.hs b/source/Syntax/Concrete.hs new file mode 100644 index 0000000..8c6962d --- /dev/null +++ b/source/Syntax/Concrete.hs @@ -0,0 +1,657 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE RecursiveDo #-} + +-- | Concrete syntax of the surface language. +module Syntax.Concrete where + +import Base +import Syntax.Abstract +import Syntax.Concrete.Keywords +import Syntax.Lexicon (Lexicon(..), lexiconAdjs, splitOnVariableSlot) +import Syntax.Token + +import Data.HashSet qualified as HS +import Data.List.NonEmpty qualified as NonEmpty +import Data.HashMap.Strict qualified as HM +import Text.Earley (Grammar, Prod, (), rule, satisfy, terminal) +import Text.Earley.Mixfix +import Text.Megaparsec.Pos (SourcePos) + + +grammar :: Lexicon -> Grammar r (Prod r Text (Located Token) Block) +grammar lexicon@Lexicon{..} = mdo + let makeOp :: ([Maybe Token], Associativity) -> ([Maybe (Prod r Text (Located Token) Token)], Associativity) + makeOp (pat, assoc) = (map (fmap token) pat, assoc) + ops = map (map makeOp) (toList (HM.toList . HM.map fst <$> lexiconMixfix)) + conns = map (map makeOp) lexiconConnectives + + integer <- rule (terminal maybeIntToken "integer") + relator <- rule $ unLocated <$> (satisfy (\ltok -> unLocated ltok `HM.member` lexiconRelationSymbols) "relator") + varSymbol <- rule (terminal maybeVarToken "variable") + varSymbols <- rule (commaList varSymbol) + cmd <- rule (terminal maybeCmdToken "TEX command") +-- +-- Formulas have three levels: +-- +-- + Expressions: atoms or operators applied to atoms. +-- + Chains: comma-lists of expressions, separated by relators. +-- + Formulas: chains or connectives applied to chains. +-- +-- For example, the formula @x, y < z \implies x, y < z + 1@ consist of the +-- connective @\implies@ applied to two chains @x, y < z@ and @x, y < z + 1@. +-- In turn, the chain @x, y < z + 1@ consist of three expressions, +-- @x@, @y@, and @z + 1@. Finally, @z + 1@ consist the operator @+@ +-- applied to two atoms, the variable @z@ and the number literal @1@. +-- +-- This split is due to the different behaviour of relators compared to +-- operators and connectives. Relators can chain (@x < y < z@) and allow +-- lists as arguments, as in the above example. Operators and connectives +-- instead have precedence and fixity. The only syntactic difference between +-- an operator and a connective is the relative precedence compared to relators. +-- + replaceBound <- rule $ (,) <$> varSymbol <* _in <*> expr + replaceBounds <- rule $ commaList replaceBound + replaceFun <- rule $ ExprReplace <$> expr <* _pipe <*> replaceBounds <*> optional (_pipe *> comprStmt) + comprStmt <- rule $ (StmtFormula <$> formula) <|> text stmt + + replacePredSymbolic <- rule $ ExprReplacePred <$> varSymbol <* _pipe <*> (command "exists" *> varSymbol) <* _in <*> expr <* _dot <*> (StmtFormula <$> formula) + replacePredText <- rule $ ExprReplacePred <$> varSymbol <* _pipe <*> (begin "text" *> _exists *> beginMath *> varSymbol <* _in) <*> expr <* endMath <* _suchThat <*> stmt <* end "text" + replacePred <- rule $ replacePredSymbolic <|> replacePredText + + let exprStructOpOf ann = HS.foldr alg empty (HM.keysSet lexiconStructFun) + where + alg s prod = prod <|> (ExprStructOp <$> structSymbol s <*> ann) + + exprStructOp <- rule (exprStructOpOf (optional (bracket expr))) + + let bracedArgs1 ar arg = count1 ar $ group arg + let prefixPredicateOf f arg symb@(PrefixPredicate c ar) = f <$> pure symb <* command c <*> bracedArgs1 ar arg + + + exprParen <- rule $ paren expr + exprInteger <- rule $ ExprInteger <$> integer + exprVar <- rule $ ExprVar <$> varSymbol + exprTuple <- rule $ makeTuple <$> paren (commaList2 expr) + exprSep <- rule $ brace $ ExprSep <$> varSymbol <* _in <*> expr <* _pipe <*> comprStmt + exprReplace <- rule $ brace $ (replaceFun <|> replacePred) + exprFinSet <- rule $ brace $ ExprFiniteSet <$> exprs + exprBase <- rule $ asum [exprVar, exprInteger, exprStructOp, exprParen, exprTuple, exprSep, exprReplace, exprFinSet] + exprApp <- rule $ ExprApp <$> exprBase <*> (paren expr <|> exprTuple) + expr <- mixfixExpression ops (exprBase <|> exprApp) ExprOp + exprs <- rule $ commaList expr + + relationSign <- rule $ pure Positive <|> (Negative <$ command "not") + relationExpr <- rule $ RelationExpr <$> (command "mathrel" *> group expr) + relation <- rule $ (RelationSymbol <$> relator) <|> relationExpr + chainBase <- rule $ ChainBase <$> exprs <*> relationSign <*> relation <*> exprs + chainCons <- rule $ ChainCons <$> exprs <*> relationSign <*> relation <*> chain + chain <- rule $ chainCons <|> chainBase + + formulaPredicate <- rule $ asum $ prefixPredicateOf FormulaPredicate expr <$> HM.keys lexiconPrefixPredicates + formulaChain <- rule $ FormulaChain <$> chain + formulaBottom <- rule $ PropositionalConstant IsBottom <$ command "bot" "\"\\bot\"" + formulaTop <- rule $ PropositionalConstant IsTop <$ command "top" "\"\\top\"" + formulaExists <- rule $ FormulaQuantified Existentially <$> (command "exists" *> varSymbols) <*> maybeBounded <* _dot <*> formula + formulaAll <- rule $ FormulaQuantified Universally <$> (command "forall" *> varSymbols) <*> maybeBounded <* _dot <*> formula + formulaQuantified <- rule $ formulaExists <|> formulaAll + formulaBase <- rule $ asum [formulaChain, formulaPredicate, formulaBottom, formulaTop, paren formula] + formulaConn <- mixfixExpression conns formulaBase makeConnective + formula <- rule $ formulaQuantified <|> formulaConn + +-- These are asymmetric formulas (only variables are allowed on one side). +-- They express judgements. +-- + assignment <- rule $ (,) <$> varSymbol <* (_eq <|> _defeq) <*> expr + typing <- rule $ (,) <$> varSymbols <* (_in <|> _colon) <*> expr + + adjL <- rule $ adjLOf lexicon term + adjR <- rule $ adjROf lexicon term + adj <- rule $ adjOf lexicon term + adjVar <- rule $ adjOf lexicon var + + var <- rule $ math varSymbol + vars <- rule $ math varSymbols + + verb <- rule $ verbOf lexicon sg term + verbPl <- rule $ verbOf lexicon pl term + verbVar <- rule $ verbOf lexicon sg var + + noun <- rule $ nounOf lexicon sg term nounName -- Noun with optional variable name. + nounList <- rule $ nounOf lexicon sg term nounNames -- Noun with a list of names. + nounVar <- rule $ fst <$> nounOf lexicon sg var (pure Nameless) -- No names in defined nouns. + nounPl <- rule $ nounOf lexicon pl term nounNames + nounPlMay <- rule $ nounOf lexicon pl term nounName + + structNoun <- rule $ structNounOf lexicon sg var var + structNounNameless <- rule $ fst <$> structNounOf lexicon sg var (pure Nameless) + + + fun <- rule $ funOf lexicon sg term + funVar <- rule $ funOf lexicon sg var + + attrRThat <- rule $ AttrRThat <$> thatVerbPhrase + attrRThats <- rule $ ((:[]) <$> attrRThat) <|> ((\a a' -> [a,a']) <$> attrRThat <* _and <*> attrRThat) <|> pure [] + attrRs <- rule $ ((:[]) <$> adjR) <|> ((\a a' -> [a,a']) <$> adjR <* _and <*> adjR) <|> pure [] + attrRight <- rule $ (<>) <$> attrRs <*> attrRThats + + verbPhraseVerbSg <- rule $ VPVerb <$> verb + verbPhraseVerbNotSg <- rule $ VPVerbNot <$> (_does *> _not *> verbPl) + verbPhraseAdjSg <- rule $ VPAdj . (:|[]) <$> (_is *> adj) + verbPhraseAdjAnd <- rule do {_is; a1 <- adj; _and; a2 <- adj; pure (VPAdj (a1 :| [a2]))} + verbPhraseAdjNotSg <- rule $ VPAdjNot . (:|[]) <$> (_is *> _not *> adj) + verbPhraseNotSg <- rule $ verbPhraseVerbNotSg <|> verbPhraseAdjNotSg + verbPhraseSg <- rule $ verbPhraseVerbSg <|> verbPhraseAdjSg <|> verbPhraseAdjAnd <|> verbPhraseNotSg + + -- LATER can cause technical ambiguities? verbPhraseVerbPl <- rule $ VPVerb <$> verbPl + verbPhraseVerbNotPl <- rule $ VPVerbNot <$> (_do *> _not *> verbPl) + verbPhraseAdjPl <- rule $ VPAdj . (:|[]) <$> (_are *> adj) + verbPhraseAdjNotPl <- rule $ VPAdjNot . (:|[]) <$> (_are *> _not *> adj) + verbPhraseNotPl <- rule $ verbPhraseVerbNotPl <|> verbPhraseAdjNotPl + verbPhrasePl <- rule $ verbPhraseAdjPl <|> verbPhraseNotPl -- LATER <|> verbPhraseVerbPl + + + + thatVerbPhrase <- rule $ _that *> verbPhraseSg + + nounName <- rule $ optional (math varSymbol) + nounNames <- rule $ math (commaList_ varSymbol) <|> pure [] + nounPhrase <- rule $ makeNounPhrase <$> many adjL <*> noun <*> attrRight <*> optional suchStmt + nounPhrase' <- rule $ makeNounPhrase <$> many adjL <*> nounList <*> attrRight <*> optional suchStmt + nounPhrasePl <- rule $ makeNounPhrase <$> many adjL <*> nounPl <*> attrRight <*> optional suchStmt + nounPhrasePlMay <- rule $ makeNounPhrase <$> many adjL <*> nounPlMay <*> attrRight <*> optional suchStmt + nounPhraseMay <- rule $ makeNounPhrase <$> many adjL <*> noun <*> attrRight <*> optional suchStmt + + -- Quantification phrases for quantification and indfinite terms. + quantAll <- rule $ QuantPhrase Universally <$> (_forEvery *> nounPhrase' <|> _forAll *> nounPhrasePl) + quantSome <- rule $ QuantPhrase Existentially <$> (_some *> (nounPhrase' <|> nounPhrasePl)) + quantNone <- rule $ QuantPhrase Nonexistentially <$> (_no *> (nounPhrase' <|> nounPhrasePl)) + quant <- rule $ quantAll <|> quantSome <|> quantNone -- <|> quantUniq + + + termExpr <- rule $ TermExpr <$> math expr + termFun <- rule $ TermFun <$> (optional _the *> fun) + termIota <- rule $ TermIota <$> (_the *> var) <* _suchThat <*> stmt + termAll <- rule $ TermQuantified Universally <$> (_every *> nounPhraseMay) + termSome <- rule $ TermQuantified Existentially <$> (_some *> nounPhraseMay) + termNo <- rule $ TermQuantified Nonexistentially <$> (_no *> nounPhraseMay) + termQuantified <- rule $ termAll <|> termSome <|> termNo + term <- rule $ termExpr <|> termFun <|> termQuantified <|> termIota + +-- Basic statements @stmt'@ are statements without any conjunctions or quantifiers. +-- + stmtVerbSg <- rule $ StmtVerbPhrase <$> ((:| []) <$> term) <*> verbPhraseSg + stmtVerbPl <-rule $ StmtVerbPhrase <$> andList1 term <*> verbPhrasePl + stmtVerb <- rule $ stmtVerbSg <|> stmtVerbPl + stmtNounIs <- rule $ StmtNoun <$> term <* _is <* _an <*> nounPhrase + stmtNounIsNot <- rule $ StmtNeg <$> (StmtNoun <$> term <* _is <* _not <* _an <*> nounPhrase) + stmtNoun <- rule $ stmtNounIs <|> stmtNounIsNot + stmtStruct <- rule $ StmtStruct <$> (term <* _is <* _an) <*> structNounNameless + stmtExists <- rule $ StmtExists <$> (_exists *> _an *> nounPhrase') + stmtExist <- rule $ StmtExists <$> (_exist *> nounPhrasePl) + stmtExistsNot <- rule $ StmtNeg . StmtExists <$> (_exists *> _no *> nounPhrase') + stmtFormula <- rule $ StmtFormula <$> math formula + stmtBot <- rule $ StmtFormula (PropositionalConstant IsBottom) <$ _contradiction + stmt' <- rule $ stmtVerb <|> stmtNoun <|> stmtStruct <|> stmtFormula <|> stmtBot + stmtOr <- rule $ stmt' <|> (StmtConnected Disjunction <$> stmt' <* _or <*> stmt) + stmtAnd <- rule $ stmtOr <|> (StmtConnected Conjunction <$> stmtOr <* _and <*> stmt) + stmtIff <- rule $ stmtAnd <|> (StmtConnected Equivalence <$> stmtAnd <* _iff <*> stmt) + stmtIf <- rule $ StmtConnected Implication <$> (_if *> stmt) <* optional _comma <* _then <*> stmt + stmtXor <- rule $ StmtConnected ExclusiveOr <$> (_either *> stmt) <* _or <*> stmt + stmtNor <- rule $ StmtConnected NegatedDisjunction <$> (_neither *> stmt) <* _nor <*> stmt + stmtNeg <- rule $ StmtNeg <$> (_itIsWrong *> stmt) + + stmtQuantPhrase <- rule $ StmtQuantPhrase <$> (_for *> quant) <* optional _comma <* optional _have <*> stmt + + suchStmt <- rule $ _suchThat *> stmt <* optional _comma + + + -- Symbolic quantifications with or without generalized bounds. + symbolicForall <- rule $ SymbolicForall + <$> ((_forAll <|> _forEvery) *> beginMath *> varSymbols) + <*> maybeBounded <* endMath + <*> optional suchStmt + <* optional _have <*> stmt + symbolicExists <- rule $ SymbolicExists + <$> ((_exists <|> _exist) *> beginMath *> varSymbols) + <*> maybeBounded <* endMath + <*> ((_suchThat *> stmt) <|> pure (StmtFormula (PropositionalConstant IsTop))) + symbolicNotExists <- rule $ SymbolicNotExists + <$> (_exists *> _no *> beginMath *> varSymbols) + <*> maybeBounded <* endMath + <* _suchThat <*> stmt + symbolicBound <- rule $ Bounded <$> relationSign <*> relation <*> expr + maybeBounded <- rule (pure Unbounded <|> symbolicBound) + + symbolicQuantified <- rule $ symbolicForall <|> symbolicExists <|> symbolicNotExists + + stmt <- rule $ asum [stmtNeg, stmtIf, stmtXor, stmtNor, stmtExists, stmtExist, stmtExistsNot, stmtQuantPhrase, stmtIff, symbolicQuantified] "a statement" + + asmLetIn <- rule $ uncurry AsmLetIn <$> (_let *> math typing) + asmLetNoun <- rule $ AsmLetNoun <$> (_let *> fmap pure var <* (_be <|> _denote) <* _an) <*> nounPhrase + asmLetNouns <- rule $ AsmLetNoun <$> (_let *> vars <* (_be <|> _denote)) <*> nounPhrasePlMay + asmLetEq <- rule $ uncurry AsmLetEq <$> (_let *> math assignment) + asmLetThe <- rule $ AsmLetThe <$> (_let *> var <* _be <* _the) <*> fun + asmLetStruct <- rule $ AsmLetStruct <$> (_let *> var <* _be <* _an) <*> structNounNameless + asmLet <- rule $ asmLetNoun <|> asmLetNouns <|> asmLetIn <|> asmLetEq <|> asmLetThe <|> asmLetStruct + asmSuppose <- rule $ AsmSuppose <$> (_suppose *> stmt) + asm <- rule $ assumptionList (asmLet <|> asmSuppose) <* _dot + asms <- rule $ concat <$> many asm + + axiom <- rule $ Axiom <$> asms <* optional _then <*> stmt <* _dot + + lemma <- rule $ Lemma <$> asms <* optional _then <*> stmt <* _dot + + defnAdj <- rule $ DefnAdj <$> optional (_an *> nounPhrase) <*> var <* _is <*> adjVar + defnVerb <- rule $ DefnVerb <$> optional (_an *> nounPhrase) <*> var <*> verbVar + defnNoun <- rule $ DefnNoun <$> var <* _is <* _an <*> nounVar + defnRel <- rule $ DefnRel <$> (beginMath *> varSymbol) <*> relator <*> varSymbol <* endMath + defnSymbolicPredicate <- rule $ math $ asum $ prefixPredicateOf DefnSymbolicPredicate varSymbol <$> HM.keys lexiconPrefixPredicates + defnHead <- rule $ optional _write *> asum [defnAdj, defnVerb, defnNoun, defnRel, defnSymbolicPredicate] + + defnIf <- rule $ Defn <$> asms <*> defnHead <* (_iff <|> _if) <*> stmt <* _dot + defnFunSymb <- rule $ _comma *> termExpr <* _comma -- ^ Optional symbolic equivalent. + defnFun <- rule $ DefnFun <$> asms <*> (optional _the *> funVar) <*> optional defnFunSymb <* _is <*> term <* _dot + + symbolicPatternEqTerm <- rule do + pat <- beginMath *> symbolicPattern <* _eq + e <- expr <* endMath <* _dot + pure (pat, e) + defnOp <- rule $ uncurry DefnOp <$> symbolicPatternEqTerm + + defn <- rule $ defnIf <|> defnFun <|> defnOp + + abbreviationVerb <- rule $ AbbreviationVerb <$> var <*> verbVar <* (_iff <|> _if) <*> stmt <* _dot + abbreviationAdj <- rule $ AbbreviationAdj <$> var <* _is <*> adjVar <* (_iff <|> _if) <*> stmt <* _dot + abbreviationNoun <- rule $ AbbreviationNoun <$> var <* _is <* _an <*> nounVar <* (_iff <|> _if) <*> stmt <* _dot + abbreviationRel <- rule $ AbbreviationRel <$> (beginMath *> varSymbol) <*> relator <*> varSymbol <* endMath <* (_iff <|> _if) <*> stmt <* _dot + abbreviationFun <- rule $ AbbreviationFun <$> (_the *> funVar) <* (_is <|> _denotes) <*> term <* _dot + abbreviationEq <- rule $ uncurry AbbreviationEq <$> symbolicPatternEqTerm + abbreviation <- rule $ (abbreviationVerb <|> abbreviationAdj <|> abbreviationNoun <|> abbreviationRel <|> abbreviationFun <|> abbreviationEq) + + datatypeFin <- rule $ DatatypeFin <$> fmap fst (_an *> noun) <*> (_is *> _oneOf *> orList2 (math cmd) <* _dot) + datatype <- rule datatypeFin + + unconditionalIntro <- rule $ IntroRule [] <$> math formula + conditionalIntro <- rule $ IntroRule <$> (_if *> andList1_ (math formula)) <* _comma <* _then <*> math formula + inductiveIntro <- rule $ (unconditionalIntro <|> conditionalIntro) <* _dot + inductiveDomain <- rule $ math $ (,) <$> symbolicPattern <* _subseteq <*> expr + inductiveHead <- rule $ _define *> inductiveDomain <* optional _inductively <* optional _asFollows <* _dot + inductive <- rule $ uncurry Inductive <$> inductiveHead <*> enumerated1 inductiveIntro + + signatureAdj <- rule $ SignatureAdj <$> var <* _can <* _be <*> adjOf lexicon var + symbolicPattern <- symbolicPatternOf ops varSymbol + signatureSymbolic <- rule $ SignatureSymbolic <$> math symbolicPattern <* _is <* _an <*> nounPhrase + signature <- rule $ (,) <$> asms <* optional _then <*> (signatureAdj <|> signatureSymbolic) <* _dot + + structFix <- rule do + beginMath + rawCmd <- cmd + endMath + pure (StructSymbol rawCmd) + structDefn <- rule $ do + _an + ~(structPhrase, structLabel) <- structNoun + _extends + structParents <- andList1_ (_an *> structNounNameless) + maybeFixes <- optional (_equipped *> enumerated structFix) + structAssumes <- (_suchThat *> enumeratedMarked (stmt <* _dot)) <|> ([] <$ _dot) + pure StructDefn + { structPhrase = structPhrase + , structLabel = structLabel + , structParents = structParents + , structFixes = maybeFixes ?? [] + , structAssumes = structAssumes + } + + justificationSet <- rule $ JustificationSetExt <$ _bySetExt + justificationRef <- rule $ JustificationRef <$> (_by *> ref) + justificationLocal <- rule $ JustificationLocal <$ (_by *> (_assumption <|> _definition)) + justification <- rule (justificationSet <|> justificationRef <|> justificationLocal <|> pure JustificationEmpty) + + trivial <- rule $ Qed JustificationEmpty <$ _trivial <* _dot + omitted <- rule $ Omitted <$ _omitted <* _dot + qedJustified <- rule $ Qed <$> (_follows *> justification <* _dot) + qed <- rule $ qedJustified <|> trivial <|> omitted <|> pure (Qed JustificationEmpty) + + let alignedEq = symbol "&=" "\"&=\"" + explanation <- rule $ (text justification) <|> pure JustificationEmpty + equationItem <- rule $ (,) <$> (alignedEq *> expr) <*> explanation + equations <- rule $ Equation <$> expr <*> (many1 equationItem) + + let alignedIff = symbol "&" *> command "iff" "\"&\\iff\"" + biconditionalItem <- rule $ (,) <$> (alignedIff *> formula) <*> explanation + biconditionals <- rule $ Biconditionals <$> formula <*> (many1 biconditionalItem) + + calc <- rule $ Calc <$> align (equations <|> biconditionals) <*> proof + + caseOf <- rule $ command "caseOf" *> token InvisibleBraceL *> stmt <* _dot <* token InvisibleBraceR + byCases <- rule $ ByCase <$> env_ "byCase" (many1_ (Case <$> caseOf <*> proof)) + byContradiction <- rule $ ByContradiction <$ _suppose <* _not <* _dot <*> proof + bySetInduction <- rule $ BySetInduction <$> proofBy (_in *> word "-induction" *> optional (word "on" *> term)) <*> proof + byOrdInduction <- rule $ ByOrdInduction <$> proofBy (word "transfinite" *> word "induction" *> proof) + assume <- rule $ Assume <$> (_suppose *> stmt <* _dot) <*> proof + + fixSymbolic <- rule $ FixSymbolic <$> (_fix *> beginMath *> varSymbols) <*> maybeBounded <* endMath <* _dot <*> proof + fixSuchThat <- rule $ FixSuchThat <$> (_fix *> math varSymbols) <* _suchThat <*> stmt <* _dot <*> proof + fix <- rule $ fixSymbolic <|> fixSuchThat + + takeVar <- rule $ TakeVar <$> (_take *> beginMath *> varSymbols) <*> maybeBounded <* endMath <* _suchThat <*> stmt <*> justification <* _dot <*> proof + takeNoun <- rule $ TakeNoun <$> (_take *> _an *> (nounPhrase' <|> nounPhrasePl)) <*> justification <* _dot <*> proof + take <- rule $ takeVar <|> takeNoun + suffices <- rule $ Suffices <$> (_sufficesThat *> stmt) <*> (justification <* _dot) <*> proof + subclaim <- rule $ Subclaim <$> (_show *> stmt <* _dot) <*> env_ "subproof" proof <*> proof + have <- rule $ Have <$> optional (_since *> stmt <* _comma <* _have) <* optional _haveIntro <*> stmt <*> justification <* _dot <*> proof + + define <- rule $ Define <$> (_let *> beginMath *> varSymbol <* _eq) <*> expr <* endMath <* _dot <*> proof + defineFunction <- rule $ DefineFunction <$> (_let *> beginMath *> varSymbol) <*> paren varSymbol <* _eq <*> expr <* endMath <* _for <* beginMath <*> varSymbol <* _in <*> expr <* endMath <* _dot <*> proof + + proof <- rule $ asum [byContradiction, byCases, bySetInduction, byOrdInduction, calc, subclaim, assume, fix, take, have, suffices, define, defineFunction, qed] + + + blockAxiom <- rule $ uncurry3 BlockAxiom <$> envPos "axiom" axiom + blockLemma <- rule $ uncurry3 BlockLemma <$> lemmaEnv lemma + blockProof <- rule $ uncurry BlockProof <$> envPos_ "proof" proof + blockDefn <- rule $ uncurry3 BlockDefn <$> envPos "definition" defn + blockAbbr <- rule $ uncurry3 BlockAbbr <$> envPos "abbreviation" abbreviation + blockData <- rule $ uncurry BlockData <$> envPos_ "datatype" datatype + blockInd <- rule $ uncurry3 BlockInductive <$> envPos "inductive" inductive + blockSig <- rule $ (\(p, (a, s)) -> BlockSig p a s) <$> envPos_ "signature" signature + blockStruct <- rule $ uncurry3 BlockStruct <$> envPos "struct" structDefn + block <- rule $ asum [blockAxiom, blockLemma, blockDefn, blockAbbr, blockData, blockInd, blockSig, blockStruct, blockProof] + + -- Starting category. + pure block + + +proofBy :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +proofBy method = bracket $ word "proof" *> word "by" *> method + + +lemmaEnv :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, Marker, a) +lemmaEnv content = asum + [ envPos "theorem" content + , envPos "lemma" content + , envPos "corollary" content + , envPos "claim" content + , envPos "proposition" content + ] + + +-- | A disjunctive list with at least two items: +-- * 'a or b' +-- * 'a, b, or c' +-- * 'a, b, c, or d' +-- +orList2 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a) +orList2 item = ((:|) <$> item <*> many (_commaOr *> item)) + <|> ((\i j -> i:|[j]) <$> item <* _or <*> item) + + +-- | Nonempty textual lists of the form "a, b, c, and d". +-- The final comma is mandatory, 'and' is not. +-- Also allows "a and b". Should therefore be avoided in contexts where +-- a logical conjunction would also be possible. +-- Currently also allows additional 'and's after each comma... +-- +andList1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a) +andList1 item = ((:|) <$> item <*> many (_commaAnd *> item)) + <|> ((\i j -> i:|[j]) <$> item <* _and <*> item) + +-- | Like 'andList1', but drops the information about nonemptiness. +andList1_ :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a] +andList1_ item = toList <$> andList1 item + + +commaList :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a) +commaList item = (:|) <$> item <*> many (_comma *> item) + +-- | Like 'commaList', but drops the information about nonemptiness. +commaList_ :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a] +commaList_ item = NonEmpty.toList <$> commaList item + +-- | Like 'commaList', but requires at least two items (and hence at least one comma). +commaList2 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a) +commaList2 item = (:|) <$> item <* _comma <*> commaList_ item + + +assumptionList :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a] +assumptionList item = NonEmpty.toList <$> andList1 item + +enumerated :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [a] +enumerated p = NonEmpty.toList <$> enumerated1 p + +enumerated1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty a) +enumerated1 p = begin "enumerate" *> many1 (command "item" *> p) <* end "enumerate" "\"\\begin{enumerate} ...\"" + + +enumeratedMarked :: Prod r Text (Located Token) a -> Prod r Text (Located Token) [(Marker, a)] +enumeratedMarked p = NonEmpty.toList <$> enumeratedMarked1 p + +enumeratedMarked1 :: Prod r Text (Located Token) a -> Prod r Text (Located Token) (NonEmpty (Marker, a)) +enumeratedMarked1 p = begin "enumerate" *> many1 ((,) <$> (command "item" *> label) <*> p) <* end "enumerate" "\"\\begin{enumerate}\\item\\label{...}...\"" + + + + +-- This function could be rewritten, so that it can be used directly in the grammar, +-- instead of with specialized variants. +-- +phraseOf + :: (pat -> [a] -> b) + -> Lexicon + -> (Lexicon -> HashSet pat) + -> (pat -> LexicalPhrase) + -> Prod r Text (Located Token) a + -> Prod r Text (Located Token) b +phraseOf constr lexicon selector proj arg = + uncurry constr <$> asum (fmap make pats) + where + pats = HS.toList (selector lexicon) + make pat = (\args -> (pat, args)) <$> go (proj pat) + go = \case + Just w : ws -> token w *> go ws + Nothing : ws -> (:) <$> arg <*> go ws + [] -> pure [] + +adjLOf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjLOf arg) +adjLOf lexicon arg = phraseOf AdjL lexicon (HM.keysSet . lexiconAdjLs) id arg "a left adjective" + +adjROf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjROf arg) +adjROf lexicon arg = phraseOf AdjR lexicon (HM.keysSet . lexiconAdjRs) id arg "a right adjective" + +adjOf :: Lexicon -> Prod r Text (Located Token) arg -> Prod r Text (Located Token) (AdjOf arg) +adjOf lexicon arg = phraseOf Adj lexicon (HM.keysSet . lexiconAdjs) id arg "an adjective" + +verbOf + :: Lexicon + -> (SgPl LexicalPhrase -> LexicalPhrase) + -> Prod r Text (Located Token) a + -> Prod r Text (Located Token) (VerbOf a) +verbOf lexicon proj arg = phraseOf Verb lexicon (HM.keysSet . lexiconVerbs) proj arg + +funOf + :: Lexicon + -> (SgPl LexicalPhrase -> LexicalPhrase) + -> Prod r Text (Located Token) a + -> Prod r Text (Located Token) (FunOf a) +funOf lexicon proj arg = phraseOf Fun lexicon (HM.keysSet . lexiconFuns) proj arg "functional phrase" + + +-- | A noun with a @t VarSymbol@ as name(s). +nounOf + :: Lexicon + -> (SgPl LexicalPhrase -> LexicalPhrase) + -> Prod r Text (Located Token) arg + -> Prod r Text (Located Token) (t VarSymbol) + -> Prod r Text (Located Token) (NounOf arg, t VarSymbol) +nounOf lexicon proj arg vars = + (\(args1, xs, args2, pat) -> (Noun pat (args1 <> args2), xs)) <$> asum (fmap make pats) "a noun" + where + pats = HM.keys (lexiconNouns lexicon) + make pat = + let (pat1, pat2) = splitOnVariableSlot (proj pat) + in (\args1 xs args2 -> (args1, xs, args2, pat)) <$> go pat1 <*> vars <*> go pat2 + go = \case + Just w : ws -> token w *> go ws + Nothing : ws -> (:) <$> arg <*> go ws + [] -> pure [] + +structNounOf + :: Lexicon + -> (SgPl LexicalPhrase -> LexicalPhrase) + -> Prod r Text (Located Token) arg + -> Prod r Text (Located Token) name + -> Prod r Text (Located Token) (StructPhrase, name) +structNounOf lexicon proj arg name = + (\(_args1, xs, _args2, pat) -> (pat, xs)) <$> asum (fmap make pats) "a structure noun" + where + pats = HM.keys (lexiconStructNouns lexicon) + make pat = + let (pat1, pat2) = splitOnVariableSlot (proj pat) + in (\args1 xs args2 -> (args1, xs, args2, pat)) <$> go pat1 <*> name <*> go pat2 + go = \case + Just w : ws -> token w *> go ws + Nothing : ws -> (:) <$> arg <*> go ws + [] -> pure [] + + +symbolicPatternOf + :: forall r. [[(Holey (Prod r Text (Located Token) Token), Associativity)]] + -> Prod r Text (Located Token) VarSymbol + -> Grammar r (Prod r Text (Located Token) SymbolPattern) +symbolicPatternOf ops varSymbol = rule $ asum + [ go op + | ops' <- ops + , (op, _assoc) <- ops' + ] "a symbolic pattern" + where + go :: Holey (Prod r Text (Located Token) Token) -> Prod r Text (Located Token) SymbolPattern + go [] = pure $ SymbolPattern [] [] + go (head : tail) = case head of + Just symb -> (\s (SymbolPattern op vs) -> SymbolPattern (Just s : op) vs) <$> symb <*> go tail + Nothing -> (\v (SymbolPattern op vs) -> SymbolPattern (Nothing : op) (v : vs)) <$> varSymbol <*> go tail + + +makeNounPhrase + :: [AdjL] + -> (Noun, t VarSymbol) + -> [AdjR] + -> Maybe Stmt + -> NounPhrase t +makeNounPhrase ls (n, vs) rs ms = NounPhrase ls n vs rs ms + + + + +begin, end :: Text -> Prod r Text (Located Token) SourcePos +begin kind = tokenPos (BeginEnv kind) ("\"\\begin{" <> kind <> "}\"") +end kind = tokenPos (EndEnv kind) ("\"\\end{" <> kind <> "}\"") + +-- | Surround a production rule @body@ with an environment of a certain @kind@ requiring a marker specified in a @\\label@. +-- Ignores the optional title after the beginning of the environment. +envPos :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, Marker, a) +envPos kind body = do + p <- begin kind ("start of a \"" <> kind <> "\" environment") + optional title + m <- label + a <- body <* end kind + pure (p, m, a) + where + title :: Prod r Text (Located Token) [Token] + title = bracket (many (unLocated <$> satisfy (\ltok -> unLocated ltok /= BracketR))) + +-- 'env_' is like 'env', but without allowing titles. +-- +envPos_ :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) (SourcePos, a) +envPos_ kind body = (,) <$> begin kind <*> (optional label *> body) <* end kind + +env_ :: Text -> Prod r Text (Located Token) a -> Prod r Text (Located Token) a +env_ kind body = begin kind *> optional label *> body <* end kind + +-- | A label specifying a marker for referencing via /@\\label{...}@/. Returns the marker text. +label :: Prod r Text (Located Token) Marker +label = label_ "\"\\label{...}\"" + where + label_ = terminal \ltok -> case unLocated ltok of + Label m -> Just (Marker m) + _tok -> Nothing + +-- | A reference via /@\\ref{...}@/. Returns the markers as text. +ref :: Prod r Text (Located Token) (NonEmpty Marker) +ref = terminal \ltok -> case unLocated ltok of + Ref ms -> Just (Marker <$> ms) + _tok -> Nothing + +math :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +math body = beginMath *> body <* endMath + +text :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +text body = begin "text" *> body <* end "text" "\"\\text{...}\"" + +beginMath, endMath :: Prod r Text (Located Token) SourcePos +beginMath = begin "math" "start of a formula, e.g. \"$\"" +endMath = end "math" "end of a formula, e.g. \"$\"" + +paren :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +paren body = token ParenL *> body <* token ParenR "\"(...)\"" + +bracket :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +bracket body = token BracketL *> body <* token BracketR "\"[...]\"" + +brace :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +brace body = token VisibleBraceL *> body <* token VisibleBraceR "\"\\{...\\}\"" + +group :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +group body = token InvisibleBraceL *> body <* token InvisibleBraceR "\"{...}\"" + +align :: Prod r Text (Located Token) a -> Prod r Text (Located Token) a +align body = begin "align*" *> body <* end "align*" + + +maybeVarToken :: Located Token -> Maybe VarSymbol +maybeVarToken ltok = case unLocated ltok of + Variable x -> Just (NamedVar x) + _tok -> Nothing + +maybeWordToken :: Located Token -> Maybe Text +maybeWordToken ltok = case unLocated ltok of + Word n -> Just n + _tok -> Nothing + +maybeIntToken :: Located Token -> Maybe Int +maybeIntToken ltok = case unLocated ltok of + Integer n -> Just n + _tok -> Nothing + +maybeCmdToken :: Located Token -> Maybe Text +maybeCmdToken ltok = case unLocated ltok of + Command n -> Just n + _tok -> Nothing + +structSymbol :: StructSymbol -> Prod r Text (Located Token) StructSymbol +structSymbol s@(StructSymbol c) = terminal \ltok -> case unLocated ltok of + Command c' | c == c' -> Just s + _ -> Nothing + +-- | Tokens that are allowed to appear in labels of environments. +maybeTagToken :: Located Token -> Maybe Text +maybeTagToken ltok = case unLocated ltok of + Symbol "'" ->Just "'" + Symbol "-" -> Just "" + _ -> maybeWordToken ltok + + +token :: Token -> Prod r Text (Located Token) Token +token tok = terminal maybeToken tokToText tok + where + maybeToken ltok = case unLocated ltok of + tok' | tok == tok' -> Just tok + _ -> Nothing + +tokenPos :: Token -> Prod r Text (Located Token) SourcePos +tokenPos tok = terminal maybeToken tokToText tok + where + maybeToken ltok = case unLocated ltok of + tok' | tok == tok' -> Just (startPos ltok) + _ -> Nothing -- cgit v1.2.3