diff options
| author | adelon <22380201+adelon@users.noreply.github.com> | 2024-02-10 02:22:14 +0100 |
|---|---|---|
| committer | adelon <22380201+adelon@users.noreply.github.com> | 2024-02-10 02:22:14 +0100 |
| commit | 442d732696ad431b84f6e5c72b6ee785be4fd968 (patch) | |
| tree | b476f395e7e91d67bacb6758bc84914b8711593f /source/Syntax/Lexicon.hs | |
Initial commit
Diffstat (limited to 'source/Syntax/Lexicon.hs')
| -rw-r--r-- | source/Syntax/Lexicon.hs | 275 |
1 files changed, 275 insertions, 0 deletions
diff --git a/source/Syntax/Lexicon.hs b/source/Syntax/Lexicon.hs new file mode 100644 index 0000000..805b875 --- /dev/null +++ b/source/Syntax/Lexicon.hs @@ -0,0 +1,275 @@ +{-# LANGUAGE NoImplicitPrelude #-} + +-- | The 'Lexicon' describes the part of the grammar that extensible/dynamic. +-- +-- The items of the 'Lexicon' are organized by their meaning and their +-- syntactic behaviour. They are typically represented as some kind of +-- pattern data which is then used to generate various production rules +-- for the concrete grammar. This representation makes inspection and +-- extension easier. +-- + +module Syntax.Lexicon + ( module Syntax.Lexicon + , pattern ConsSymbol + , pattern PairSymbol + , pattern CarrierSymbol + , pattern ApplySymbol + , pattern DomSymbol + ) where + + +import Base +import Syntax.Abstract + +import Data.List qualified as List +import Data.Sequence qualified as Seq +import Data.HashSet qualified as Set +import Data.HashMap.Strict qualified as HM +import Data.Text qualified as Text +import Text.Earley.Mixfix (Holey, Associativity(..)) + + +data Lexicon = Lexicon + { lexiconMixfix :: Seq (HashMap (Holey Token) (Associativity, Marker)) + , lexiconConnectives :: [[(Holey Token, Associativity)]] + , lexiconPrefixPredicates :: LexicalItems PrefixPredicate + , lexiconStructFun :: LexicalItems StructSymbol + , lexiconRelationSymbols :: LexicalItems RelationSymbol + , lexiconVerbs :: LexicalItems (SgPl LexicalPhrase) + , lexiconAdjLs :: LexicalItems LexicalPhrase + , lexiconAdjRs :: LexicalItems LexicalPhrase + , lexiconNouns :: LexicalItems (SgPl LexicalPhrase) + , lexiconStructNouns :: LexicalItems (SgPl LexicalPhrase) + , lexiconFuns :: LexicalItems (SgPl LexicalPhrase) + } deriving (Show, Eq) + +-- Projection returning the union of both left and right attributes. +-- +lexiconAdjs :: Lexicon -> HashMap LexicalPhrase Marker +lexiconAdjs lexicon = lexiconAdjLs lexicon <> lexiconAdjRs lexicon + +lookupOp :: FunctionSymbol -> Seq (HashMap FunctionSymbol (assoc, Marker)) -> Either String Marker +lookupOp f ops = case snd <$> firstJust (HM.lookup f) ops of + Just m -> Right m + Nothing -> Left (show f) + +lookupLexicalItem :: (Hashable a, Show a) => a -> LexicalItems a -> Either String Marker +lookupLexicalItem a items = case HM.lookup a items of + Just m -> Right m + Nothing -> Left (show a) + +type LexicalItems a = HashMap a Marker + +builtins :: Lexicon +builtins = Lexicon + { lexiconMixfix = builtinMixfix + , lexiconPrefixPredicates = builtinPrefixPredicates + , lexiconStructFun = builtinStructOps + , lexiconConnectives = builtinConnectives + , lexiconRelationSymbols = builtinRelationSymbols + , lexiconAdjLs = mempty + , lexiconAdjRs = builtinAdjRs + , lexiconVerbs = builtinVerbs + , lexiconNouns = builtinNouns + , lexiconStructNouns = builtinStructNouns + , lexiconFuns = mempty + } + +-- INVARIANT: 10 precedence levels for now. +builtinMixfix :: Seq (HashMap FunctionSymbol (Associativity, Marker)) +builtinMixfix = Seq.fromList $ (HM.fromList <$>) + [ [] + , [binOp (Symbol "+") LeftAssoc "add", binOp (Command "union") LeftAssoc "union", binOp (Command "monus") LeftAssoc "monus"] + , [binOp (Command "relcomp") LeftAssoc "relcomp"] + , [binOp (Command "circ") LeftAssoc "circ"] + , [binOp (Command "mul") LeftAssoc "mul", binOp (Command "inter") LeftAssoc "inter"] + , [binOp (Command "setminus") LeftAssoc "setminus"] + , [binOp (Command "times") RightAssoc "times"] + , [] + , prefixOps + , builtinIdentifiers + ] + where + builtinIdentifiers :: [(FunctionSymbol, (Associativity, Marker))] + builtinIdentifiers = identifier <$> + [ "emptyset" + , "naturals" + , "rationals" + , "reals" + , "unit" + , "zero" + ] + +prefixOps :: [(FunctionSymbol, (Associativity, Marker))] +prefixOps = + [ ([Just (Command "unions"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR], (NonAssoc, "unions")) + , ([Just (Command "cumul"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR], (NonAssoc, "cumul")) + , ([Just (Command "fst"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR], (NonAssoc, "fst")) + , ([Just (Command "snd"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR], (NonAssoc, "snd")) + , ([Just (Command "pow"), Just InvisibleBraceL, Nothing, Just InvisibleBraceR], (NonAssoc, "pow")) + , (ConsSymbol, (NonAssoc, "cons")) + , (PairSymbol, (NonAssoc, "pair")) + -- NOTE Is now defined and hence no longer necessary , (ApplySymbol, (NonAssoc, "apply")) + ] + + +builtinStructOps :: LexicalItems StructSymbol +builtinStructOps = HM.fromList + [ (CarrierSymbol, "carrier") + ] + +identifier :: Text -> (Holey Token, (Associativity, Marker)) +identifier cmd = ([Just (Command cmd)], (NonAssoc, Marker cmd)) + + +builtinRelationSymbols :: LexicalItems RelationSymbol +builtinRelationSymbols = HM.fromList + [ (Symbol "=", "eq") + , (Command "neq", "neq") + , (Command "in", "elem") + , (Command "notin", "notelem") -- Alternative to @\not\in@. + ] + +builtinPrefixPredicates :: LexicalItems PrefixPredicate +builtinPrefixPredicates = HM.fromList + [ (PrefixPredicate "Cong" 4, "cong") + , (PrefixPredicate "Betw" 3, "betw") + ] + + +builtinConnectives :: [[(Holey Token, Associativity)]] +builtinConnectives = + [ [binOp' (Command "iff") NonAssoc] + , [binOp' (Command "implies") RightAssoc] + , [binOp' (Command "lor") LeftAssoc] + , [binOp' (Command "land") LeftAssoc] + , [([Just (Command "lnot"), Nothing], NonAssoc)] + ] + + +binOp :: Token -> Associativity -> Marker -> (Holey Token, (Associativity, Marker)) +binOp tok assoc m = ([Nothing, Just tok, Nothing], (assoc, m)) + +binOp' :: Token -> Associativity -> (Holey Token, Associativity) +binOp' tok assoc = ([Nothing, Just tok, Nothing], assoc) + +builtinAdjRs :: LexicalItems LexicalPhrase +builtinAdjRs = HM.fromList + [ (unsafeReadPhrase "equal to ?", "eq") + ] + +builtinVerbs :: LexicalItems (SgPl LexicalPhrase) +builtinVerbs = HM.fromList + [ (unsafeReadPhraseSgPl "equal[s/] ?", "eq") + ] + + +-- Some of these do/should correspond to mathlib structures, +-- e.g.: lattice, complete lattice, ring, etc. +-- +builtinNouns :: LexicalItems (SgPl LexicalPhrase) +builtinNouns = HM.mapKeys unsafeReadPhraseSgPl (HM.fromList + -- Nullary + [ ("set[/s]", "set") + , ("point[/s]", "point") + , ("element[/s] of ?", "elem") + ]) + +_Onesorted :: SgPl LexicalPhrase +_Onesorted = unsafeReadPhraseSgPl "onesorted structure[/s]" + +builtinStructNouns :: LexicalItems (SgPl LexicalPhrase) +builtinStructNouns = HM.singleton _Onesorted "onesorted_structure" + + +-- | Naïve splitting of lexical phrases to insert a variable slot for names in noun phrases, +-- as in /@there exists a linear form $h$ on $E$@/, where the underlying pattern is +-- /@linear form on ?@/. In this case we would get: +-- +-- > splitOnVariableSlot (sg (unsafeReadPhraseSgPl "linear form[/s] on ?")) +-- > == +-- > (unsafeReadPhrase "linear form", unsafeReadPhrase "on ?") +-- +splitOnVariableSlot :: LexicalPhrase -> (LexicalPhrase, LexicalPhrase) +splitOnVariableSlot phrase = case prepositionIndices <> nonhyphenatedSlotIndices of + [] -> (phrase, []) -- Place variable slot at the end. + is -> List.splitAt (minimum is) phrase + where + prepositionIndices, slotIndices, nonhyphenatedSlotIndices :: [Int] -- Ascending. + prepositionIndices = List.findIndices isPreposition phrase + slotIndices = List.findIndices isNothing phrase + nonhyphenatedSlotIndices = [i | i <- slotIndices, noHyphen (nth (i + 1) phrase)] + + isPreposition :: Maybe Token -> Bool + isPreposition = \case + Just (Word w) -> w `Set.member` prepositions + _ -> False + + noHyphen :: Maybe (Maybe Token) -> Bool + noHyphen = \case + Just (Just (Word w)) -> Text.head w /= '-' + -- If we arrive here, either the pattern is over (`Nothing`) or the next + -- part of the pattern is not a word that starts with a hyphen. + _ -> True + + +-- Preposition are a closed class, but this list is not yet exhaustive. +-- It can and should be extended when needed. The following list is a +-- selection of the prepositions found at +-- https://en.wikipedia.org/wiki/List_of_English_prepositions. +-- +prepositions :: HashSet Text +prepositions = Set.fromList + [ "about" + , "above" + , "across" + , "after" + , "against" + , "along", "alongside" + , "amid", "amidst" + , "among" + , "around" + , "as" + , "at" + , "atop" + , "before" + , "behind" + , "below" + , "beneath" + , "beside", "besides" + , "between" + , "beyond" + , "but" + , "by" + , "except" + , "for" + , "from" + , "in", "inside", "into" + , "like" + , "modulo", "mod" + , "near" + , "next" + , "of" + , "off" + , "on" + , "onto" + , "opposite" + , "out" + , "over" + , "past" + , "per" + , "sans" + , "till" + , "to" + , "under" + , "underneath" + , "unlike" + , "unto" + , "up", "upon" + , "versus" + , "via" + , "with" + , "within" + , "without" + ] |
