summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
author	adelon <22380201+adelon@users.noreply.github.com>	2024-05-07 16:01:51 +0200
committer	adelon <22380201+adelon@users.noreply.github.com>	2024-05-07 16:01:51 +0200
commit	bc2ea0a384548aab50991c4de365f1afbad9a284 (patch)
tree	27595cd55d7c99be1f3267956dd2dd32bf9b1311 /source
parent	ec26071e4c57ca86a5df92bbef228fb73467450d (diff)
Sketch lexicon mechanism
Diffstat (limited to 'source')
-rw-r--r--	source/Api.hs	10
-rw-r--r--	source/Syntax/LexiconFile.hs	58
2 files changed, 64 insertions(+), 4 deletions(-)
diff --git a/source/Api.hs b/source/Api.hs
index ac277f5..a8f6148 100644
--- a/source/Api.hs
+++ b/source/Api.hs
@@ -36,21 +36,22 @@ import Base
import Checking
import Checking.Cache
import Encoding
+import Filter(filterTask)
import Meaning (meaning, GlossError(..))
+import Megalodon qualified
import Provers
import Syntax.Abstract qualified as Raw
import Syntax.Adapt (adaptChunks, scanChunk, ScannedLexicalItem)
+import Syntax.Chunk
import Syntax.Concrete
import Syntax.Import
-import Syntax.Chunk
import Syntax.Internal qualified as Internal
import Syntax.Lexicon (Lexicon, builtins)
+import Syntax.LexiconFile
import Syntax.Token
import TheoryGraph (TheoryGraph, Precedes(..))
import TheoryGraph qualified
import Tptp.UnsortedFirstOrder qualified as Tptp
-import Filter(filterTask)
-import Megalodon qualified
import Control.Monad.Logger
import Data.List (intercalate)
@@ -120,7 +121,8 @@ scan :: MonadIO io => FilePath -> io [ScannedLexicalItem]
scan input = do
tokenStream <- tokenize input
let chunks = chunkify (unTokStream tokenStream)
- pure (concatMap scanChunk chunks)
+ items <- liftIO parseLexiconFile
+ pure ((concatMap scanChunk chunks) <> items)
-- | Parse a file. Throws a 'ParseException' when tokenizing, scanning, or
diff --git a/source/Syntax/LexiconFile.hs b/source/Syntax/LexiconFile.hs
new file mode 100644
index 0000000..be70a6b
--- /dev/null
+++ b/source/Syntax/LexiconFile.hs
@@ -0,0 +1,58 @@
+module Syntax.LexiconFile where
+
+import Base hiding (many)
+import Syntax.Adapt
+import Syntax.LexicalPhrase
+import Syntax.Abstract
+
+import Data.Char (isAlphaNum, isAsciiLower, isLetter, isDigit)
+import Data.Text qualified as Text
+import Data.Text.IO qualified as Text
+import Text.Earley.Mixfix (Holey)
+import Text.Megaparsec hiding (Token, Label, label)
+import Text.Megaparsec.Char qualified as Char
+import UnliftIO.Directory
+
+
+type LexiconFileParser = Parsec Void Text
+
+-- | Read and parse @lexicon.csv@ from the current working directory.
+-- Fails (in 'IO') with a pretty-printed parse error on malformed input.
+parseLexiconFile :: IO [ScannedLexicalItem]
+parseLexiconFile = do
+  currentDir <- getCurrentDirectory
+  -- Fix: 'getCurrentDirectory' returns no trailing separator, so the
+  -- original @currentDir <> "lexicon.csv"@ built e.g. "/home/userlexicon.csv".
+  let csvPath = currentDir <> "/lexicon.csv"
+  csv <- Text.readFile csvPath
+  case runParser lexiconFile csvPath csv of
+    Left err -> fail (errorBundlePretty err)
+    Right entries -> pure entries
+
+-- | Parse the entire lexicon file: zero or more entries (one per line),
+-- requiring that all input is consumed.
+lexiconFile :: LexiconFileParser [ScannedLexicalItem]
+lexiconFile = many line <* eof
+
+-- | Parse one lexicon entry of the form @marker,kind,content@, optionally
+-- terminated by a newline. Recognized kinds are @adj@, @rel@ and @const@.
+line :: LexiconFileParser ScannedLexicalItem
+line = do
+  -- A marker is a lowercase ASCII letter followed by lowercase letters,
+  -- digits, or underscores.
+  c <- satisfy isAsciiLower
+  cs <- takeWhileP Nothing (\x -> isAsciiLower x || isDigit x || x == '_')
+  let marker = Marker (Text.cons c cs)
+  Char.char ','
+  kind <- takeWhile1P Nothing isLetter
+  Char.char ','
+  item <- case kind of
+    "adj" -> do
+      entry <- takeWhile1P Nothing (\x -> isAlphaNum x || x == '\'' || x == '-' || x == ' ')
+      pure (ScanAdj (unsafeReadPhrase (Text.unpack entry)) marker)
+    "rel" -> do
+      entry <- tokenSingle
+      pure (ScanRelationSymbol entry marker)
+    "const" -> do
+      entry <- tokenPattern
+      pure (ScanFunctionSymbol entry marker)
+    -- Fix: report unknown kinds as a proper parse error (with source
+    -- position) via 'fail' instead of crashing the program with 'error'.
+    _ -> fail ("Unrecognized lexical item kind in lexicon file: " <> Text.unpack kind)
+  optional Char.eol
+  pure item
+
+-- | Parse a single TeX-style command token, e.g. @\\leq@: a backslash
+-- followed by one or more alphanumeric characters.
+tokenSingle :: LexiconFileParser Token
+tokenSingle = Command <$> (single '\\' *> takeWhile1P Nothing isAlphaNum)
+
+-- TODO allow spaces
+-- | Parse a mixfix symbol pattern: one or more components, each either a
+-- command token ('Just') or a @?@ placeholder hole ('Nothing').
+tokenPattern :: LexiconFileParser (Holey Token)
+tokenPattern = some (choice [Just <$> tokenSingle, Nothing <$ single '?'])