diff options
| author | adelon <22380201+adelon@users.noreply.github.com> | 2024-05-07 16:01:51 +0200 |
|---|---|---|
| committer | adelon <22380201+adelon@users.noreply.github.com> | 2024-05-07 16:01:51 +0200 |
| commit | bc2ea0a384548aab50991c4de365f1afbad9a284 (patch) | |
| tree | 27595cd55d7c99be1f3267956dd2dd32bf9b1311 /source | |
| parent | ec26071e4c57ca86a5df92bbef228fb73467450d (diff) | |
Sketch lexicon mechanism
Diffstat (limited to 'source')
| -rw-r--r-- | source/Api.hs | 10 | ||||
| -rw-r--r-- | source/Syntax/LexiconFile.hs | 58 |
2 files changed, 64 insertions, 4 deletions
diff --git a/source/Api.hs b/source/Api.hs index ac277f5..a8f6148 100644 --- a/source/Api.hs +++ b/source/Api.hs @@ -36,21 +36,22 @@ import Base import Checking import Checking.Cache import Encoding +import Filter(filterTask) import Meaning (meaning, GlossError(..)) +import Megalodon qualified import Provers import Syntax.Abstract qualified as Raw import Syntax.Adapt (adaptChunks, scanChunk, ScannedLexicalItem) +import Syntax.Chunk import Syntax.Concrete import Syntax.Import -import Syntax.Chunk import Syntax.Internal qualified as Internal import Syntax.Lexicon (Lexicon, builtins) +import Syntax.LexiconFile import Syntax.Token import TheoryGraph (TheoryGraph, Precedes(..)) import TheoryGraph qualified import Tptp.UnsortedFirstOrder qualified as Tptp -import Filter(filterTask) -import Megalodon qualified import Control.Monad.Logger import Data.List (intercalate) @@ -120,7 +121,8 @@ scan :: MonadIO io => FilePath -> io [ScannedLexicalItem] scan input = do tokenStream <- tokenize input let chunks = chunkify (unTokStream tokenStream) - pure (concatMap scanChunk chunks) + items <- liftIO parseLexiconFile + pure ((concatMap scanChunk chunks) <> items) -- | Parse a file. 
Throws a 'ParseException' when tokenizing, scanning, or diff --git a/source/Syntax/LexiconFile.hs b/source/Syntax/LexiconFile.hs new file mode 100644 index 0000000..be70a6b --- /dev/null +++ b/source/Syntax/LexiconFile.hs @@ -0,0 +1,58 @@ +module Syntax.LexiconFile where + +import Base hiding (many) +import Syntax.Adapt +import Syntax.LexicalPhrase +import Syntax.Abstract + +import Data.Char (isAlphaNum, isAsciiLower, isLetter, isDigit) +import Data.Text qualified as Text +import Data.Text.IO qualified as Text +import Text.Earley.Mixfix (Holey) +import Text.Megaparsec hiding (Token, Label, label) +import Text.Megaparsec.Char qualified as Char +import UnliftIO.Directory + + +type LexiconFileParser = Parsec Void Text + +parseLexiconFile :: IO [ScannedLexicalItem] +parseLexiconFile = do + currentDir <- getCurrentDirectory + let csvPath = (currentDir <> "lexicon.csv") + csv <- Text.readFile csvPath + case runParser lexiconFile csvPath csv of + Left err -> fail (errorBundlePretty err) + Right entries -> pure entries + +lexiconFile :: LexiconFileParser [ScannedLexicalItem] +lexiconFile = many line <* eof + +line :: LexiconFileParser ScannedLexicalItem +line = do + c <- satisfy isAsciiLower + cs <- takeWhileP Nothing (\x -> isAsciiLower x || isDigit x || x == '_') + let marker = Marker (Text.cons c cs) + Char.char ',' + kind <- takeWhile1P Nothing isLetter + Char.char ',' + item <- case kind of + "adj" -> do + entry <- takeWhile1P Nothing (\x -> isAlphaNum x || x == '\'' || x == '-' || x == ' ') + pure (ScanAdj (unsafeReadPhrase (Text.unpack entry)) marker) + "rel" -> do + entry <- tokenSingle + pure (ScanRelationSymbol entry marker) + "const" -> do + entry <- tokenPattern + pure (ScanFunctionSymbol entry marker) + _ -> error "Unrecognized lexical item kind in lexicon file." 
+ optional Char.eol + pure item + +tokenSingle :: LexiconFileParser Token +tokenSingle = Command <$> (single '\\' *> takeWhile1P Nothing (\x -> isAlphaNum x)) + +-- TODO allow spaces +tokenPattern :: LexiconFileParser (Holey Token) +tokenPattern = some (Just <$> tokenSingle <|> Nothing <$ single '?')
