summaryrefslogtreecommitdiff
path: root/source/Syntax/LexiconFile.hs
blob: b700621c3a4de2cbec2fa215cb5851d7d26aafa8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
module Syntax.LexiconFile where

import Base hiding (many)
import Syntax.Adapt
import Syntax.LexicalPhrase
import Syntax.Abstract

import Data.Char (isAlphaNum, isAsciiLower, isLetter, isDigit)
import Data.Text qualified as Text
import Data.Text.IO qualified as Text
import Text.Earley.Mixfix (Holey)
import Text.Megaparsec hiding (Token, Label, label)
import Text.Megaparsec.Char qualified as Char
import UnliftIO.Directory
import System.FilePath

type LexiconFileParser = Parsec Void Text

parseLexiconFile :: IO [ScannedLexicalItem]
parseLexiconFile = do
    currentDir  <- getCurrentDirectory
    let csvPath = (currentDir </> "library" </> "lexicon.csv")
    csv <- Text.readFile csvPath
    case runParser lexiconFile csvPath csv of
        Left err -> fail (errorBundlePretty err)
        Right entries -> pure entries

lexiconFile :: LexiconFileParser [ScannedLexicalItem]
lexiconFile = many line <* eof

line :: LexiconFileParser ScannedLexicalItem
line = do
    c <- satisfy isAsciiLower
    cs <- takeWhileP Nothing (\x -> isAsciiLower x || isDigit x || x == '_')
    let marker = Marker (Text.cons c cs)
    Char.char ','
    kind <- takeWhile1P Nothing isLetter
    Char.char ','
    item <- case kind of
        "adj" -> do
            entry <- takeWhile1P Nothing (\x -> isAlphaNum x || x == '\'' || x == '-' || x == ' ')
            pure (ScanAdj (unsafeReadPhrase (Text.unpack entry)) marker)
        "rel" -> do
            entry <- tokenSingle
            pure (ScanRelationSymbol entry marker)
        "const" -> do
            entry <- tokenPattern
            pure (ScanFunctionSymbol entry marker)
        _ -> error "Unrecognized lexical item kind in lexicon file."
    optional Char.eol
    pure item

tokenSingle :: LexiconFileParser Token
tokenSingle = Command <$> (single '\\' *> takeWhile1P Nothing (\x -> isAlphaNum x))

-- TODO allow spaces
tokenPattern :: LexiconFileParser (Holey Token)
tokenPattern = some (Just <$> tokenSingle <|> Nothing <$ single '?')