blob: b700621c3a4de2cbec2fa215cb5851d7d26aafa8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
module Syntax.LexiconFile where
import Base hiding (many)
import Syntax.Adapt
import Syntax.LexicalPhrase
import Syntax.Abstract
import Data.Char (isAlphaNum, isAsciiLower, isLetter, isDigit)
import Data.Text qualified as Text
import Data.Text.IO qualified as Text
import Text.Earley.Mixfix (Holey)
import Text.Megaparsec hiding (Token, Label, label)
import Text.Megaparsec.Char qualified as Char
import UnliftIO.Directory
import System.FilePath
type LexiconFileParser = Parsec Void Text
parseLexiconFile :: IO [ScannedLexicalItem]
parseLexiconFile = do
currentDir <- getCurrentDirectory
let csvPath = (currentDir </> "library" </> "lexicon.csv")
csv <- Text.readFile csvPath
case runParser lexiconFile csvPath csv of
Left err -> fail (errorBundlePretty err)
Right entries -> pure entries
lexiconFile :: LexiconFileParser [ScannedLexicalItem]
lexiconFile = many line <* eof
line :: LexiconFileParser ScannedLexicalItem
line = do
c <- satisfy isAsciiLower
cs <- takeWhileP Nothing (\x -> isAsciiLower x || isDigit x || x == '_')
let marker = Marker (Text.cons c cs)
Char.char ','
kind <- takeWhile1P Nothing isLetter
Char.char ','
item <- case kind of
"adj" -> do
entry <- takeWhile1P Nothing (\x -> isAlphaNum x || x == '\'' || x == '-' || x == ' ')
pure (ScanAdj (unsafeReadPhrase (Text.unpack entry)) marker)
"rel" -> do
entry <- tokenSingle
pure (ScanRelationSymbol entry marker)
"const" -> do
entry <- tokenPattern
pure (ScanFunctionSymbol entry marker)
_ -> error "Unrecognized lexical item kind in lexicon file."
optional Char.eol
pure item
tokenSingle :: LexiconFileParser Token
tokenSingle = Command <$> (single '\\' *> takeWhile1P Nothing (\x -> isAlphaNum x))
-- TODO allow spaces
tokenPattern :: LexiconFileParser (Holey Token)
tokenPattern = some (Just <$> tokenSingle <|> Nothing <$ single '?')
|