1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Syntax.LexicalPhrase where
import Base
import Syntax.Token (Token(..))
import Data.Char (isAlpha)
import Data.Text qualified as Text
import Text.Earley.Mixfix (Holey)
import Text.Earley (Grammar, Prod, (<?>), fullParses, parser, rule, token, satisfy)
-- | 'LexicalPhrase's should be nonempty lists with at least one proper word token.
-- Hyphens and quotes in words are treated as letters.
-- Thus /@manifold-with-boundary@/ is a singleton lexical phrase (one word).
--
type LexicalPhrase = Holey Token
-- MAYBE Add this instance by making LexicalPhrase a proper Type?
-- Until then we can use the default instance for lists of prettyprintable things.
--
-- instance Pretty LexicalPhrase where
-- pretty components = hsep (prettyComponent <$> components)
-- where
-- prettyComponent = \case
-- Nothing -> "_"
-- Just tok -> pretty tok
-- | Split data by grammatical number (singular/plural).
-- The 'Eq' and 'Ord' instances only consider the singular
-- form so that we can prefer known irregular plurals over
-- guessed irregular plurals when inserting items into
-- the 'Lexicon'.
data SgPl a
= SgPl {sg :: a, pl :: a}
deriving (Show, Functor, Generic, Hashable)
instance Eq a => Eq (SgPl a) where (==) = (==) `on` sg
instance Ord a => Ord (SgPl a) where compare = compare `on` sg
unsafeReadPhrase :: String -> LexicalPhrase
unsafeReadPhrase spec = case fst (fullParses (parser lexicalPhraseSpec) spec) of
pat : _ -> pat
_ -> error "unsafeReadPhrase failed"
unsafeReadPhraseSgPl :: String -> SgPl LexicalPhrase
unsafeReadPhraseSgPl spec = case fst (fullParses (parser lexicalPhraseSpecSgPl) spec) of
pat : _ -> pat
_ -> error "unsafeReadPhraseSgPl failed"
lexicalPhraseSpec :: Grammar r (Prod r String Char LexicalPhrase)
lexicalPhraseSpec = do
hole <- rule $ Nothing <$ token '?' <?> "hole"
word <- rule $ Just <$> many (satisfy (\c -> isAlpha c || c == '-'))
space <- rule $ Just . (:[]) <$> token ' '
segment <- rule $ hole <|> word
rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment)
where
makePhrase :: [Maybe String] -> LexicalPhrase
makePhrase pat = fmap makeWord pat
lexicalPhraseSpecSgPl :: Grammar r (Prod r String Char (SgPl LexicalPhrase))
lexicalPhraseSpecSgPl = do
space <- rule $ Just . (:[]) <$> token ' '
hole <- rule $ (Nothing, Nothing) <$ token '?'<?> "hole"
word <- rule (many (satisfy isAlpha) <?> "word")
wordSgPl <- rule $ (,) <$> (token '[' *> word) <* token '/' <*> word <* token ']'
complexWord <- rule $ (\(a,b) -> (Just a, Just b)) . fuse <$>
many ((<>) <$> (dup <$> word) <*> wordSgPl) <?> "word"
segment <- rule (hole <|> (dup . Just <$> word) <|> complexWord )
rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment)
where
dup x = (x,x)
fuse = \case
(a, b) : (c, d) : rest -> fuse ((a <> c, b <> d) : rest)
[(a, b)] -> (a, b)
_ -> error "Syntax.Abstract.fuse"
makePhrase :: [(Maybe String, Maybe String)] -> SgPl LexicalPhrase
makePhrase = (\(patSg, patPl) -> SgPl (fmap makeWord patSg) (fmap makeWord patPl)) . unzip
makeWord :: Maybe String -> Maybe Token
makeWord = fmap (Word . Text.pack)
|