summaryrefslogtreecommitdiff
path: root/source/Syntax/LexicalPhrase.hs
diff options
context:
space:
mode:
authoradelon <22380201+adelon@users.noreply.github.com>2024-02-10 02:22:14 +0100
committeradelon <22380201+adelon@users.noreply.github.com>2024-02-10 02:22:14 +0100
commit442d732696ad431b84f6e5c72b6ee785be4fd968 (patch)
treeb476f395e7e91d67bacb6758bc84914b8711593f /source/Syntax/LexicalPhrase.hs
Initial commit
Diffstat (limited to 'source/Syntax/LexicalPhrase.hs')
-rw-r--r--source/Syntax/LexicalPhrase.hs93
1 files changed, 93 insertions, 0 deletions
diff --git a/source/Syntax/LexicalPhrase.hs b/source/Syntax/LexicalPhrase.hs
new file mode 100644
index 0000000..1743255
--- /dev/null
+++ b/source/Syntax/LexicalPhrase.hs
@@ -0,0 +1,93 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Syntax.LexicalPhrase where
+
+
+import Base
+import Syntax.Token (Token(..))
+
+import Data.Char (isAlpha)
+import Data.Text qualified as Text
+import Text.Earley.Mixfix (Holey)
+import Text.Earley (Grammar, Prod, (<?>), fullParses, parser, rule, token, satisfy)
+
+
+
+-- | 'LexicalPhrase's should be nonempty lists with at least one proper word token.
+-- Hyphens and quotes in words are treated as letters.
+-- Thus /@manifold-with-boundary@/ is a singleton lexical phrase (one word).
+--
+type LexicalPhrase = Holey Token
+
+-- MAYBE Add this instance by making LexicalPhrase a proper Type?
+-- Until then we can use the default instance for lists of prettyprintable things.
+--
+-- instance Pretty LexicalPhrase where
+-- pretty components = hsep (prettyComponent <$> components)
+-- where
+-- prettyComponent = \case
+-- Nothing -> "_"
+-- Just tok -> pretty tok
+
+
+
+-- | Split data by grammatical number (singular/plural).
+-- The 'Eq' and 'Ord' instances only consider the singular
+-- form so that we can prefer known irregular plurals over
+-- guessed irregular plurals when inserting items into
+-- the 'Lexicon'.
+data SgPl a
+ = SgPl {sg :: a, pl :: a}
+ deriving (Show, Functor, Generic, Hashable)
+
+instance Eq a => Eq (SgPl a) where (==) = (==) `on` sg
+instance Ord a => Ord (SgPl a) where compare = compare `on` sg
+
+
+unsafeReadPhrase :: String -> LexicalPhrase
+unsafeReadPhrase spec = case fst (fullParses (parser lexicalPhraseSpec) spec) of
+ pat : _ -> pat
+ _ -> error "unsafeReadPhrase failed"
+
+unsafeReadPhraseSgPl :: String -> SgPl LexicalPhrase
+unsafeReadPhraseSgPl spec = case fst (fullParses (parser lexicalPhraseSpecSgPl) spec) of
+ pat : _ -> pat
+ _ -> error "unsafeReadPhraseSgPl failed"
+
+
+lexicalPhraseSpec :: Grammar r (Prod r String Char LexicalPhrase)
+lexicalPhraseSpec = do
+ hole <- rule $ Nothing <$ token '?' <?> "hole"
+ word <- rule $ Just <$> many (satisfy (\c -> isAlpha c || c == '-'))
+ space <- rule $ Just . (:[]) <$> token ' '
+ segment <- rule $ hole <|> word
+ rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment)
+ where
+ makePhrase :: [Maybe String] -> LexicalPhrase
+ makePhrase pat = fmap makeWord pat
+
+
+lexicalPhraseSpecSgPl :: Grammar r (Prod r String Char (SgPl LexicalPhrase))
+lexicalPhraseSpecSgPl = do
+ space <- rule $ Just . (:[]) <$> token ' '
+ hole <- rule $ (Nothing, Nothing) <$ token '?'<?> "hole"
+
+ word <- rule (many (satisfy isAlpha) <?> "word")
+ wordSgPl <- rule $ (,) <$> (token '[' *> word) <* token '/' <*> word <* token ']'
+ complexWord <- rule $ (\(a,b) -> (Just a, Just b)) . fuse <$>
+ many ((<>) <$> (dup <$> word) <*> wordSgPl) <?> "word"
+ segment <- rule (hole <|> (dup . Just <$> word) <|> complexWord )
+ rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment)
+ where
+ dup x = (x,x)
+ fuse = \case
+ (a, b) : (c, d) : rest -> fuse ((a <> c, b <> d) : rest)
+ [(a, b)] -> (a, b)
+ _ -> error "Syntax.Abstract.fuse"
+
+ makePhrase :: [(Maybe String, Maybe String)] -> SgPl LexicalPhrase
+ makePhrase = (\(patSg, patPl) -> SgPl (fmap makeWord patSg) (fmap makeWord patPl)) . unzip
+
+makeWord :: Maybe String -> Maybe Token
+makeWord = fmap (Word . Text.pack)