diff options
| author | adelon <22380201+adelon@users.noreply.github.com> | 2024-02-10 02:22:14 +0100 |
|---|---|---|
| committer | adelon <22380201+adelon@users.noreply.github.com> | 2024-02-10 02:22:14 +0100 |
| commit | 442d732696ad431b84f6e5c72b6ee785be4fd968 (patch) | |
| tree | b476f395e7e91d67bacb6758bc84914b8711593f /source/Syntax/LexicalPhrase.hs | |
Initial commit
Diffstat (limited to 'source/Syntax/LexicalPhrase.hs')
| -rw-r--r-- | source/Syntax/LexicalPhrase.hs | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/source/Syntax/LexicalPhrase.hs b/source/Syntax/LexicalPhrase.hs new file mode 100644 index 0000000..1743255 --- /dev/null +++ b/source/Syntax/LexicalPhrase.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE NoImplicitPrelude #-} + +module Syntax.LexicalPhrase where + + +import Base +import Syntax.Token (Token(..)) + +import Data.Char (isAlpha) +import Data.Text qualified as Text +import Text.Earley.Mixfix (Holey) +import Text.Earley (Grammar, Prod, (<?>), fullParses, parser, rule, token, satisfy) + + + +-- | 'LexicalPhrase's should be nonempty lists with at least one proper word token. +-- Hyphens and quotes in words are treated as letters. +-- Thus /@manifold-with-boundary@/ is a singleton lexical phrase (one word). +-- +type LexicalPhrase = Holey Token + +-- MAYBE Add this instance by making LexicalPhrase a proper Type? +-- Until then we can use the default instance for lists of prettyprintable things. +-- +-- instance Pretty LexicalPhrase where +-- pretty components = hsep (prettyComponent <$> components) +-- where +-- prettyComponent = \case +-- Nothing -> "_" +-- Just tok -> pretty tok + + + +-- | Split data by grammatical number (singular/plural). +-- The 'Eq' and 'Ord' instances only consider the singular +-- form so that we can prefer known irregular plurals over +-- guessed irregular plurals when inserting items into +-- the 'Lexicon'. +data SgPl a + = SgPl {sg :: a, pl :: a} + deriving (Show, Functor, Generic, Hashable) + +instance Eq a => Eq (SgPl a) where (==) = (==) `on` sg +instance Ord a => Ord (SgPl a) where compare = compare `on` sg + + +unsafeReadPhrase :: String -> LexicalPhrase +unsafeReadPhrase spec = case fst (fullParses (parser lexicalPhraseSpec) spec) of + pat : _ -> pat + _ -> error "unsafeReadPhrase failed" + +unsafeReadPhraseSgPl :: String -> SgPl LexicalPhrase +unsafeReadPhraseSgPl spec = case fst (fullParses (parser lexicalPhraseSpecSgPl) spec) of + pat : _ -> pat + _ -> error "unsafeReadPhraseSgPl failed" + + +lexicalPhraseSpec :: Grammar r (Prod r String Char LexicalPhrase) +lexicalPhraseSpec = do + hole <- rule $ Nothing <$ token '?' <?> "hole" + word <- rule $ Just <$> many (satisfy (\c -> isAlpha c || c == '-')) + space <- rule $ Just . (:[]) <$> token ' ' + segment <- rule $ hole <|> word + rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment) + where + makePhrase :: [Maybe String] -> LexicalPhrase + makePhrase pat = fmap makeWord pat + + +lexicalPhraseSpecSgPl :: Grammar r (Prod r String Char (SgPl LexicalPhrase)) +lexicalPhraseSpecSgPl = do + space <- rule $ Just . (:[]) <$> token ' ' + hole <- rule $ (Nothing, Nothing) <$ token '?'<?> "hole" + + word <- rule (many (satisfy isAlpha) <?> "word") + wordSgPl <- rule $ (,) <$> (token '[' *> word) <* token '/' <*> word <* token ']' + complexWord <- rule $ (\(a,b) -> (Just a, Just b)) . fuse <$> + many ((<>) <$> (dup <$> word) <*> wordSgPl) <?> "word" + segment <- rule (hole <|> (dup . Just <$> word) <|> complexWord ) + rule $ (\s ss -> makePhrase (s:ss)) <$> segment <*> many (space *> segment) + where + dup x = (x,x) + fuse = \case + (a, b) : (c, d) : rest -> fuse ((a <> c, b <> d) : rest) + [(a, b)] -> (a, b) + _ -> error "Syntax.Abstract.fuse" + + makePhrase :: [(Maybe String, Maybe String)] -> SgPl LexicalPhrase + makePhrase = (\(patSg, patPl) -> SgPl (fmap makeWord patSg) (fmap makeWord patPl)) . unzip + +makeWord :: Maybe String -> Maybe Token +makeWord = fmap (Word . Text.pack) |
