diff options
| author | hallgren <hallgren@chalmers.se> | 2014-04-09 14:13:18 +0000 |
|---|---|---|
| committer | hallgren <hallgren@chalmers.se> | 2014-04-09 14:13:18 +0000 |
| commit | 04a6260eeaf626cf4774d087d8810d438f554b46 (patch) | |
| tree | a8447466f700f40d0f86246a5b6988b0923dea33 /src/runtime | |
| parent | 4479bb81b756767fef32faec2822e2bb74dcb320 (diff) | |
PGF Service: a bit more clever lexer=text
Only change the first word to lowercase if the original input is not found in
the grammar's morphology. This allows parsing of sentences starting with "I" in
English, nouns in German and proper names in other languages, but it can make
the wrong choice for multi-word expressions.
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/haskell/PGF/Lexing.hs | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/src/runtime/haskell/PGF/Lexing.hs b/src/runtime/haskell/PGF/Lexing.hs index 808a2af6f..10d8332f7 100644 --- a/src/runtime/haskell/PGF/Lexing.hs +++ b/src/runtime/haskell/PGF/Lexing.hs @@ -2,8 +2,13 @@ module PGF.Lexing where import Data.Char(isSpace,toLower,toUpper) -- * Text lexing +-- | Text lexing with standard word capitalization of the first word of every sentence lexText :: String -> [String] -lexText = uncap . lext where +lexText = lexText' uncapitInit + +-- | Text lexing with custom treatment of the first word of every sentence. +lexText' :: (String->String) -> String -> [String] +lexText' uncap1 = uncap . lext where lext s = case s of c:cs | isMajorPunct c -> [c] : uncap (lext cs) c:cs | isMinorPunct c -> [c] : lext cs @@ -11,7 +16,7 @@ lexText = uncap . lext where _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs _ -> [s] uncap s = case s of - (c:cs):ws -> (toLower c : cs):ws + w:ws -> uncap1 w:ws _ -> s unlexText :: [String] -> String @@ -78,6 +83,11 @@ capitInit s = case s of c:cs -> toUpper c : cs _ -> s +-- | Uncapitalize first letter +uncapitInit s = case s of + c:cs -> toLower c : cs + _ -> s + -- | Unquote each string wrapped in double quotes unquote = map unq where unq s = case s of |
