summaryrefslogtreecommitdiff
path: root/src/runtime/haskell
diff options
context:
space:
mode:
author: hallgren <hallgren@chalmers.se> 2014-04-09 14:13:18 +0000
committer: hallgren <hallgren@chalmers.se> 2014-04-09 14:13:18 +0000
commit: 04a6260eeaf626cf4774d087d8810d438f554b46 (patch)
tree: a8447466f700f40d0f86246a5b6988b0923dea33 /src/runtime/haskell
parent: 4479bb81b756767fef32faec2822e2bb74dcb320 (diff)
PGF Service: a bit more clever lexer=text
Only change the first word to lowercase if the original input is not found in the grammar's morphology. This allows parsing of sentences starting with "I" in English, nouns in German, and proper names in other languages, but it can make the wrong choice for multi-words.
Diffstat (limited to 'src/runtime/haskell')
-rw-r--r--src/runtime/haskell/PGF/Lexing.hs14
1 files changed, 12 insertions, 2 deletions
diff --git a/src/runtime/haskell/PGF/Lexing.hs b/src/runtime/haskell/PGF/Lexing.hs
index 808a2af6f..10d8332f7 100644
--- a/src/runtime/haskell/PGF/Lexing.hs
+++ b/src/runtime/haskell/PGF/Lexing.hs
@@ -2,8 +2,13 @@ module PGF.Lexing where
import Data.Char(isSpace,toLower,toUpper)
-- * Text lexing
+-- | Text lexing with standard word capitalization of the first word of every sentence
lexText :: String -> [String]
-lexText = uncap . lext where
+lexText = lexText' uncapitInit
+
+-- | Text lexing with custom treatment of the first word of every sentence.
+lexText' :: (String->String) -> String -> [String]
+lexText' uncap1 = uncap . lext where
lext s = case s of
c:cs | isMajorPunct c -> [c] : uncap (lext cs)
c:cs | isMinorPunct c -> [c] : lext cs
@@ -11,7 +16,7 @@ lexText = uncap . lext where
_:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
_ -> [s]
uncap s = case s of
- (c:cs):ws -> (toLower c : cs):ws
+ w:ws -> uncap1 w:ws
_ -> s
unlexText :: [String] -> String
@@ -78,6 +83,11 @@ capitInit s = case s of
c:cs -> toUpper c : cs
_ -> s
+-- | Uncapitalize first letter
+uncapitInit s = case s of
+ c:cs -> toLower c : cs
+ _ -> s
+
-- | Unquote each string wrapped in double quotes
unquote = map unq where
unq s = case s of