summaryrefslogtreecommitdiff
path: root/src/compiler
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2010-11-30 14:50:24 +0000
committeraarne <aarne@chalmers.se>2010-11-30 14:50:24 +0000
commit752a7b803062b17e675b3617fae4ae08d809c60b (patch)
treef86e25f706f02ad5a6b1ad4d27ac29c762af0c7f /src/compiler
parent667e7e67d3b9f4808fab3d46a83e110e61b1edec (diff)
format .gfm for multiple modules in the same file; includes lines with ;-separated words
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/GF/Command/Commands.hs3
-rw-r--r--src/compiler/GF/Command/Importing.hs7
-rw-r--r--src/compiler/GF/Compile/Multi.hs165
3 files changed, 174 insertions, 1 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 48f9b4f49..d320396d5 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -407,6 +407,9 @@ allCommands env@(pgf, mos) = Map.fromList [
"If a grammar with the same concrete name is already in the state",
"it is overwritten - but only if compilation succeeds.",
"The grammar parser depends on the file name suffix:",
+ " .cf context-free (labelled BNF) source",
+ " .ebnf extended BNF source",
+ " .gfm multi-module GF source",
" .gf normal GF source",
" .gfo compiled GF source",
" .pgf precompiled grammar in Portable Grammar Format"
diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs
index 80f3833ee..3d05868b1 100644
--- a/src/compiler/GF/Command/Importing.hs
+++ b/src/compiler/GF/Command/Importing.hs
@@ -4,6 +4,7 @@ import PGF
import PGF.Data
import GF.Compile
+import GF.Compile.Multi (readMulti)
import GF.Grammar (identC, SourceGrammar) -- for cc command
import GF.Grammar.CF
import GF.Grammar.EBNF
@@ -20,8 +21,12 @@ importGrammar :: PGF -> Options -> [FilePath] -> IO PGF
importGrammar pgf0 _ [] = return pgf0
importGrammar pgf0 opts files =
case takeExtensions (last files) of
- ".cf" -> importCF opts files getCF
+ ".cf" -> importCF opts files getCF
".ebnf" -> importCF opts files getEBNF
+ ".gfm" -> do
+ ascss <- mapM readMulti files
+ let cs = concatMap snd ascss
+ importGrammar pgf0 opts cs
s | elem s [".gf",".gfo"] -> do
res <- appIOE $ compileToPGF opts files
case res of
diff --git a/src/compiler/GF/Compile/Multi.hs b/src/compiler/GF/Compile/Multi.hs
new file mode 100644
index 000000000..59e8ed34c
--- /dev/null
+++ b/src/compiler/GF/Compile/Multi.hs
@@ -0,0 +1,165 @@
+module GF.Compile.Multi (readMulti) where
+
+import Data.List
+import Data.Char
+
+-- AR 29 November 2010
+-- quick way of writing a multilingual lexicon and (with some more work) a grammar
+-- also several modules in one file
+-- file suffix .gfm (GF Multi)
+
+
+{-
+-- This multi-line comment is a possible file in the format.
+-- comments are as in GF, one-liners
+
+-- always start by declaring lang names as follows
+> langs Eng Fin Swe
+
+-- baseline rules: semicolon-separated line-by-line entries update abs and cncs, adding to S
+cheers ; skål ; terveydeksi
+
+-- alternatives within a language are comma-separated
+cheers ; skål ; terveydeksi, kippis
+
+-- more advanced: verbatim abstract rules prefixed by "> abs"
+> abs cat Drink ;
+> abs fun drink : Drink -> S ;
+
+-- verbatim concrete rules prefixed by ">" and comma-separated language list
+> Eng,Swe lin Gin = "gin" ;
+
+-- multiple modules: modules as usual. Each module has to start from a new line.
+-- Should be UTF-8 encoded.
+
+-}
+
+{-
+main = do
+ xx <- getArgs
+ if null xx then putStrLn usage else do
+ let (opts,file) = (init xx, last xx)
+ (absn,cncns) <- readMulti opts file
+ if elem "-pgf" xx
+ then do
+ system ("gf -make -s -optimize-pgf " ++ unwords (map gfFile cncns))
+ putStrLn $ "wrote " ++ absn ++ ".pgf"
+ else return ()
+-}
+
+readMulti :: FilePath -> IO (FilePath,[FilePath])
+readMulti file = do
+ src <- readFile file
+ let multi = getMulti (takeWhile (/='.') file) src
+ absn = absName multi
+ cncns = cncNames multi
+ raws = rawModules multi
+ writeFile (gfFile absn) (absCode multi)
+ mapM_ (uncurry writeFile)
+ [(gfFile cncn, cncCode absn cncn cod) |
+ cncn <- cncNames multi, let cod = [r | (la,r) <- cncRules multi, la == cncn]]
+ putStrLn $ "wrote " ++ unwords (map gfFile (absn:cncns))
+ mapM_ (uncurry writeFile) [(gfFile n,s) | (n,s) <- raws] --- overwrites those above
+ return (gfFile absn, map gfFile cncns)
+
+data Multi = Multi {
+ rawModules :: [(String,String)],
+ absName :: String,
+ cncNames :: [String],
+ startCat :: String,
+ absRules :: [String],
+ cncRules :: [(String,String)] -- lang,lin
+ }
+
+emptyMulti :: Multi
+emptyMulti = Multi {
+ rawModules = [],
+ absName = "Abs",
+ cncNames = [],
+ startCat = "S",
+ absRules = [],
+ cncRules = []
+ }
+
+absCode :: Multi -> String
+absCode multi = unlines $ header : start ++ (reverse (absRules multi)) ++ ["}"] where
+ header = "abstract " ++ absName multi ++ " = {"
+ start = ["flags startcat = " ++ cat ++ " ;", "cat " ++ cat ++ " ;"]
+ cat = startCat multi
+
+cncCode :: String -> String -> [String] -> String
+cncCode ab cnc rules = unlines $ header : (reverse rules ++ ["}"]) where
+ header = "concrete " ++ cnc ++ " of " ++ ab ++ " = {"
+
+getMulti :: String -> String -> Multi
+getMulti m s = foldl (flip addMulti) (emptyMulti{absName = m}) (modlines (lines s))
+
+addMulti :: String -> Multi -> Multi
+addMulti line multi = case line of
+ '-':'-':_ -> multi
+ _ | all isSpace line -> multi
+ '>':s -> case words s of
+ "langs":ws -> let las = [absName multi ++ w | w <- ws] in multi {
+ cncNames = las,
+ cncRules = concat [[(la,"lincat " ++ startCat multi ++ " = Str ;"),
+ (la,"flags coding = utf8 ;")] | la <- las]
+ }
+ "startcat":c:ws -> multi {startCat = c}
+ "abs":ws -> multi {
+ absRules = unwords ws : absRules multi
+ }
+ langs:ws -> multi {
+ cncRules = [(absName multi ++ la, unwords ws) | la <- chop ',' langs] ++ cncRules multi
+ }
+ _ -> case words line of
+ m:name:_ | isModule m -> multi {
+ rawModules = (name,line):rawModules multi
+ }
+ _ -> let (cat,fun,lins) = getRules (startCat multi) line in
+ multi {
+ absRules = ("fun " ++ fun ++ " : " ++ cat ++ " ;") : absRules multi,
+ cncRules = zip (cncNames multi) lins ++ cncRules multi
+ }
+
+getRules :: String -> String -> (String,String,[String])
+getRules cat line = (cat, fun, map lin rss) where
+ rss = map (map unspace . chop ',') $ chop ';' line
+ fun = map idChar (head (head rss)) ++ "_" ++ cat
+ lin rs = "lin " ++ fun ++ " = " ++ unwords (intersperse "|" (map quote rs)) ++ " ;"
+
+chop :: Eq c => c -> [c] -> [[c]]
+chop c cs = case break (==c) cs of
+ (w,_:cs2) -> w : chop c cs2
+ ([],[]) -> []
+ (w,_) -> [w]
+
+-- remove spaces from beginning and end, leave them in the middle
+unspace :: String -> String
+unspace = unwords . words
+
+quote :: String -> String
+quote r = "\"" ++ r ++ "\""
+
+-- to guarantee that the char can be used in an ident
+idChar :: Char -> Char
+idChar c =
+ if (n > 47 && n < 58) || (n > 64 && n < 91) || (n > 96 && n < 123)
+ then c
+ else '_'
+ where n = fromEnum c
+
+
+gfFile :: FilePath -> FilePath
+gfFile f = f ++ ".gf"
+
+isModule :: String -> Bool
+isModule = flip elem
+ ["abstract","concrete","incomplete","instance","interface","resource"]
+
+modlines :: [String] -> [String]
+modlines ss = case ss of
+ l:ls -> case words l of
+ w:_ | isModule w -> case break (isModule . concat . take 1 . words) ls of
+ (ms,rest) -> unlines (l:ms) : modlines rest
+ _ -> l : modlines ls
+ _ -> []