From 752a7b803062b17e675b3617fae4ae08d809c60b Mon Sep 17 00:00:00 2001 From: aarne Date: Tue, 30 Nov 2010 14:50:24 +0000 Subject: format .gfm for multiple modules in the same file; includes lines with ;-separated words --- src/compiler/GF/Command/Commands.hs | 3 + src/compiler/GF/Command/Importing.hs | 7 +- src/compiler/GF/Compile/Multi.hs | 165 +++++++++++++++++++++++++++++++++++ src/tools/Koe.gfm | 12 +++ src/tools/Koe.multi | 12 --- src/tools/Multi.hs | 137 ----------------------------- 6 files changed, 186 insertions(+), 150 deletions(-) create mode 100644 src/compiler/GF/Compile/Multi.hs create mode 100644 src/tools/Koe.gfm delete mode 100644 src/tools/Koe.multi delete mode 100644 src/tools/Multi.hs (limited to 'src') diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 48f9b4f49..d320396d5 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -407,6 +407,9 @@ allCommands env@(pgf, mos) = Map.fromList [ "If a grammar with the same concrete name is already in the state", "it is overwritten - but only if compilation succeeds.", "The grammar parser depends on the file name suffix:", + " .cf context-free (labelled BNF) source", + " .ebnf extended BNF source", + " .gfm multi-module GF source", " .gf normal GF source", " .gfo compiled GF source", " .pgf precompiled grammar in Portable Grammar Format" diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index 80f3833ee..3d05868b1 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -4,6 +4,7 @@ import PGF import PGF.Data import GF.Compile +import GF.Compile.Multi (readMulti) import GF.Grammar (identC, SourceGrammar) -- for cc command import GF.Grammar.CF import GF.Grammar.EBNF @@ -20,8 +21,12 @@ importGrammar :: PGF -> Options -> [FilePath] -> IO PGF importGrammar pgf0 _ [] = return pgf0 importGrammar pgf0 opts files = case takeExtensions (last files) of - ".cf" -> importCF opts files getCF + ".cf" -> importCF opts files getCF ".ebnf" -> importCF opts files getEBNF + ".gfm" -> do + ascss <- mapM readMulti files + let cs = concatMap snd ascss + importGrammar pgf0 opts cs s | elem s [".gf",".gfo"] -> do res <- appIOE $ compileToPGF opts files case res of diff --git a/src/compiler/GF/Compile/Multi.hs b/src/compiler/GF/Compile/Multi.hs new file mode 100644 index 000000000..59e8ed34c --- /dev/null +++ b/src/compiler/GF/Compile/Multi.hs @@ -0,0 +1,165 @@ +module GF.Compile.Multi (readMulti) where + +import Data.List +import Data.Char + +-- AR 29 November 2010 +-- quick way of writing a multilingual lexicon and (with some more work) a grammar +-- also several modules in one file +-- file suffix .gfm (GF Multi) + + +{- +-- This multi-line comment is a possible file in the format. +-- comments are as in GF, one-liners + +-- always start by declaring lang names as follows +> langs Eng Fin Swe + +-- baseline rules: semicolon-separated line-by-line entries update abs and cncs, adding to S +cheers ; skål ; terveydeksi + +-- alternatives within a language are comma-separated +cheers ; skål ; terveydeksi, kippis + +-- more advanced: verbatim abstract rules prefixed by "> abs" +> abs cat Drink ; +> abs fun drink : Drink -> S ; + +-- verbatim concrete rules prefixed by ">" and comma-separated language list +> Eng,Swe lin Gin = "gin" ; + +-- multiple modules: modules as usual. Each module has to start from a new line. +-- Should be UTF-8 encoded. + +-} + +{- +main = do + xx <- getArgs + if null xx then putStrLn usage else do + let (opts,file) = (init xx, last xx) + (absn,cncns) <- readMulti opts file + if elem "-pgf" xx + then do + system ("gf -make -s -optimize-pgf " ++ unwords (map gfFile cncns)) + putStrLn $ "wrote " ++ absn ++ ".pgf" + else return () +-} + +readMulti :: FilePath -> IO (FilePath,[FilePath]) +readMulti file = do + src <- readFile file + let multi = getMulti (takeWhile (/='.') file) src + absn = absName multi + cncns = cncNames multi + raws = rawModules multi + writeFile (gfFile absn) (absCode multi) + mapM_ (uncurry writeFile) + [(gfFile cncn, cncCode absn cncn cod) | + cncn <- cncNames multi, let cod = [r | (la,r) <- cncRules multi, la == cncn]] + putStrLn $ "wrote " ++ unwords (map gfFile (absn:cncns)) + mapM_ (uncurry writeFile) [(gfFile n,s) | (n,s) <- raws] --- overwrites those above + return (gfFile absn, map gfFile cncns) + +data Multi = Multi { + rawModules :: [(String,String)], + absName :: String, + cncNames :: [String], + startCat :: String, + absRules :: [String], + cncRules :: [(String,String)] -- lang,lin + } + +emptyMulti :: Multi +emptyMulti = Multi { + rawModules = [], + absName = "Abs", + cncNames = [], + startCat = "S", + absRules = [], + cncRules = [] + } + +absCode :: Multi -> String +absCode multi = unlines $ header : start ++ (reverse (absRules multi)) ++ ["}"] where + header = "abstract " ++ absName multi ++ " = {" + start = ["flags startcat = " ++ cat ++ " ;", "cat " ++ cat ++ " ;"] + cat = startCat multi + +cncCode :: String -> String -> [String] -> String +cncCode ab cnc rules = unlines $ header : (reverse rules ++ ["}"]) where + header = "concrete " ++ cnc ++ " of " ++ ab ++ " = {" + +getMulti :: String -> String -> Multi +getMulti m s = foldl (flip addMulti) (emptyMulti{absName = m}) (modlines (lines s)) + +addMulti :: String -> Multi -> Multi +addMulti line multi = case line of + '-':'-':_ -> multi + _ | all isSpace line -> multi + '>':s -> case words s of + "langs":ws -> let las = [absName multi ++ w | w <- ws] in multi { + cncNames = las, + cncRules = concat [[(la,"lincat " ++ startCat multi ++ " = Str ;"), + (la,"flags coding = utf8 ;")] | la <- las] + } + "startcat":c:ws -> multi {startCat = c} + "abs":ws -> multi { + absRules = unwords ws : absRules multi + } + langs:ws -> multi { + cncRules = [(absName multi ++ la, unwords ws) | la <- chop ',' langs] ++ cncRules multi + } + _ -> case words line of + m:name:_ | isModule m -> multi { + rawModules = (name,line):rawModules multi + } + _ -> let (cat,fun,lins) = getRules (startCat multi) line in + multi { + absRules = ("fun " ++ fun ++ " : " ++ cat ++ " ;") : absRules multi, + cncRules = zip (cncNames multi) lins ++ cncRules multi + } + +getRules :: String -> String -> (String,String,[String]) +getRules cat line = (cat, fun, map lin rss) where + rss = map (map unspace . chop ',') $ chop ';' line + fun = map idChar (head (head rss)) ++ "_" ++ cat + lin rs = "lin " ++ fun ++ " = " ++ unwords (intersperse "|" (map quote rs)) ++ " ;" + +chop :: Eq c => c -> [c] -> [[c]] +chop c cs = case break (==c) cs of + (w,_:cs2) -> w : chop c cs2 + ([],[]) -> [] + (w,_) -> [w] + +-- remove spaces from beginning and end, leave them in the middle +unspace :: String -> String +unspace = unwords . words + +quote :: String -> String +quote r = "\"" ++ r ++ "\"" + +-- to guarantee that the char can be used in an ident +idChar :: Char -> Char +idChar c = + if (n > 47 && n < 58) || (n > 64 && n < 91) || (n > 96 && n < 123) + then c + else '_' + where n = fromEnum c + + +gfFile :: FilePath -> FilePath +gfFile f = f ++ ".gf" + +isModule :: String -> Bool +isModule = flip elem + ["abstract","concrete","incomplete","instance","interface","resource"] + +modlines :: [String] -> [String] +modlines ss = case ss of + l:ls -> case words l of + w:_ | isModule w -> case break (isModule . concat . take 1 . words) ls of + (ms,rest) -> unlines (l:ms) : modlines rest + _ -> l : modlines ls + _ -> [] diff --git a/src/tools/Koe.gfm b/src/tools/Koe.gfm new file mode 100644 index 000000000..d8fed0e53 --- /dev/null +++ b/src/tools/Koe.gfm @@ -0,0 +1,12 @@ +-- baseline + +> langs Eng Swe Fin +house ; hus ; talo +car, automobile ; bil ; auto +man ; man ; mies +girl ; flicka, tjej ; tyttö +technical university ; teknisk högskola ; teknillinen korkeakoulu + + + + diff --git a/src/tools/Koe.multi b/src/tools/Koe.multi deleted file mode 100644 index d8fed0e53..000000000 --- a/src/tools/Koe.multi +++ /dev/null @@ -1,12 +0,0 @@ --- baseline - -> langs Eng Swe Fin -house ; hus ; talo -car, automobile ; bil ; auto -man ; man ; mies -girl ; flicka, tjej ; tyttö -technical university ; teknisk högskola ; teknillinen korkeakoulu - - - - diff --git a/src/tools/Multi.hs b/src/tools/Multi.hs deleted file mode 100644 index c45f06b41..000000000 --- a/src/tools/Multi.hs +++ /dev/null @@ -1,137 +0,0 @@ -module Main where - -import List -import Char -import System - --- quick way of writing a multilingual lexicon and (with some more work) a grammar - -usage = "usage: runghc Multi (-pgf)? file" - -{- --- This multi-line comment is a possible file in the format. --- comments are as in GF, one-liners - --- always start by declaring lang names as follows -> langs Eng Fin Swe - --- baseline rules: semicolon-separated line-by-line entries update abs and cncs, adding to S -cheers ; skål ; terveydeksi - --- alternatives within a language are comma-separated -cheers ; skål ; terveydeksi, kippis - --- more advanced: verbatim abstract rules prefixed by "> abs" -> abs cat Drink ; -> abs fun drink : Drink -> S ; - --- verbatim concrete rules prefixed by ">" and comma-separated language list -> Eng,Swe lin Gin = "gin" ; - --} - - -main = do - xx <- getArgs - if null xx putStrLn usage else do - let (opts,file) = (init xx, last xx) - src <- readFile file - let multi = getMulti (takeWhile (/='.') file) src - absn = absName multi - cncns = cncNames multi - writeFile (gfFile absn) (absCode multi) - mapM_ (uncurry writeFile) - [(gfFile cncn, cncCode absn cncn cod) | - cncn <- cncNames multi, let cod = [r | (la,r) <- cncRules multi, la == cncn]] - putStrLn $ "wrote " ++ unwords (map gfFile (absn:cncns)) - if elem "-pgf" xx - then do - system ("gf -make -s -optimize-pgf " ++ unwords (map gfFile cncns)) - putStrLn $ "wrote " ++ absn ++ ".pgf" - else return () - -data Multi = Multi { - absName :: String, - cncNames :: [String], - startCat :: String, - absRules :: [String], - cncRules :: [(String,String)] -- lang,lin - } - -emptyMulti :: Multi -emptyMulti = Multi { - absName = "Abs", - cncNames = [], - startCat = "S", - absRules = [], - cncRules = [] - } - -absCode :: Multi -> String -absCode multi = unlines $ header : start ++ (reverse (absRules multi)) ++ ["}"] where - header = "abstract " ++ absName multi ++ " = {" - start = ["flags startcat = " ++ cat ++ " ;", "cat " ++ cat ++ " ;"] - cat = startCat multi - -cncCode :: String -> String -> [String] -> String -cncCode ab cnc rules = unlines $ header : (reverse rules ++ ["}"]) where - header = "concrete " ++ cnc ++ " of " ++ ab ++ " = {" - -getMulti :: String -> String -> Multi -getMulti m s = foldl (flip addMulti) (emptyMulti{absName = m}) (lines s) - -addMulti :: String -> Multi -> Multi -addMulti line multi = case line of - '-':'-':_ -> multi - _ | all isSpace line -> multi - '>':s -> case words s of - "langs":ws -> let las = [absName multi ++ w | w <- ws] in multi { - cncNames = las, - cncRules = concat [[(la,"lincat " ++ startCat multi ++ " = Str ;"), - (la,"flags coding = utf8 ;")] | la <- las] - } - "startcat":c:ws -> multi {startCat = c} - "abs":ws -> multi { - absRules = unwords ws : absRules multi - } - langs:ws -> multi { - cncRules = [(absName multi ++ la, unwords ws) | la <- chop ',' langs] ++ cncRules multi - } - _ -> let (cat,fun,lins) = getRules (startCat multi) line - in multi { - absRules = ("fun " ++ fun ++ " : " ++ cat ++ " ;") : absRules multi, - cncRules = zip (cncNames multi) lins ++ cncRules multi - } - -getRules :: String -> String -> (String,String,[String]) -getRules cat line = (cat, fun, map lin rss) where - rss = map (map unspace . chop ',') $ chop ';' line - fun = map idChar (head (head rss)) ++ "_" ++ cat - lin rs = "lin " ++ fun ++ " = " ++ unwords (intersperse "|" (map quote rs)) ++ " ;" - -chop :: Eq c => c -> [c] -> [[c]] -chop c cs = case break (==c) cs of - (w,_:cs2) -> w : chop c cs2 - ([],[]) -> [] - (w,_) -> [w] - --- remove spaces from beginning and end, leave them in the middle -unspace :: String -> String -unspace = unwords . words - -quote :: String -> String -quote r = "\"" ++ r ++ "\"" - --- to guarantee that the char can be used in an ident -idChar :: Char -> Char -idChar c = - if (n > 47 && n < 58) || (n > 64 && n < 91) || (n > 96 && n < 123) - then c - else '_' - where n = fromEnum c - - -gfFile :: FilePath -> FilePath -gfFile f = f ++ ".gf" - - -- cgit v1.2.3