summaryrefslogtreecommitdiff
path: root/src/GF
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2007-09-19 16:48:13 +0000
committeraarne <aarne@cs.chalmers.se>2007-09-19 16:48:13 +0000
commitef389db5694a52eb9c171fe76b952f37216e4c09 (patch)
treecf285b0f746575e5fb210866df5a73c6057405d5 /src/GF
parentaff4aa20c19ccef4ccdbb664d2e989eba36f4446 (diff)
started extending GFCC API with parsing
Diffstat (limited to 'src/GF')
-rw-r--r--src/GF/Canon/GFCC/FCFGParsing.hs171
-rw-r--r--src/GF/Canon/GFCC/GFCCAPI.hs115
-rw-r--r--src/GF/Canon/GFCC/ParGFCC.hs4
-rw-r--r--src/GF/Canon/GFCC/Shell.hs62
4 files changed, 350 insertions, 2 deletions
diff --git a/src/GF/Canon/GFCC/FCFGParsing.hs b/src/GF/Canon/GFCC/FCFGParsing.hs
new file mode 100644
index 000000000..f9a838417
--- /dev/null
+++ b/src/GF/Canon/GFCC/FCFGParsing.hs
@@ -0,0 +1,171 @@
+module GF.Canon.GFCC.FCFGParsing where
+
+import GF.Canon.GFCC.DataGFCC
+import GF.Canon.GFCC.AbsGFCC
+import GF.Conversion.SimpleToFCFG (convertGrammar)
+
+--import GF.System.Tracing
+--import GF.Infra.Print
+--import qualified GF.Grammar.PrGrammar as PrGrammar
+
+--import GF.Data.Operations (Err(..))
+
+--import qualified GF.Grammar.Grammar as Grammar
+--import qualified GF.Grammar.Macros as Macros
+--import qualified GF.Canon.AbsGFC as AbsGFC
+--import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
+--import qualified GF.Infra.Ident as Ident
+--import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
+
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Formalism.Utilities --(forest2trees)
+
+--import GF.Conversion.Types
+
+import GF.Formalism.FCFG
+--import qualified GF.Formalism.GCFG as G
+--import qualified GF.Formalism.SimpleGFC as S
+--import qualified GF.Formalism.MCFG as M
+--import qualified GF.Formalism.CFG as C
+--import qualified GF.Parsing.MCFG as PM
+import qualified GF.Parsing.FCFG as PF
+--import qualified GF.Parsing.CFG as PC
+import GF.Canon.GFCC.ErrM
+
+
+--convertGrammar :: Grammar -> [(Ident,FGrammar)]
+
+--import qualified GF.Parsing.GFC as New
+--checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
+-- algorithm "f"
+-- strategy "bottomup"
+
+type Token = String ----
+type CFTok = String ----
+type CFCat = CId ----
+type Fun = CId ----
+
+cfCat2Ident = id ----
+
+wordsCFTok :: CFTok -> [String]
+wordsCFTok = return ----
+
+
+type FCFPInfo = PF.FCFPInfo FCat FName Token
+
+-- main parsing function
+
+parse ::
+-- String -> -- ^ parsing algorithm (mcfg or cfg)
+-- String -> -- ^ parsing strategy
+ FCFPInfo -> -- ^ compiled grammar (fcfg)
+-- Ident.Ident -> -- ^ abstract module name
+ CFCat -> -- ^ starting category
+ [CFTok] -> -- ^ input tokens
+ Err [Exp] -- ^ resulting GF terms
+
+parse pinfo startCat inString =
+
+ do let inTokens = inputMany (map wordsCFTok inString)
+ forests <- selectParser pinfo startCat inTokens
+ let filteredForests = forests >>= applyProfileToForest
+ trees = nubsort $ filteredForests >>= forest2trees
+
+ return $ map tree2term trees
+
+
+-- parsing via FCFG
+selectParser pinfo startCat inTokens
+ = do let startCats = filter isStart $ PF.grammarCats fcfpi
+ isStart cat = cat' == cfCat2Ident startCat
+ where CId x = fcat2cid cat
+ cat' = CId x
+ fcfpi = pinfo
+ fcfParser <- PF.parseFCF "bottomup"
+ let chart = fcfParser fcfpi startCats inTokens
+ (i,j) = inputBounds inTokens
+ finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
+ return $ map cnv_forests $ chart2forests chart (const False) finalEdges
+
+cnv_forests FMeta = FMeta
+cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
+cnv_forests (FString x) = FString x
+cnv_forests (FInt x) = FInt x
+cnv_forests (FFloat x) = FFloat x
+
+cnv_profile (Unify x) = Unify x
+cnv_profile (Constant x) = Constant (cnv_forests2 x)
+
+cnv_forests2 FMeta = FMeta
+cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
+cnv_forests2 (FString x) = FString x
+cnv_forests2 (FInt x) = FInt x
+cnv_forests2 (FFloat x) = FFloat x
+
+----------------------------------------------------------------------
+-- parse trees to GFCC terms
+
+tree2term :: SyntaxTree Fun -> Exp
+tree2term (TNode f ts) = Tr (AC (CId f)) (map tree2term ts)
+{- ----
+tree2term (TString s) = Macros.string2term s
+tree2term (TInt n) = Macros.int2term n
+tree2term (TFloat f) = Macros.float2term f
+tree2term (TMeta) = Macros.mkMeta 0
+-}
+
+
+----------------------------------------------------------------------
+-- conversion and unification of forests
+
+-- simplest implementation
+applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
+applyProfileToForest (FNode name@(Name fun profile) children)
+ | isCoercion name = concat chForests
+ | otherwise = [ FNode fun chForests | not (null chForests) ]
+ where chForests = concat [ applyProfileM unifyManyForests profile forests |
+ forests0 <- children,
+ forests <- mapM applyProfileToForest forests0 ]
+applyProfileToForest (FString s) = [FString s]
+applyProfileToForest (FInt n) = [FInt n]
+applyProfileToForest (FFloat f) = [FFloat f]
+applyProfileToForest (FMeta) = [FMeta]
+
+
+--------------------- From parsing types ------------------------------
+
+-- * fast nonerasing MCFG
+
+type FIndex = Int
+type FPath = [FIndex]
+type FName = NameProfile CId
+type FGrammar = FCFGrammar FCat FName Token
+type FRule = FCFRule FCat FName Token
+data FCat = FCat {-# UNPACK #-} !Int CId [FPath] [(FPath,FIndex)]
+
+initialFCat :: CId -> FCat
+initialFCat cat = FCat 0 cat [] []
+
+fcatString = FCat (-1) (CId "String") [[0]] []
+fcatInt = FCat (-2) (CId "Int") [[0]] []
+fcatFloat = FCat (-3) (CId "Float") [[0]] []
+
+fcat2cid :: FCat -> CId
+fcat2cid (FCat _ c _ _) = c
+
+instance Eq FCat where
+ (FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
+
+instance Ord FCat where
+ compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
+
+
+
+---
+
+isCoercion :: Name -> Bool
+isCoercion (Name fun [Unify [0]]) = False -- isWildIdent fun
+isCoercion _ = False
+
+type Name = NameProfile Fun
diff --git a/src/GF/Canon/GFCC/GFCCAPI.hs b/src/GF/Canon/GFCC/GFCCAPI.hs
new file mode 100644
index 000000000..e815697d7
--- /dev/null
+++ b/src/GF/Canon/GFCC/GFCCAPI.hs
@@ -0,0 +1,115 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GFCCAPI
+-- Maintainer : Aarne Ranta
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date:
+-- > CVS $Author:
+-- > CVS $Revision:
+--
+-- Reduced Application Programmer's Interface to GF, meant for
+-- embedded GF systems. AR 19/9/2007
+-----------------------------------------------------------------------------
+
+module GF.Canon.GFCC.GFCCAPI where
+
+import GF.Canon.GFCC.DataGFCC
+--import GF.Canon.GFCC.GenGFCC
+import GF.Canon.GFCC.AbsGFCC
+import GF.Canon.GFCC.ParGFCC
+import GF.Canon.GFCC.PrintGFCC
+import GF.Canon.GFCC.ErrM
+--import GF.Data.Operations
+--import GF.Infra.UseIO
+import qualified Data.Map as Map
+import System.Random (newStdGen)
+import System.Directory (doesFileExist)
+import System
+
+-- This API is meant to be used when embedding GF grammars in Haskell
+-- programs. The embedded system is supposed to use the
+-- .gfcm grammar format, which is first produced by the gf program.
+
+---------------------------------------------------
+-- Interface
+---------------------------------------------------
+
+type MultiGrammar = GFCC
+type Language = String
+type Category = String
+type Tree = Exp
+
+file2grammar :: FilePath -> IO MultiGrammar
+
+linearize :: MultiGrammar -> Language -> Tree -> String
+parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
+
+linearizeAll :: MultiGrammar -> Tree -> [String]
+linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
+
+--parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
+--parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
+
+readTree :: MultiGrammar -> String -> Tree
+showTree :: Tree -> String
+
+languages :: MultiGrammar -> [Language]
+categories :: MultiGrammar -> [Category]
+
+startCat :: MultiGrammar -> Category
+
+---------------------------------------------------
+-- Implementation
+---------------------------------------------------
+
+file2grammar f =
+ readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
+
+linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize mgr (CId lang)
+
+
+parse mgr lang cat s = []
+{-
+ map tree2exp .
+ errVal [] .
+ parseString (stateOptions sgr) sgr cfcat
+ where
+ sgr = stateGrammarOfLang mgr (zIdent lang)
+ cfcat = string2CFCat abs cat
+ abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
+-}
+
+linearizeAll mgr = map snd . linearizeAllLang mgr
+linearizeAllLang mgr t = [(lang,linearThis mgr lang t) | lang <- languages mgr]
+
+{-
+parseAll mgr cat = map snd . parseAllLang mgr cat
+
+parseAllLang mgr cat s =
+ [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
+-}
+
+readTree _ = err (const exp0) id . (pExp . myLexer)
+
+showTree t = printTree t
+
+languages mgr = [l | CId l <- cncnames mgr]
+
+categories mgr = [c | CId c <- Map.keys (cats (abstract mgr))]
+
+startCat mgr = "S" ----
+
+------------ for internal use only
+
+linearThis = GF.Canon.GFCC.GFCCAPI.linearize
+
+err f g ex = case ex of
+ Ok x -> g x
+ Bad s -> f s
+
+readFileIf f = do
+ b <- doesFileExist f
+ if b then readFile f
+ else putStrLn ("file " ++ f ++ " not found") >> return ""
diff --git a/src/GF/Canon/GFCC/ParGFCC.hs b/src/GF/Canon/GFCC/ParGFCC.hs
index 22813a47b..2d208c20d 100644
--- a/src/GF/Canon/GFCC/ParGFCC.hs
+++ b/src/GF/Canon/GFCC/ParGFCC.hs
@@ -448,14 +448,14 @@ happyReduction_42 happy_x_2
happy_x_1
= case happyOut24 happy_x_2 of { happy_var_2 ->
happyIn36
- (V happy_var_2
+ (V (fromInteger happy_var_2) --H
)}
happyReduce_43 = happySpecReduce_1 13# happyReduction_43
happyReduction_43 happy_x_1
= case happyOut24 happy_x_1 of { happy_var_1 ->
happyIn36
- (C happy_var_1
+ (C (fromInteger happy_var_1) --H
)}
happyReduce_44 = happySpecReduce_1 13# happyReduction_44
diff --git a/src/GF/Canon/GFCC/Shell.hs b/src/GF/Canon/GFCC/Shell.hs
new file mode 100644
index 000000000..bc33e7949
--- /dev/null
+++ b/src/GF/Canon/GFCC/Shell.hs
@@ -0,0 +1,62 @@
+module Main where
+
+import GF.Canon.GFCC.GFCCAPI
+import qualified GF.Canon.GFCC.GenGFCC as G ---
+import GF.Canon.GFCC.AbsGFCC (CId(CId)) ---
+import System.Random (newStdGen)
+import System (getArgs)
+
+
+-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
+
+main :: IO ()
+main = do
+ file:_ <- getArgs
+ grammar <- file2grammar file
+ putStrLn $ "languages: " ++ unwords (languages grammar)
+ putStrLn $ "categories: " ++ unwords (categories grammar)
+ loop grammar
+
+loop :: MultiGrammar -> IO ()
+loop grammar = do
+ s <- getLine
+ if s == "quit" then return () else do
+ treat grammar s
+ loop grammar
+
+treat :: MultiGrammar -> String -> IO ()
+treat grammar s = case words s of
+ "gt":cat:n:_ -> do
+ mapM_ prlinonly $ take (read n) $ G.generate grammar (CId cat)
+ "gtt":cat:n:_ -> do
+ mapM_ prlin $ take (read n) $ G.generate grammar (CId cat)
+ "gr":cat:n:_ -> do
+ gen <- newStdGen
+ mapM_ prlinonly $ take (read n) $ G.generateRandom gen grammar (CId cat)
+ "grt":cat:n:_ -> do
+ gen <- newStdGen
+ mapM_ prlin $ take (read n) $ G.generateRandom gen grammar (CId cat)
+ "p":lang:cat:ws -> do
+ let ts = parse grammar lang cat $ unwords ws
+ mapM_ (putStrLn . showTree) ts
+ "search":cat:n:ws -> do
+ case G.parse (read n) grammar (CId cat) ws of
+ t:_ -> prlin t
+ _ -> putStrLn "no parse found"
+ _ -> lins $ readTree grammar s
+ where
+ langs = languages grammar
+ lins t = mapM_ (lint t) $ langs
+ lint t lang = do
+---- putStrLn $ showTree $ linExp grammar lang t
+ lin t lang
+ lin t lang = do
+ putStrLn $ linearize grammar lang t
+ prlins t = do
+ putStrLn $ showTree t
+ lins t
+ prlin t = do
+ putStrLn $ showTree t
+ prlinonly t
+ prlinonly t = mapM_ (lin t) $ langs
+