diff options
| author | krasimir <krasimir@chalmers.se> | 2008-05-30 13:07:11 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2008-05-30 13:07:11 +0000 |
| commit | 150940b8704a6a61ed08c6bbd99ba4b05a42c59c (patch) | |
| tree | 277a97a4a3ba770e49e51a5526cf2e46bc2c6e1a /src-3.0/PGF.hs | |
| parent | 8bb0c32a9cf2cbad0375ab5886b7f2be37109477 (diff) | |
cleaned up and documented PGF API
Diffstat (limited to 'src-3.0/PGF.hs')
| -rw-r--r-- | src-3.0/PGF.hs | 184 |
1 files changed, 129 insertions, 55 deletions
diff --git a/src-3.0/PGF.hs b/src-3.0/PGF.hs index 9e0a6007e..fafbfafcf 100644 --- a/src-3.0/PGF.hs +++ b/src-3.0/PGF.hs @@ -1,28 +1,54 @@ ----------------------------------------------------------------------- +------------------------------------------------- -- | --- Module : GFCCAPI +-- Module : PGF -- Maintainer : Aarne Ranta --- Stability : (stable) --- Portability : (portable) +-- Stability : stable +-- Portability : portable -- --- > CVS $Date: --- > CVS $Author: --- > CVS $Revision: --- --- Reduced Application Programmer's Interface to GF, meant for --- embedded GF systems. AR 19/9/2007 ------------------------------------------------------------------------------ - -module PGF(module PGF, PGF, emptyPGF) where +-- Application Programming Interface to PGF. +------------------------------------------------- + +module PGF( + -- * PGF + PGF, + readPGF, + + -- * Identifiers + -- ** CId + CId, mkCId, prCId, + + -- ** Language + Language, languages, abstractName, + + -- ** Category + Category, categories, startCat, + + -- * Expressions + Exp(..), + showExp, readExp, + + -- * Operations + -- ** Linearization + linearize, linearizeAllLang, linearizeAll, + + -- ** Parsing + parse, parseAllLang, parseAll, + + -- ** Generation + generateRandom, generateAll, generateAllDepth + ) where import PGF.CId -import PGF.Linearize +import PGF.Linearize hiding (linearize) +import qualified PGF.Linearize (linearize) import PGF.Generate import PGF.Macros import PGF.Data import PGF.Raw.Convert import PGF.Raw.Parse +import PGF.Raw.Print (printTree) import PGF.Parsing.FCFG +import GF.Text.UTF8 import GF.Data.ErrM @@ -37,45 +63,105 @@ import qualified Text.ParserCombinators.ReadP as RP -- This API is meant to be used when embedding GF grammars in Haskell -- programs. The embedded system is supposed to use the --- .gfcc grammar format, which is first produced by the gf program. +-- .pgf grammar format, which is first produced by the gf program. --------------------------------------------------- -- Interface --------------------------------------------------- +-- | This is just a string with the language name. +-- A language name is the identifier that you write in the +-- top concrete or abstract module in GF after the +-- concrete/abstract keyword. Example: +-- +-- > abstract Lang = ... +-- > concrete LangEng of Lang = ... type Language = String -type Category = String -type Tree = Exp - -file2pgf :: FilePath -> IO PGF - -linearize :: PGF -> Language -> Tree -> String -parse :: PGF -> Language -> Category -> String -> [Tree] - -linearizeAll :: PGF -> Tree -> [String] -linearizeAllLang :: PGF -> Tree -> [(Language,String)] - -parseAll :: PGF -> Category -> String -> [[Tree]] -parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])] -generateAll :: PGF -> Category -> [Tree] -generateRandom :: PGF -> Category -> IO [Tree] -generateAllDepth :: PGF -> Category -> Maybe Int -> [Tree] - -readTree :: String -> Tree -showTree :: Tree -> String +-- | This is just a string with the category name. +-- The categories are defined in the abstract syntax +-- with the \'cat\' keyword. +type Category = String -languages :: PGF -> [Language] +-- | Reads file in Portable Grammar Format and produces +-- 'PGF' structure. The file is usually produced with: +-- +-- > $ gfc --make <grammar file name> +readPGF :: FilePath -> IO PGF + +-- | Linearizes given expression as string in the language +linearize :: PGF -> Language -> Exp -> String + +-- | Tries to parse the given string in the specified language +-- and to produce abstract syntax expression. An empty +-- list is returned if the parsing is not successful. The list may also +-- contain more than one element if the grammar is ambiguous. +parse :: PGF -> Language -> Category -> String -> [Exp] + +-- | The same as 'linearizeAllLang' but does not return +-- the language. +linearizeAll :: PGF -> Exp -> [String] + +-- | Linearizes given expression as string in all languages +-- available in the grammar. +linearizeAllLang :: PGF -> Exp -> [(Language,String)] + +-- | The same as 'parseAllLang' but does not return +-- the language. +parseAll :: PGF -> Category -> String -> [[Exp]] + +-- | Tries to parse the given string with every language +-- available in the grammar and to produce abstract syntax +-- expression. The returned list contains pairs of language +-- and list of possible expressions. Only those languages +-- for which at least one parsing is possible are listed. +-- More than one abstract syntax expressions are possible +-- if the grammar is ambiguous. +parseAllLang :: PGF -> Category -> String -> [(Language,[Exp])] + +-- | The same as 'generateAllDepth' but does not limit +-- the depth in the generation. +generateAll :: PGF -> Category -> [Exp] + +-- | Generates an infinite list of random abstract syntax expressions. +-- This is usefull for tree bank generation which after that can be used +-- for grammar testing. +generateRandom :: PGF -> Category -> IO [Exp] + +-- | Generates an exhaustive possibly infinite list of +-- abstract syntax expressions. A depth can be specified +-- to limit the search space. +generateAllDepth :: PGF -> Category -> Maybe Int -> [Exp] + +-- | parses 'String' as an expression +readExp :: String -> Maybe Exp + +-- | renders expression as 'String' +showExp :: Exp -> String + +-- | List of all languages available in the given grammar. +languages :: PGF -> [Language] + +-- | The abstract language name is the name of the top-level +-- abstract module +abstractName :: PGF -> Language + +-- | List of all categories defined in the given grammar. categories :: PGF -> [Category] +-- | The start category is defined in the grammar with +-- the \'startcat\' flag. This is usually the sentence category +-- but it is not necessary. Despite that there is a start category +-- defined you can parse with any category. The start category +-- definition is just for convenience. startCat :: PGF -> Category --------------------------------------------------- -- Implementation --------------------------------------------------- -file2pgf f = do - s <- readFileIf f +readPGF f = do + s <- readFile f g <- parseGrammar s return $! toPGF g @@ -83,9 +169,9 @@ linearize pgf lang = PGF.Linearize.linearize pgf (mkCId lang) parse pgf lang cat s = case lookParser pgf (mkCId lang) of - Nothing -> error "no parser" + Nothing -> error ("Unknown language: " ++ lang) Just pinfo -> case parseFCF "bottomup" pinfo (mkCId cat) (words s) of - Ok x -> x + Ok x -> x Bad s -> error s linearizeAll mgr = map snd . linearizeAllLang mgr @@ -104,9 +190,9 @@ generateRandom pgf cat = do generateAll pgf cat = generate pgf (mkCId cat) Nothing generateAllDepth pgf cat = generate pgf (mkCId cat) -readTree s = case RP.readP_to_S (pExp False) s of - [(x,"")] -> x - _ -> error "no parse" +readExp s = case RP.readP_to_S (pExp False) s of + [(x,"")] -> Just x + _ -> Nothing pExps :: RP.ReadP [Exp] pExps = liftM2 (:) (pExp True) pExps RP.<++ (RP.skipSpaces >> return []) @@ -136,7 +222,7 @@ pExp isNested = RP.skipSpaces >> (pParen RP.<++ pAbs RP.<++ pApp RP.<++ pNum RP. isIdentRest c = c == '_' || c == '\'' || isAlphaNum c -showTree = PP.render . ppExp False +showExp = PP.render . ppExp False ppExp isNested (EAbs xs t) = ppParens isNested (PP.char '\\' PP.<> PP.hsep (PP.punctuate PP.comma (map (PP.text . prCId) xs)) PP.<+> @@ -160,15 +246,3 @@ languages pgf = [prCId l | l <- cncnames pgf] categories pgf = [prCId c | c <- Map.keys (cats (abstract pgf))] startCat pgf = lookStartCat pgf - - ------------- for internal use only - -err f g ex = case ex of - Ok x -> g x - Bad s -> f s - -readFileIf f = do - b <- doesFileExist f - if b then readFile f - else putStrLn ("file " ++ f ++ " not found") >> return "" |
