summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2017-01-25 20:30:54 +0000
committerkrasimir <krasimir@chalmers.se>2017-01-25 20:30:54 +0000
commitdb0f8b0dced9a827c24842e5eeda7fbd64ef115e (patch)
tree4e17ba95c6d4cede185ecfd8d66b00b289627410
parent6de9636ff26aef7ed1cc8b3bc5d93f27a91b861d (diff)
improve the documentation for PGF2
-rw-r--r--src/compiler/GF/Command/Commands2.hs8
-rw-r--r--src/runtime/haskell-bind/PGF2.hsc102
-rw-r--r--src/runtime/haskell-bind/PGF2/Expr.hsc68
-rw-r--r--src/runtime/haskell-bind/PGF2/FFI.hs2
4 files changed, 130 insertions, 50 deletions
diff --git a/src/compiler/GF/Command/Commands2.hs b/src/compiler/GF/Command/Commands2.hs
index 505c22286..4e68c05ff 100644
--- a/src/compiler/GF/Command/Commands2.hs
+++ b/src/compiler/GF/Command/Commands2.hs
@@ -844,7 +844,7 @@ pgfCommands = Map.fromList [
ts = [hsExpr t|Right ts<-rs,(t,p)<-takeOptNum opts ts]
msgs = concatMap (either err ok) rs
err msg = ["Parse failed: "++msg]
- ok = map (C.showExpr . fst).takeOptNum opts
+ ok = map (C.showExpr [] . fst).takeOptNum opts
cLins env@(pgf,cncs) opts ts =
[l|t<-ts,l<-[abs++": "++show t|treebank]++[l|cnc<-cncs,l<-lin cnc t]]
@@ -975,7 +975,11 @@ pgfCommands = Map.fromList [
optFile opts = valStrOpts "file" "_gftmp" opts
-}
- optCat pgf opts = valStrOpts "cat" (C.startCat pgf) opts
+ optCat pgf opts =
+ case listFlags "cat" opts of
+ v:_ -> C.DTyp [] (valueString v) []
+ _ -> C.startCat pgf
+
{-
optType pgf opts =
let str = valStrOpts "cat" (H.showCId $ H.lookStartCat pgf) opts
diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc
index b9c5412e0..c5da14abf 100644
--- a/src/runtime/haskell-bind/PGF2.hsc
+++ b/src/runtime/haskell-bind/PGF2.hsc
@@ -1,36 +1,56 @@
{-# LANGUAGE ExistentialQuantification, DeriveDataTypeable, ScopedTypeVariables #-}
-------------------------------------------------
-- |
+-- Module : PGF2
-- Maintainer : Krasimir Angelov
-- Stability : stable
-- Portability : portable
--
--- This is the Haskell binding to the C run-time system for
--- loading and interpreting grammars compiled in Portable Grammar Format (PGF).
+-- This module is an Application Programming Interface to
+-- load and interpret grammars compiled in the Portable Grammar Format (PGF).
+-- The PGF format is produced as the final output from the GF compiler.
+-- The API is meant to be used for embedding GF grammars in Haskell
+-- programs
-------------------------------------------------
+
#include <pgf/pgf.h>
#include <gu/enum.h>
#include <gu/exn.h>
-module PGF2 (-- * CId
+module PGF2 (-- * PGF
+ PGF,readPGF,
+
+ -- * Identifiers
CId,
- -- * PGF
- PGF,readPGF,AbsName,abstractName,Cat,startCat,categories,
+
+ -- * Abstract syntax
+ AbsName,abstractName,
+ -- ** Categories
+ Cat,startCat,categories,
+ -- ** Functions
+ Fun,functions, functionsByCat, functionType, hasLinearization,
+ -- ** Expressions
+ Expr,showExpr,readExpr,mkApp,unApp,mkStr,mkInt,mkFloat,
+ -- ** Types
+ Type(..), Hypo, BindType(..), showType,
+
-- * Concrete syntax
- ConcName,Concr,languages,parse,
- parseWithHeuristics, parseWithOracle,
- hasLinearization,linearize,linearizeAll,alignWords,
- -- * Types
- Type(..), Hypo, BindType(..), showType, functionType,
- -- * Trees
- Expr,Fun,readExpr,showExpr,mkApp,unApp,mkStr,mkInt,mkFloat,
- graphvizAbstractTree,graphvizParseTree,
- -- * Morphology
+ ConcName,Concr,languages,
+ -- ** Linearization
+ linearize,linearizeAll,
+ alignWords,
+ -- ** Parsing
+ parse, parseWithHeuristics,
+ -- ** Generation
+ generateAll,
+ -- ** Morphological Analysis
MorphoAnalysis, lookupMorpho, fullFormLexicon,
- -- * Generation
- functions, functionsByCat, generateAll,
+ -- ** Visualizations
+ graphvizAbstractTree,graphvizParseTree,
+
-- * Exceptions
PGFError(..),
+
-- * Grammar specific callbacks
LiteralCallback,literalCallbacks
) where
@@ -61,9 +81,13 @@ import Data.Function(on)
-- to Concr but has lost its reference to PGF.
-type AbsName = String -- ^ Name of abstract syntax
-type ConcName = String -- ^ Name of concrete syntax
+type AbsName = CId -- ^ Name of abstract syntax
+type ConcName = CId -- ^ Name of concrete syntax
+-- | Reads file in Portable Grammar Format and produces
+-- 'PGF' structure. The file is usually produced with:
+--
+-- > $ gf -make <grammar file name>
readPGF :: FilePath -> IO PGF
readPGF fpath =
do pool <- gu_new_pool
@@ -85,6 +109,7 @@ readPGF fpath =
master <- newForeignPtr gu_pool_finalizer pool
return PGF {pgf = pgf, pgfMaster = master}
+-- | List of all languages available in the grammar.
languages :: PGF -> Map.Map ConcName Concr
languages p =
unsafePerformIO $
@@ -103,8 +128,11 @@ languages p =
concr <- fmap (\ptr -> Concr ptr p) $ peek (castPtr value)
writeIORef ref $! Map.insert name concr langs
-generateAll :: PGF -> Cat -> [(Expr,Float)]
-generateAll p cat =
+-- | Generates an exhaustive possibly infinite list of
+-- all abstract syntax expressions of the given type.
+-- The expressions are ordered by their probability.
+generateAll :: PGF -> Type -> [(Expr,Float)]
+generateAll p (DTyp _ cat _) =
unsafePerformIO $
do genPl <- gu_new_pool
exprPl <- gu_new_pool
@@ -115,11 +143,21 @@ generateAll p cat =
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
fromPgfExprEnum enum genFPl (p,exprFPl)
+-- | The abstract language name is the name of the top-level
+-- abstract module
abstractName :: PGF -> AbsName
abstractName p = unsafePerformIO (peekUtf8CString =<< pgf_abstract_name (pgf p))
-startCat :: PGF -> Cat
-startCat p = unsafePerformIO (peekUtf8CString =<< pgf_start_cat (pgf p))
+-- | The start category is defined in the grammar with
+-- the \'startcat\' flag. This is usually the sentence category
+-- but it is not necessary. Despite that there is a start category
+-- defined you can parse with any category. The start category
+-- definition is just for convenience.
+startCat :: PGF -> Type
+startCat p = unsafePerformIO $ do
+ cat <- pgf_start_cat (pgf p)
+ cat <- peekUtf8CString cat
+ return (DTyp [] cat [])
loadConcr :: Concr -> FilePath -> IO ()
loadConcr c fpath =
@@ -143,7 +181,8 @@ loadConcr c fpath =
unloadConcr :: Concr -> IO ()
unloadConcr c = pgf_concrete_unload (concr c)
-functionType :: PGF -> CId -> Type
+-- | The type of a function
+functionType :: PGF -> Fun -> Type
functionType p fn =
unsafePerformIO $
withGuPool $ \tmpPl -> do
@@ -185,6 +224,7 @@ functionType p fn =
-----------------------------------------------------------------------------
-- Graphviz
+-- | Renders an abstract syntax tree in a Graphviz format.
graphvizAbstractTree :: PGF -> Expr -> String
graphvizAbstractTree p e =
unsafePerformIO $
@@ -259,11 +299,11 @@ getAnalysis ref self c_lemma c_anal prob exn = do
anal <- peekUtf8CString c_anal
writeIORef ref ((lemma, anal, prob):ans)
-parse :: Concr -> Cat -> String -> Either String [(Expr,Float)]
-parse lang cat sent = parseWithHeuristics lang cat sent (-1.0) []
+parse :: Concr -> Type -> String -> Either String [(Expr,Float)]
+parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parseWithHeuristics :: Concr -- ^ the language with which we parse
- -> Cat -- ^ the start category
+ -> Type -- ^ the start category
-> String -- ^ the input sentence
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
@@ -277,7 +317,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
-> Either String [(Expr,Float)]
-parseWithHeuristics lang cat sent heuristic callbacks =
+parseWithHeuristics lang (DTyp _ cat _) sent heuristic callbacks =
unsafePerformIO $
do exprPl <- gu_new_pool
parsePl <- gu_new_pool
@@ -427,11 +467,13 @@ parseWithOracle lang cat sent (predict,complete,literal) =
return ep
Nothing -> do return nullPtr
+-- | Returns True if there is a linearization defined for that function in that language
hasLinearization :: Concr -> Fun -> Bool
hasLinearization lang id = unsafePerformIO $
withGuPool $ \pl ->
newUtf8CString id pl >>= pgf_has_linearization (concr lang)
+-- | Linearizes an expression as a string in the language
linearize :: Concr -> Expr -> String
linearize lang e = unsafePerformIO $
withGuPool $ \pl ->
@@ -452,6 +494,7 @@ linearize lang e = unsafePerformIO $
else do lin <- gu_string_buf_freeze sb pl
peekUtf8CString lin
+-- | Generates all possible linearizations of an expression
linearizeAll :: Concr -> Expr -> [String]
linearizeAll lang e = unsafePerformIO $
do pl <- gu_new_pool
@@ -520,6 +563,7 @@ alignWords lang e = unsafePerformIO $
(fids :: [CInt]) <- peekArray (fromIntegral (n_fids :: CInt)) (ptr `plusPtr` (#offset PgfAlignmentPhrase, fids))
return (phrase, map fromIntegral fids)
+-- | List of all functions defined in the abstract syntax
functions :: PGF -> [Fun]
functions p =
unsafePerformIO $
@@ -540,6 +584,7 @@ functions p =
name <- peekUtf8CString (castPtr key)
writeIORef ref $! (name : names)
+-- | List of all functions defined for a category
functionsByCat :: PGF -> Cat -> [Fun]
functionsByCat p cat =
unsafePerformIO $
@@ -561,6 +606,9 @@ functionsByCat p cat =
name <- peekUtf8CString (castPtr key)
writeIORef ref $! (name : names)
+-- | List of all categories defined in the grammar.
+-- The categories are defined in the abstract syntax
+-- with the \'cat\' keyword.
categories :: PGF -> [Cat]
categories pgf = -- !!! quick hack
nub [cat | f<-functions pgf, let DTyp _ cat _=functionType pgf f]
diff --git a/src/runtime/haskell-bind/PGF2/Expr.hsc b/src/runtime/haskell-bind/PGF2/Expr.hsc
index 5914500de..338d4fa18 100644
--- a/src/runtime/haskell-bind/PGF2/Expr.hsc
+++ b/src/runtime/haskell-bind/PGF2/Expr.hsc
@@ -8,14 +8,17 @@ import Foreign hiding (unsafePerformIO)
import Foreign.C
import qualified Text.PrettyPrint as PP
import PGF2.FFI
+import Data.List(mapAccumL)
+-- | An data type that represents
+-- identifiers for functions and categories in PGF.
type CId = String
ppCId = PP.text
wildCId = "_" :: CId
-type Cat = String -- ^ Name of syntactic category
-type Fun = String -- ^ Name of function
+type Cat = CId -- ^ Name of syntactic category
+type Fun = CId -- ^ Name of function
-----------------------------------------------------------------------------
-- Expressions
@@ -28,8 +31,9 @@ type Fun = String -- ^ Name of function
data Expr = forall a . Expr {expr :: PgfExpr, exprMaster :: a}
instance Show Expr where
- show = showExpr
+ show = showExpr []
+-- | Constructs an expression by applying a function to a list of expressions
mkApp :: Fun -> [Expr] -> Expr
mkApp fun args =
unsafePerformIO $
@@ -45,6 +49,7 @@ mkApp fun args =
where
len = length args
+-- | Decomposes an expression into an application of a function
unApp :: Expr -> Maybe (Fun,[Expr])
unApp (Expr expr master) =
unsafePerformIO $
@@ -58,6 +63,7 @@ unApp (Expr expr master) =
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
return $ Just (fun, [Expr c_arg master | c_arg <- c_args])
+-- | Constructs an expression from a string literal
mkStr :: String -> Expr
mkStr str =
unsafePerformIO $
@@ -67,6 +73,7 @@ mkStr str =
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
return (Expr c_expr exprFPl)
+-- | Constructs an expression from an integer literal
mkInt :: Int -> Expr
mkInt val =
unsafePerformIO $ do
@@ -75,6 +82,7 @@ mkInt val =
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
return (Expr c_expr exprFPl)
+-- | Constructs an expression from a real number
mkFloat :: Double -> Expr
mkFloat val =
unsafePerformIO $ do
@@ -83,6 +91,7 @@ mkFloat val =
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
return (Expr c_expr exprFPl)
+-- | parses a 'String' as an expression
readExpr :: String -> Maybe Expr
readExpr str =
unsafePerformIO $
@@ -99,11 +108,15 @@ readExpr str =
else do gu_pool_free exprPl
return Nothing
-ppExpr :: Int -> Expr -> PP.Doc
-ppExpr d e = ppParens (d>0) (PP.text (showExpr e)) -- just a quick hack !!!
+ppExpr :: Int -> [CId] -> Expr -> PP.Doc
+ppExpr d xs e = ppParens (d>0) (PP.text (showExpr xs e)) -- just a quick hack !!!
-showExpr :: Expr -> String
-showExpr e =
+-- | renders an expression as a 'String'. The list
+-- of identifiers is the list of all free variables
+-- in the expression in order reverse to the order
+-- of binding.
+showExpr :: [CId] -> Expr -> String
+showExpr scope e =
unsafePerformIO $
withGuPool $ \tmpPl ->
do (sb,out) <- newOut tmpPl
@@ -131,25 +144,38 @@ type Hypo = (BindType,CId,Type)
-- | renders type as 'String'.
showType :: Type -> String
-showType = PP.render . ppType 0
+showType = PP.render . ppType 0 []
-ppType :: Int -> Type -> PP.Doc
-ppType d (DTyp hyps cat args)
- | null hyps = ppRes cat args
- | otherwise = let hdocs = map (ppHypo 1) hyps
- in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes cat args) hdocs)
+ppType :: Int -> [CId] -> Type -> PP.Doc
+ppType d scope (DTyp hyps cat args)
+ | null hyps = ppRes scope cat args
+ | otherwise = let (scope',hdocs) = mapAccumL (ppHypo 1) scope hyps
+ in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope cat args) hdocs)
where
- ppRes cat es
+ ppRes scope cat es
| null es = ppCId cat
- | otherwise = ppParens (d > 3) (ppCId cat PP.<+> PP.hsep (map (ppExpr 4) es))
+ | otherwise = ppParens (d > 3) (ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es))
-ppHypo :: Int -> (BindType,CId,Type) -> PP.Doc
-ppHypo d (Explicit,x,typ) =
+ppHypo :: Int -> [CId]-> (BindType,CId,Type) -> ([CId],PP.Doc)
+ppHypo d scope (Explicit,x,typ) =
if x == wildCId
- then ppType d typ
- else PP.parens (ppCId x PP.<+> PP.char ':' PP.<+> ppType 0 typ)
-ppHypo d (Implicit,x,typ) =
- PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 typ)
+ then (scope, ppType d scope typ)
+ else let y = freshName x scope
+ in (y:scope, PP.parens (ppCId x PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
+ppHypo d scope (Implicit,x,typ) =
+ if x == wildCId
+ then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
+ else let y = freshName x scope
+ in (y:scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
+
+freshName :: CId -> [CId] -> CId
+freshName x xs0 = loop 1 x
+ where
+ xs = wildCId : xs0
+
+ loop i y
+ | elem y xs = loop (i+1) (x++show i)
+ | otherwise = y
ppParens True = PP.parens
ppParens False = id
diff --git a/src/runtime/haskell-bind/PGF2/FFI.hs b/src/runtime/haskell-bind/PGF2/FFI.hs
index 8c4a1f5de..5e7dfe260 100644
--- a/src/runtime/haskell-bind/PGF2/FFI.hs
+++ b/src/runtime/haskell-bind/PGF2/FFI.hs
@@ -10,6 +10,8 @@ import Control.Exception
import GHC.Ptr
import Data.Int(Int32)
+-- | An abstract data type representing multilingual grammar
+-- in Portable Grammar Format.
data PGF = PGF {pgf :: Ptr PgfPGF, pgfMaster :: ForeignPtr GuPool}
data Concr = Concr {concr :: Ptr PgfConcr, concrMaster :: PGF}