summary refs log tree commit diff
path: root/src/GF
diff options
context:
space:
mode:
author aarne <aarne@cs.chalmers.se> 2007-12-17 18:12:46 +0000
committer aarne <aarne@cs.chalmers.se> 2007-12-17 18:12:46 +0000
commit 27602f4f8291f4d6a8b4b7109caf321d16cb6c99 (patch)
tree 8e64b75d15c3b9f80c543466ad37aa5536a86f8c /src/GF
parent 7551c70db6c76a796e93fa1cd4ebe7c850344b7e (diff)
took back smart type of Int ; Digits type in resource and some adjustments of Det syntax (not yet for romance and russian)
Diffstat (limited to 'src/GF')
-rw-r--r-- src/GF/Devel/GrammarToGFCC.hs 4
-rw-r--r-- src/GF/GFCC/Linearize.hs 5
-rw-r--r-- src/GF/GFCC/Raw/ConvertGFCC.hs 4
-rw-r--r-- src/GF/GFCC/doc/gfcc.txt 9
-rw-r--r-- src/GF/GFCC/doc/syntax.txt 182
-rw-r--r-- src/GF/Grammar/Lookup.hs 8
6 files changed, 199 insertions, 13 deletions
diff --git a/src/GF/Devel/GrammarToGFCC.hs b/src/GF/Devel/GrammarToGFCC.hs
index a61a2ec49..b7eaebe31 100644
--- a/src/GF/Devel/GrammarToGFCC.hs
+++ b/src/GF/Devel/GrammarToGFCC.hs
@@ -307,8 +307,8 @@ type ParamEnv =
paramValues :: SourceGrammar -> ParamEnv
paramValues cgr = (labels,untyps,typs) where
partyps = nub $
- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt
- ++ [ty |
+ --- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt
+ [ty |
(_,(_,CncCat (Yes (RecType ls)) _ _)) <- jments,
ty0 <- [ty | (_, ty) <- unlockTyp ls],
ty <- typsFrom ty0
diff --git a/src/GF/GFCC/Linearize.hs b/src/GF/GFCC/Linearize.hs
index d087384bf..7d5e6b010 100644
--- a/src/GF/GFCC/Linearize.hs
+++ b/src/GF/GFCC/Linearize.hs
@@ -31,8 +31,9 @@ linExp mcfg lang tree@(DTr _ at trees) = ---- bindings TODO
case at of
AC fun -> comp (lmap lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
- AI i -> R [C lst, kks (show i), C size] where
- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
+ AI i -> R [kks (show i)]
+ --- [C lst, kks (show i), C size] where
+ --- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
AF d -> R [kks (show d)]
AM _ -> TM
where
diff --git a/src/GF/GFCC/Raw/ConvertGFCC.hs b/src/GF/GFCC/Raw/ConvertGFCC.hs
index fbed72e69..3bfed9c83 100644
--- a/src/GF/GFCC/Raw/ConvertGFCC.hs
+++ b/src/GF/GFCC/Raw/ConvertGFCC.hs
@@ -73,7 +73,7 @@ toExp e = case e of
App fun [App (CId "B") xs, App (CId "X") exps] ->
DTr [x | AId x <- xs] (AC fun) (lmap toExp exps)
App (CId "Eq") eqs ->
- EEq [Equ (lmap toExp ps) (toExp v) | App (CId "Case") (v:ps) <- eqs]
+ EEq [Equ (lmap toExp ps) (toExp v) | App (CId "E") (v:ps) <- eqs]
AMet -> DTr [] (AM 0) []
AInt i -> DTr [] (AI i) []
AFlt i -> DTr [] (AF i) []
@@ -147,7 +147,7 @@ fromExp e = case e of
DTr [] (AI i) [] -> AInt (toInteger i)
DTr [] (AM _) [] -> AMet ----
EEq eqs ->
- App (CId "Eq") [App (CId "Case") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
+ App (CId "Eq") [App (CId "E") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
_ -> error $ "exp " ++ show e
fromTerm :: Term -> RExp
diff --git a/src/GF/GFCC/doc/gfcc.txt b/src/GF/GFCC/doc/gfcc.txt
index 6a78a62f6..5dcf2fbdc 100644
--- a/src/GF/GFCC/doc/gfcc.txt
+++ b/src/GF/GFCC/doc/gfcc.txt
@@ -1,6 +1,6 @@
The GFCC Grammar Format
Aarne Ranta
-October 5, 2007
+December 14, 2007
Author's address:
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
@@ -8,6 +8,7 @@ Author's address:
% to compile: txt2tags -thtml --toc gfcc.txt
History:
+- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC
- 5 Oct 2007: new, better structured GFCC with full expressive power
- 19 Oct: translation of lincats, new figures on C++
- 3 Oct 2006: first version
@@ -53,7 +54,8 @@ will be used instead. GFC provides only marginal advantages as a target format
compared with GF, and it is therefore just extra weight to carry around this
format.
-The main differences of GFCC compared with GFC (and GF) can be summarized as follows:
+The main differences of GFCC compared with GFC (and GF) can be
+summarized as follows:
- there are no modules, and therefore no qualified names
- a GFCC grammar is multilingual, and consists of a common abstract syntax
together with one concrete syntax per language
@@ -66,7 +68,8 @@ The main differences of GFCC compared with GFC (and GF) can be summarized as fol
Here is an example of a GF grammar, consisting of three modules,
-as translated to GFCC. The representations are aligned; thus they do not completely
+as translated to GFCC. The representations are aligned;
+thus they do not completely
reflect the order of judgements in GFCC files, which have different orders of
blocks of judgements, and alphabetical sorting.
```
diff --git a/src/GF/GFCC/doc/syntax.txt b/src/GF/GFCC/doc/syntax.txt
new file mode 100644
index 000000000..6bb3b8d97
--- /dev/null
+++ b/src/GF/GFCC/doc/syntax.txt
@@ -0,0 +1,182 @@
+GFCC Syntax
+
+
+==Syntax of GFCC files==
+
+The parser syntax is very simple, as defined in BNF:
+```
+ Grm. Grammar ::= [RExp] ;
+
+ App. RExp ::= "(" CId [RExp] ")" ;
+ AId. RExp ::= CId ;
+ AInt. RExp ::= Integer ;
+ AStr. RExp ::= String ;
+ AFlt. RExp ::= Double ;
+ AMet. RExp ::= "?" ;
+
+ terminator RExp "" ;
+
+ token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
+```
+While a parser and a printer can be generated for many languages
+from this grammar by using the BNF Converter, a parser is also
+easy to write by hand using recursive descent.
+
+
+==Syntax of well-formed GFCC code==
+
+Here is a summary of well-formed syntax,
+with a comment on the semantics of each construction.
+```
+ Grammar ::=
+ CId -- abstract syntax name
+ "(" "concrete" CId* ")" -- concrete syntax names
+ "(" "flags" Flag* ")" -- global flags
+ "(" "abstract" Abstract ")" -- abstract syntax
+ "(" "concrete" Concrete* ")" -- concrete syntaxes
+
+ Abstract ::=
+ "(" "flags" Flag* ")" -- abstract flags
+ "(" "fun" FunDef* ")" -- function definitions
+ "(" "cat" CatDef* ")" -- category definitions
+
+ Concrete ::=
+ "(" CId -- language name
+ "flags" Flag* -- concrete flags
+ "lin" LinDef* -- linearization rules
+ "oper" LinDef* -- operations (macros)
+ "lincat" LinDef* -- linearization type definitions
+ "lindef" LinDef* -- linearization default definitions
+ "printname" LinDef* -- printname definitions
+ "param" LinDef* -- lincats with labels and parameter value names
+ ")"
+
+ Flag ::= "(" CId String ")" -- flag and value
+ FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
+ CatDef ::= "(" CId Hypo* ")" -- category and context
+ LinDef ::= "(" CId Term ")" -- function and definition
+
+ Type ::=
+ "(" CId -- value category
+ "(" "H" Hypo* ")" -- argument context
+ "(" "X" Exp* ")" ")" -- arguments (of dependent value type)
+
+ Exp ::=
+ "(" CId -- function
+ "(" "B" CId* ")" -- bindings
+ "(" "X" Exp* ")" ")" -- arguments
+ | CId -- variable
+ | "?" -- metavariable
+ | "(" "Eq" Equation* ")" -- group of pattern equations
+ | Integer -- integer literal (non-negative)
+ | Double -- floating-point literal (non-negative)
+ | String -- string literal (in double quotes)
+
+ Hypo ::= "(" CId Type ")" -- variable and type
+
+ Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
+
+ Term ::=
+ "(" "R" Term* ")" -- array (record or table)
+ | "(" "S" Term* ")" -- concatenated sequence
+ | "(" "FV" Term* ")" -- free variant list
+ | "(" "P" Term Term ")" -- access to index (projection or selection)
+ | "(" "W" String Term ")" -- token prefix with suffix list
+ | "(" "A" Integer ")" -- pointer to subtree
+ | String -- token (in double quotes)
+ | Integer -- index in array
+ | CId -- macro constant
+ | "?" -- metavariable
+```
+
+
+==GFCC interpreter==
+
+The first phase in interpreting GFCC is to parse a GFCC file and
+build an internal abstract syntax representation, as specified
+in the previous section.
+
+With this representation, linearization can be performed by
+a straightforward function from expressions (``Exp``) to terms
+(``Term``). All expressions except groups of pattern equations
+can be linearized.
+
+Here is a reference Haskell implementation of linearization:
+```
+ linExp :: GFCC -> CId -> Exp -> Term
+ linExp gfcc lang tree@(DTr _ at trees) = case at of
+ AC fun -> comp (map lin trees) $ look fun
+ AS s -> R [K (show s)] -- quoted
+ AI i -> R [K (show i)]
+ AF d -> R [K (show d)]
+ AM _ -> TM
+ where
+ lin = linExp gfcc lang
+ comp = compute gfcc lang
+ look = lookLin gfcc lang
+```
+TODO: bindings must be supported.
+
+Terms resulting from linearization are evaluated in
+call-by-value order, with two environments needed:
+- the grammar (a concrete syntax) to give the global constants
+- an array of terms to give the subtree linearizations
+
+
+The Haskell implementation works as follows:
+```
+compute :: GFCC -> CId -> [Term] -> Term -> Term
+compute gfcc lang args = comp where
+ comp trm = case trm of
+ P r p -> proj (comp r) (comp p)
+ W s t -> W s (comp t)
+ R ts -> R $ map comp ts
+ V i -> idx args (fromInteger i) -- already computed
+ F c -> comp $ look c -- not computed (if contains V)
+ FV ts -> FV $ Prelude.map comp ts
+ S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
+ _ -> trm
+
+ look = lookOper gfcc lang
+
+ idx xs i = xs !! i
+
+ proj r p = case (r,p) of
+ (_, FV ts) -> FV $ Prelude.map (proj r) ts
+ (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts
+ (W s t, _) -> kks (s ++ getString (proj t p))
+ _ -> comp $ getField r (getIndex p)
+
+ getString t = case t of
+ K (KS s) -> s
+ _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
+
+ getIndex t = case t of
+ C i -> fromInteger i
+ RP p _ -> getIndex p
+ TM -> 0 -- default value for parameter
+ _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
+
+ getField t i = case t of
+ R rs -> idx rs i
+ RP _ r -> getField r i
+ TM -> TM
+ _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
+```
+The result of linearization is usually a record, which is realized as
+a string using the following algorithm.
+```
+ realize :: Term -> String
+ realize trm = case trm of
+ R (t:_) -> realize t
+ S ss -> unwords $ map realize ss
+ K s -> s
+ W s t -> s ++ realize t
+ FV (t:_) -> realize t -- TODO: all variants
+ TM -> "?"
+```
+Notice that realization always picks the first field of a record.
+If a linearization type has more than one field, the first field
+does not necessarily contain the desired string.
+Also notice that the order of record fields in GFCC is not necessarily
+the same as in GF source.
diff --git a/src/GF/Grammar/Lookup.hs b/src/GF/Grammar/Lookup.hs
index 481512751..81a62decf 100644
--- a/src/GF/Grammar/Lookup.hs
+++ b/src/GF/Grammar/Lookup.hs
@@ -231,10 +231,10 @@ lookupAbsDef gr m c = errIn ("looking up absdef of" +++ prt c) $ do
_ -> Bad $ prt m +++ "is not an abstract module"
linTypeInt :: Type
-linTypeInt =
- let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
- RecType [
- (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]
+linTypeInt = defLinType
+--- let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
+--- RecType [
+--- (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]
lookupLincat :: SourceGrammar -> Ident -> Ident -> Err Type
lookupLincat gr m c | elem c [zIdent "Int"] = return linTypeInt