diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2013-09-27 15:09:48 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2013-09-27 15:09:48 +0000 |
| commit | 426bc49a52b4efa0ef0129d713842d8c9abdf0ff (patch) | |
| tree | d9f5985559de0347448e77ff26ce5a2d3ee2f245 /src | |
| parent | b138899512d9aea248160eb17df3007e55dd03da (diff) | |
A major refactoring in the C and the Haskell runtimes. Note the incompatible change in the PGF format!!!
The following are the outcomes:
- Predef.nonExist is fully supported by both the Haskell and the C runtimes
- Predef.BIND is now an internal compiler defined token. For now
it behaves just as usual for the Haskell runtime, i.e. it generates &+.
However, the special treatment will let us handle it properly in
the C runtime.
- This required a major change in the PGF format since both
nonExist and BIND may appear inside 'pre' and this was not supported
before.
Diffstat (limited to 'src')
28 files changed, 331 insertions, 372 deletions
diff --git a/src/compiler/GF/Compile/Compute/AppPredefined.hs b/src/compiler/GF/Compile/Compute/AppPredefined.hs index 869052e0a..861a74a89 100644 --- a/src/compiler/GF/Compile/Compute/AppPredefined.hs +++ b/src/compiler/GF/Compile/Compute/AppPredefined.hs @@ -84,6 +84,8 @@ primitives = Map.fromList [(Explicit,varL,typeType),(Explicit,identW,mkFunType [typeStr] typeStr),(Explicit,identW,Vr varL)] (Vr varL) []))) Nothing) , (cNonExist , ResOper (Just (noLoc (mkProd -- Str [] typeStr []))) Nothing) + , (cBIND , ResOper (Just (noLoc (mkProd -- Str + [] typeStr []))) Nothing) ] where fun from to = oper (mkFunType from to) diff --git a/src/compiler/GF/Compile/Compute/Predef.hs b/src/compiler/GF/Compile/Compute/Predef.hs index 11c4002b8..b8b7f7c77 100644 --- a/src/compiler/GF/Compile/Compute/Predef.hs +++ b/src/compiler/GF/Compile/Compute/Predef.hs @@ -78,7 +78,7 @@ predefList = (cError,Error), -- Canonical values: (cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int), - (cInts,Ints),(cNonExist,NonExist)] + (cInts,Ints),(cNonExist,NonExist),(cBIND,BIND)] --- add more functions!!! 
delta f vs = @@ -106,6 +106,7 @@ delta f vs = PFalse -> canonical PTrue -> canonical NonExist-> canonical + BIND -> canonical where canonical = delay delay = return (VApp f vs) -- wrong number of arguments diff --git a/src/compiler/GF/Compile/Compute/Value.hs b/src/compiler/GF/Compile/Compute/Value.hs index 7dbaaa193..e72b06778 100644 --- a/src/compiler/GF/Compile/Compute/Value.hs +++ b/src/compiler/GF/Compile/Compute/Value.hs @@ -51,5 +51,5 @@ data Predefined = Drop | Take | Tk | Dp | EqStr | Occur | Occurs | ToUpper {- | Show | Read | ToStr | MapStr | EqVal -} | Error -- Canonical values below: - | PBool | PFalse | PTrue | Int | Ints | NonExist + | PBool | PFalse | PTrue | Int | Ints | NonExist | BIND deriving (Show,Eq,Ord,Ix,Bounded,Enum) diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 0afa2bd49..9642110bc 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -14,7 +14,7 @@ module GF.Compile.GeneratePMCFG ) where import PGF.CId -import PGF.Data(Alternative(..),CncCat(..),Symbol(..),fidVar) +import PGF.Data(CncCat(..),Symbol(..),fidVar) import GF.Infra.Option import GF.Grammar hiding (Env, mkRecord, mkTable) @@ -376,30 +376,24 @@ convertTerm opts sel ctype (FV vars) = do term <- variants vars convertTerm opts sel ctype (C t1 t2) = do v1 <- convertTerm opts sel ctype t1 v2 <- convertTerm opts sel ctype t2 return (CStr (concat [s | CStr s <- [v1,v2]])) -convertTerm opts sel ctype (K t) = return (CStr [SymKS [t]]) +convertTerm opts sel ctype (K t) = return (CStr [SymKS t]) convertTerm opts sel ctype Empty = return (CStr []) -convertTerm opts sel ctype (Alts s alts) - = return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]]) - where - strings (K s) = [s] - strings (C u v) = strings u ++ strings v - strings (Strs ss) = concatMap strings ss - strings (EPatt p) = getPatts p - strings Empty = [""] - strings t = bug $ "strings "++show t - - 
getPatts p = - case p of - PAlt a b -> getPatts a ++ getPatts b - PString s -> [s] - PSeq a b -> [s ++ t | s <- getPatts a, t <- getPatts b] - _ -> ppbug $ hang (text "not valid pattern in pre expression:") - 4 - (ppPatt Unqualified 0 p) +convertTerm opts sel ctype (Alts s alts)= do CStr s <- convertTerm opts CNil ctype s + alts <- forM alts $ \(u,Strs ps) -> do + CStr u <- convertTerm opts CNil ctype u + ps <- mapM (convertTerm opts CNil ctype) ps + return (u,map unSym ps) + return (CStr [SymKP s alts]) + where + unSym (CStr []) = "" + unSym (CStr [SymKS t]) = t + unSym _ = ppbug $ hang (text "invalid prefix in pre expression:") 4 (ppU 0 (Alts s alts)) convertTerm opts sel ctype (Q (m,f)) | m == cPredef && f == cNonExist = return (CStr [SymNE]) + | m == cPredef && + f == cBIND = return (CStr [SymBIND]) convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2)) | l `elem` map fst rs2 = convertTerm opts sel ctype t2 @@ -492,7 +486,7 @@ addSequencesV seqs (CRec vs) = let !(seqs1,vs1) = mapAccumL' (\seqs (lbl,b) -> addSequencesV seqs (CTbl pt vs)=let !(seqs1,vs1) = mapAccumL' (\seqs (trm,b) -> let !(seqs',b') = addSequencesB seqs b in (seqs',(trm,b'))) seqs vs in (seqs1,CTbl pt vs1) -addSequencesV seqs (CStr lin) = let !(seqs1,seqid) = addSequence seqs (optimizeLin lin) +addSequencesV seqs (CStr lin) = let !(seqs1,seqid) = addSequence seqs lin in (seqs1,CStr seqid) addSequencesV seqs (CPar i) = (seqs,CPar i) @@ -502,16 +496,6 @@ mapAccumL' f s (x:xs) = (s'',y:ys) where !(s', y ) = f s x !(s'',ys) = mapAccumL' f s' xs -optimizeLin [] = [] -optimizeLin lin@(SymKS _ : _) = - let (ts,lin') = getRest lin - in SymKS ts : optimizeLin lin' - where - getRest (SymKS ts : lin) = let (ts1,lin') = getRest lin - in (ts++ts1,lin') - getRest lin = ([],lin) -optimizeLin (sym : lin) = sym : optimizeLin lin - addSequence :: SeqSet -> [Symbol] -> (SeqSet,SeqId) addSequence seqs lst = case Map.lookup seq seqs of @@ -629,4 +613,4 @@ mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) 
<- Map.toList map] bug msg = ppbug (text msg) ppbug = error . render . hang (text "Internal error in GeneratePMCFG:") 4 -ppU = ppTerm Unqualified
\ No newline at end of file +ppU = ppTerm Unqualified diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index b7b3d5545..5cb01fac4 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -85,10 +85,12 @@ sym2js :: Symbol -> JS.Expr sym2js (SymCat n l) = new "SymCat" [JS.EInt n, JS.EInt l] sym2js (SymLit n l) = new "SymLit" [JS.EInt n, JS.EInt l] sym2js (SymVar n l) = new "SymVar" [JS.EInt n, JS.EInt l] -sym2js (SymKS ts) = new "SymKS" (map JS.EStr ts) -sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map JS.EStr ts), JS.EArray (map alt2js alts)] +sym2js (SymKS t) = new "SymKS" [JS.EStr t] +sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)] +sym2js SymNE = new "SymNE" [] +sym2js SymBIND = new "SymKS" [JS.EStr "&+"] -alt2js (Alt ps ts) = new "Alt" [JS.EArray (map JS.EStr ps), JS.EArray (map JS.EStr ts)] +alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)] new :: String -> [JS.Expr] -> JS.Expr new f xs = JS.ENew (JS.Ident f) xs diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs index de50d86d1..02993ac65 100644 --- a/src/compiler/GF/Compile/PGFtoProlog.hs +++ b/src/compiler/GF/Compile/PGFtoProlog.hs @@ -136,9 +136,9 @@ instance PLPrint Symbol where plp (SymCat n l) = plOper ":" (show n) (show l) plp (SymLit n l) = plTerm "lit" [show n, show l] plp (SymVar n l) = plTerm "var" [show n, show l] - plp (SymKS ts) = prTList "," (map plAtom ts) - plp (SymKP ts alts) = plTerm "pre" [plList (map plAtom ts), plList (map plAlt alts)] - where plAlt (Alt ps ts) = plOper "/" (plList (map plAtom ps)) (plList (map plAtom ts)) + plp (SymKS t) = plAtom t + plp (SymKP ts alts) = plTerm "pre" [plList (map plp ts), plList (map plAlt alts)] + where plAlt (ps,ts) = plOper "/" (plList (map plp ps)) (plList (map plAtom ts)) class PLPrint a where plp :: a -> String diff --git 
a/src/compiler/GF/Compile/PGFtoPython.hs b/src/compiler/GF/Compile/PGFtoPython.hs index a4268b714..1877f8d70 100644 --- a/src/compiler/GF/Compile/PGFtoPython.hs +++ b/src/compiler/GF/Compile/PGFtoPython.hs @@ -75,9 +75,9 @@ pySymbol :: Symbol -> String pySymbol (SymCat n l) = pyTuple 0 show [n, l] pySymbol (SymLit n l) = pyDict 0 pyStr id [("lit", pyTuple 0 show [n, l])] pySymbol (SymVar n l) = pyDict 0 pyStr id [("var", pyTuple 0 show [n, l])] -pySymbol (SymKS ts) = prTList "," (map pyStr ts) -pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pyStr ts), ("alts", pyList 0 alt2py alts)] - where alt2py (Alt ps ts) = pyTuple 0 (pyList 0 pyStr) [ps, ts] +pySymbol (SymKS t) = pyStr t +pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pySymbol ts), ("alts", pyList 0 alt2py alts)] + where alt2py (ps,ts) = pyTuple 0 (pyList 0 pyStr) [map pySymbol ps, ts] ---------------------------------------------------------------------- -- python helpers diff --git a/src/compiler/GF/Grammar/Predef.hs b/src/compiler/GF/Grammar/Predef.hs index 8bee8dcb5..b814dd110 100644 --- a/src/compiler/GF/Grammar/Predef.hs +++ b/src/compiler/GF/Grammar/Predef.hs @@ -30,6 +30,7 @@ cErrorType = identS "Error" cOverload = identS "overload" cUndefinedType = identS "UndefinedType" cNonExist = identS "nonExist" +cBIND = identS "BIND" isPredefCat :: Ident -> Bool isPredefCat c = elem c [cInt,cString,cFloat] diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index 163f02537..39c5b2a32 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -91,8 +91,8 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co symbolToCFSymbol :: Symbol -> [CFSymbol] symbolToCFSymbol (SymCat n l) = [let PArg _ fid = args!!n in NonTerminal (fcatToCat fid l)] - symbolToCFSymbol (SymKS ts) = map Terminal ts - symbolToCFSymbol (SymKP ts as) = map Terminal $ ts + symbolToCFSymbol (SymKS t) = [Terminal t] + 
symbolToCFSymbol (SymKP syms as) = concatMap symbolToCFSymbol syms ---- ++ [t | Alt ss _ <- as, t <- ss] ---- should be alternatives in [[CFSymbol]] ---- AR 3/6/2010 diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 7717f89f9..2d7fc450e 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -131,8 +131,11 @@ extern GU_DECLARE_TYPE(PgfCncCat, abstract); bool pgf_tokens_equal(PgfTokens* t1, PgfTokens* t2); +typedef GuSeq PgfSequence; // -> PgfSymbol +typedef GuSeq PgfSequences; + typedef struct { - PgfTokens* form; + PgfSequence* form; /**< The form of this variant as a list of tokens. */ GuStrings* prefixes; @@ -175,7 +178,8 @@ typedef enum { PGF_SYMBOL_VAR, PGF_SYMBOL_KS, PGF_SYMBOL_KP, - PGF_SYMBOL_NE + PGF_SYMBOL_NE, + PGF_SYMBOL_BIND } PgfSymbolTag; typedef struct { @@ -186,14 +190,14 @@ typedef struct { typedef PgfSymbolIdx PgfSymbolCat, PgfSymbolLit, PgfSymbolVar; typedef struct { - PgfTokens* tokens; + PgfToken token; } PgfSymbolKS; typedef struct PgfSymbolKP /** A prefix-dependent symbol. The form that this symbol takes * depends on the form of a prefix of the following symbol. */ { - PgfTokens* default_form; + PgfSequence* default_form; /**< Default form that this symbol takes if none of of the * variant forms is triggered. 
*/ @@ -206,8 +210,8 @@ typedef struct PgfSymbolKP typedef struct { } PgfSymbolNE; -typedef GuSeq PgfSequence; // -> PgfSymbol -typedef GuSeq PgfSequences; +typedef struct { +} PgfSymbolBIND; typedef struct { PgfAbsFun* absfun; diff --git a/src/runtime/c/pgf/graphviz.c b/src/runtime/c/pgf/graphviz.c index 5190d2fee..1003c4e8e 100644 --- a/src/runtime/c/pgf/graphviz.c +++ b/src/runtime/c/pgf/graphviz.c @@ -116,18 +116,15 @@ typedef struct { } PgfBracketLznState; static void -pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - PgfParseNode* node = gu_new(PgfParseNode, state->pool); - node->id = 100000 + gu_buf_length(state->leaves); - node->parent = state->parent; - node->label = gu_seq_get(toks, PgfToken, i); - gu_buf_push(state->leaves, PgfParseNode*, node); - } + PgfParseNode* node = gu_new(PgfParseNode, state->pool); + node->id = 100000 + gu_buf_length(state->leaves); + node->parent = state->parent; + node->label = tok; + gu_buf_push(state->leaves, PgfParseNode*, node); } static void @@ -214,7 +211,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, } static PgfLinFuncs pgf_bracket_lin_funcs = { - .symbol_tokens = pgf_bracket_lzn_symbol_tokens, + .symbol_token = pgf_bracket_lzn_symbol_token, .expr_literal = pgf_bracket_lzn_expr_literal, .begin_phrase = pgf_bracket_lzn_begin_phrase, .end_phrase = pgf_bracket_lzn_end_phrase diff --git a/src/runtime/c/pgf/linearizer.c b/src/runtime/c/pgf/linearizer.c index 0a29db824..6a3eb8c9d 100644 --- a/src/runtime/c/pgf/linearizer.c +++ b/src/runtime/c/pgf/linearizer.c @@ -454,6 +454,50 @@ pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool) } void +pgf_lzr_linearize_sequence(PgfConcr* concr, + PgfCncTreeApp* fapp, PgfSequence* seq, + PgfLinFuncs** fnsp) 
+{ + size_t nsyms = gu_seq_length(seq); + PgfSymbol* syms = gu_seq_data(seq); + for (size_t i = 0; i < nsyms; i++) { + PgfSymbol sym = syms[i]; + GuVariantInfo sym_i = gu_variant_open(sym); + switch (sym_i.tag) { + case PGF_SYMBOL_CAT: + case PGF_SYMBOL_VAR: + case PGF_SYMBOL_LIT: { + PgfSymbolIdx* sidx = sym_i.data; + gu_assert((unsigned) sidx->d < fapp->n_args); + + PgfCncTree argf = fapp->args[sidx->d]; + pgf_lzr_linearize(concr, argf, sidx->r, fnsp); + break; + } + case PGF_SYMBOL_KS: { + PgfSymbolKS* ks = sym_i.data; + if ((*fnsp)->symbol_token) { + (*fnsp)->symbol_token(fnsp, ks->token); + } + break; + } + case PGF_SYMBOL_KP: { + // TODO: correct prefix-dependencies + PgfSymbolKP* kp = sym_i.data; + pgf_lzr_linearize_sequence(concr, fapp, kp->default_form, fnsp); + break; + } + case PGF_SYMBOL_NE: { + // Nothing to be done here + break; + } + default: + gu_impossible(); + } + } +} + +void pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp) { PgfLinFuncs* fns = *fnsp; @@ -472,47 +516,9 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs } gu_require(lin_idx < fun->n_lins); + PgfSequence* seq = fun->lins[lin_idx]; - size_t nsyms = gu_seq_length(seq); - PgfSymbol* syms = gu_seq_data(seq); - for (size_t i = 0; i < nsyms; i++) { - PgfSymbol sym = syms[i]; - GuVariantInfo sym_i = gu_variant_open(sym); - switch (sym_i.tag) { - case PGF_SYMBOL_CAT: - case PGF_SYMBOL_VAR: - case PGF_SYMBOL_LIT: { - PgfSymbolIdx* sidx = sym_i.data; - gu_assert((unsigned) sidx->d < fapp->n_args); - - PgfCncTree argf = fapp->args[sidx->d]; - pgf_lzr_linearize(concr, argf, sidx->r, fnsp); - break; - } - case PGF_SYMBOL_KS: { - PgfSymbolKS* ks = sym_i.data; - if (fns->symbol_tokens) { - fns->symbol_tokens(fnsp, ks->tokens); - } - break; - } - case PGF_SYMBOL_KP: { - // TODO: correct prefix-dependencies - PgfSymbolKP* kp = sym_i.data; - if (fns->symbol_tokens) { - fns->symbol_tokens(fnsp, - kp->default_form); - } - break; 
- } - case PGF_SYMBOL_NE: { - // Nothing to be done here - break; - } - default: - gu_impossible(); - } - } + pgf_lzr_linearize_sequence(concr, fapp, seq, fnsp); if (fns->end_phrase) { fns->end_phrase(fnsp, @@ -572,22 +578,18 @@ struct PgfSimpleLin { }; static void -pgf_file_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs); if (!gu_ok(flin->err)) { return; } - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - if (flin->n_tokens > 0) - gu_putc(' ', flin->out, flin->err); + if (flin->n_tokens > 0) + gu_putc(' ', flin->out, flin->err); - PgfToken tok = gu_seq_get(toks, PgfToken, i); - gu_string_write(tok, flin->out, flin->err); - - flin->n_tokens++; - } + gu_string_write(tok, flin->out, flin->err); + + flin->n_tokens++; } static void @@ -626,10 +628,10 @@ pgf_file_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit) } static PgfLinFuncs pgf_file_lin_funcs = { - .symbol_tokens = pgf_file_lzn_symbol_tokens, - .expr_literal = pgf_file_lzn_expr_literal, - .begin_phrase = NULL, - .end_phrase = NULL, + .symbol_token = pgf_file_lzn_symbol_token, + .expr_literal = pgf_file_lzn_expr_literal, + .begin_phrase = NULL, + .end_phrase = NULL, }; void diff --git a/src/runtime/c/pgf/linearizer.h b/src/runtime/c/pgf/linearizer.h index bd143c1c2..ea240dc32 100644 --- a/src/runtime/c/pgf/linearizer.h +++ b/src/runtime/c/pgf/linearizer.h @@ -51,7 +51,7 @@ typedef struct PgfLinFuncs PgfLinFuncs; struct PgfLinFuncs { /// Output tokens - void (*symbol_tokens)(PgfLinFuncs** self, PgfTokens* toks); + void (*symbol_token)(PgfLinFuncs** self, PgfToken tok); /// Output literal void (*expr_literal)(PgfLinFuncs** self, PgfLiteral lit); diff --git a/src/runtime/c/pgf/literals.c b/src/runtime/c/pgf/literals.c index a11097781..7e0c664c6 100644 --- a/src/runtime/c/pgf/literals.c +++ b/src/runtime/c/pgf/literals.c @@ -42,7 +42,7 @@ 
pgf_match_string_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, gu_new_variant(PGF_LITERAL_STR, PgfLiteralStr, &expr_lit->lit, pool); - lit_str->val = gu_seq_get(sks->tokens, PgfToken, 0); + lit_str->val = sks->token; *out_ep = ep; accepted = false; @@ -80,10 +80,9 @@ pgf_match_int_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, } else if (n_syms == 1) { PgfSymbolKS* sks = gu_variant_data(gu_seq_get(seq, PgfSymbol, 0)); - PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0); int val; - if (!gu_string_to_int(tok, &val)) { + if (!gu_string_to_int(sks->token, &val)) { *out_ep = NULL; } else { PgfExprProb* ep = gu_new(PgfExprProb, pool); @@ -137,10 +136,9 @@ pgf_match_float_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, } else if (n_syms == 1) { PgfSymbolKS* sks = gu_variant_data(gu_seq_get(seq, PgfSymbol, 0)); - PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0); double val; - if (!gu_string_to_double(tok, &val)) { + if (!gu_string_to_double(sks->token, &val)) { *out_ep = NULL; } else { PgfExprProb* ep = gu_new(PgfExprProb, pool); @@ -209,9 +207,8 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i); gu_assert(gu_variant_tag(sym) == PGF_SYMBOL_KS); PgfSymbolKS* sks = gu_variant_data(sym); - PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0); - gu_string_write(tok, out, err); + gu_string_write(sks->token, out, err); } PgfExprProb* ep = gu_new(PgfExprProb, pool); diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 92c689fae..188672dd3 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -133,8 +133,8 @@ struct PgfItem { PgfPArgs* args; PgfSymbol curr_sym; uint16_t seq_idx; - uint8_t tok_idx; - uint8_t alt; + uint8_t alt_idx; // position in the pre alternative + uint8_t alt; // the number of the alternative prob_t inside_prob; }; @@ -694,7 +694,7 @@ pgf_new_item(PgfItemConts* conts, PgfProduction prod, item->prod = prod; item->curr_sym = gu_null_variant; 
item->seq_idx = 0; - item->tok_idx = 0; + item->alt_idx = 0; item->alt = 0; conts->ref_count++; @@ -758,8 +758,12 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat, static void pgf_item_advance(PgfItem* item, GuPool* pool) { - item->seq_idx++; - pgf_item_set_curr_symbol(item, pool); + if (GU_LIKELY(item->alt == 0)) { + item->seq_idx++; + pgf_item_set_curr_symbol(item, pool); + } + else + item->alt_idx++; } static void @@ -1133,8 +1137,7 @@ pgf_parsing_meta_scan(PgfParseState* before, PgfParseState* after, gu_alignof(PgfSymbolKS), &item->curr_sym, after->ps->pool); *((PgfSymbol*)(sks+1)) = prev; - sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool); - gu_seq_set(sks->tokens, PgfToken, 0, tok); + sks->token = tok; gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item); } @@ -1218,76 +1221,54 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after, case PGF_SYMBOL_KS: { if (after != NULL) { PgfSymbolKS* sks = gu_variant_data(sym); - gu_assert(item->tok_idx < gu_seq_length(sks->tokens)); - PgfToken tok = - gu_seq_get(sks->tokens, PgfToken, item->tok_idx++); - if (item->tok_idx == gu_seq_length(sks->tokens)) { - item->tok_idx = 0; - pgf_item_advance(item, after->ps->pool); - } - pgf_parsing_add_transition(before, after, tok, item); + pgf_item_advance(item, after->ps->pool); + pgf_parsing_add_transition(before, after, sks->token, item); } break; } case PGF_SYMBOL_KP: { if (after != NULL) { PgfSymbolKP* skp = gu_variant_data(sym); - size_t idx = item->tok_idx; - uint8_t alt = item->alt; - gu_assert(idx < gu_seq_length(skp->default_form)); - if (idx == 0) { - PgfToken tok; + + PgfSymbol sym; + if (item->alt == 0) { PgfItem* new_item; - - tok = gu_seq_get(skp->default_form, PgfToken, 0); + new_item = pgf_item_copy(item, after->ps->pool, after->ps); - new_item->tok_idx++; - if (new_item->tok_idx == gu_seq_length(skp->default_form)) { - new_item->tok_idx = 0; - pgf_item_advance(new_item, after->ps->pool); - } - 
pgf_parsing_add_transition(before, after, tok, new_item); + new_item->alt = 1; + new_item->alt_idx = 0; + sym = gu_seq_get(skp->default_form, PgfSymbol, new_item->alt_idx); + pgf_parsing_symbol(before, after, new_item, sym); for (size_t i = 0; i < skp->n_forms; i++) { - // XXX: do nubbing properly - PgfTokens* toks = skp->forms[i].form; - PgfTokens* toks2 = skp->default_form; - bool skip = pgf_tokens_equal(toks, toks2); + PgfSequence* syms = skp->forms[i].form; + PgfSequence* syms2 = skp->default_form; + bool skip = false; /*pgf_tokens_equal(toks, toks2); for (size_t j = 0; j < i; j++) { PgfTokens* toks2 = skp->forms[j].form; skip |= pgf_tokens_equal(toks, toks2); - } + }*/ if (!skip) { - tok = gu_seq_get(toks, PgfToken, 0); new_item = pgf_item_copy(item, after->ps->pool, after->ps); - new_item->tok_idx++; - new_item->alt = i; - if (new_item->tok_idx == gu_seq_length(toks)) { - new_item->tok_idx = 0; - pgf_item_advance(new_item, after->ps->pool); - } - pgf_parsing_add_transition(before, after, tok, new_item); + new_item->alt = i+2; + new_item->alt_idx = 0; + sym = gu_seq_get(syms, PgfSymbol, new_item->alt_idx); + pgf_parsing_symbol(before, after, new_item, sym); } } - } else if (alt == 0) { - PgfToken tok = - gu_seq_get(skp->default_form, PgfToken, idx); - item->tok_idx++; - if (item->tok_idx == gu_seq_length(skp->default_form)) { - item->tok_idx = 0; - pgf_item_advance(item, after->ps->pool); - } - pgf_parsing_add_transition(before, after, tok, item); } else { - gu_assert(alt <= skp->n_forms); - PgfTokens* toks = skp->forms[alt - 1].form; - PgfToken tok = gu_seq_get(toks, PgfToken, idx); - item->tok_idx++; - if (item->tok_idx == gu_seq_length(toks)) { - item->tok_idx = 0; + PgfSequence* syms = + (item->alt == 1) ? 
skp->default_form : + skp->forms[item->alt-2].form; + + if (item->alt_idx < gu_seq_length(syms)) { + sym = gu_seq_get(syms, PgfSymbol, item->alt_idx); + pgf_parsing_symbol(before, after, item, sym); + } else { + item->alt = 0; pgf_item_advance(item, after->ps->pool); + gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item); } - pgf_parsing_add_transition(before, after, tok, item); } } break; @@ -1357,7 +1338,7 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after, // XXX TODO proper support break; case PGF_SYMBOL_NE: { - // Nothing to be done here + pgf_item_free(before, after, item); break; } default: @@ -1450,8 +1431,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item) gu_alignof(PgfSymbolKS), &item->curr_sym, after->ps->pool); *((PgfSymbol*)(sks+1)) = prev; - sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool); - gu_seq_set(sks->tokens, PgfToken, 0, tok); + sks->token = tok; item->seq_idx++; pgf_parsing_add_transition(before, after, tok, item); @@ -1755,9 +1735,7 @@ typedef struct { } PgfPrefixTokenState; static GuString -pgf_get_tokens(PgfSequence* seq, - uint16_t seq_idx, uint8_t tok_idx, - GuPool* pool) +pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool) { GuPool* tmp_pool = gu_new_pool(); GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool); @@ -1773,17 +1751,7 @@ pgf_get_tokens(PgfSequence* seq, switch (i.tag) { case PGF_SYMBOL_KS: { PgfSymbolKS* symks = i.data; - size_t len = gu_seq_length(symks->tokens); - for (size_t i = tok_idx; i < len; i++) { - if (i > 0) { - gu_putc(' ', out, err); - } - - PgfToken tok = gu_seq_get(symks->tokens, PgfToken, i); - gu_string_write(tok, out, err); - } - - tok_idx = 0; + gu_string_write(symks->token, out, err); } default: goto end; @@ -1809,18 +1777,9 @@ pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item) PgfSequence* seq; pgf_item_sequence(item, &lin_idx, &seq, ts->pool); - uint16_t seq_idx = item->seq_idx; - uint8_t tok_idx = 
item->tok_idx; - - // go one token back - if (tok_idx > 0) - tok_idx--; - else - seq_idx--; - ts->tp = gu_new(PgfTokenProb, ts->pool); ts->tp->tok = - pgf_get_tokens(seq, seq_idx, tok_idx, ts->pool); + pgf_get_tokens(seq, item->seq_idx-1, ts->pool); ts->tp->cat = item->conts->ccat->cnccat->abscat->name; ts->tp->prob = item->inside_prob+item->conts->outside_prob; } @@ -2346,17 +2305,15 @@ pgf_morpho_iter(GuMapItor* fn, const void* key, void* value, GuExn* err) switch (i.tag) { case PGF_SYMBOL_KS: { PgfSymbolKS* symks = i.data; - size_t len = gu_seq_length(symks->tokens); - for (size_t i = 0; i < len; i++) { - if (pos >= gu_seq_length(clo->tokens)) - goto cont; - - PgfToken tok1 = gu_seq_get(symks->tokens, PgfToken, i); - PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++); - - if (!gu_string_eq(tok1, tok2)) - goto cont; - } + + if (pos >= gu_seq_length(clo->tokens)) + goto cont; + + PgfToken tok1 = symks->token; + PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++); + + if (!gu_string_eq(tok1, tok2)) + goto cont; } default: continue; @@ -2443,7 +2400,7 @@ pgf_fullform_iter(GuMapItor* fn, const void* key, void* value, GuExn* err) PgfProductionApply* papp = i.data; PgfSequence* seq = papp->fun->lins[cfc.lin_idx]; - GuString tokens = pgf_get_tokens(seq, 0, 0, st->pool); + GuString tokens = pgf_get_tokens(seq, 0, st->pool); // create a new production index with keys that // are multiword units @@ -2531,12 +2488,10 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry, static void pgf_parser_index_token(PgfConcr* concr, - PgfTokens* tokens, + PgfToken tok, PgfCCat* ccat, size_t lin_idx, PgfProduction prod, GuPool *pool) { - PgfToken tok = gu_seq_get(tokens, PgfToken, 0); - PgfProductionIdx* set = gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*); if (set == NULL) { @@ -2570,6 +2525,47 @@ pgf_parser_index_epsilon(PgfConcr* concr, gu_buf_push(prods, PgfProduction, prod); } +static void +pgf_parser_index_symbol(PgfConcr* concr, PgfSymbol sym, + 
PgfCCat* ccat, size_t lin_idx, PgfProduction prod, + GuPool *pool) +{ + GuVariantInfo i = gu_variant_open(sym); + switch (i.tag) { + case PGF_SYMBOL_KS: { + PgfSymbolKS* sks = i.data; + pgf_parser_index_token(concr, + sks->token, + ccat, lin_idx, prod, + pool); + break; + } + case PGF_SYMBOL_KP: { + PgfSymbolKP* skp = i.data; + PgfSymbol sym = + gu_seq_get(skp->default_form, PgfSymbol, 0); + pgf_parser_index_symbol(concr, sym, + ccat, lin_idx, prod, + pool); + for (size_t i = 0; i < skp->n_forms; i++) { + sym = gu_seq_get(skp->forms[i].form, PgfSymbol, 0); + pgf_parser_index_symbol(concr, sym, + ccat, lin_idx, prod, + pool); + } + break; + } + case PGF_SYMBOL_CAT: + case PGF_SYMBOL_LIT: + case PGF_SYMBOL_NE: + case PGF_SYMBOL_VAR: + // Nothing to be done here + break; + default: + gu_impossible(); + } +} + void pgf_parser_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod, @@ -2586,39 +2582,9 @@ pgf_parser_index(PgfConcr* concr, PgfSequence* seq = papp->fun->lins[lin_idx]; if (gu_seq_length(seq) > 0) { - GuVariantInfo i = gu_variant_open(gu_seq_get(seq, PgfSymbol, 0)); - switch (i.tag) { - case PGF_SYMBOL_KS: { - PgfSymbolKS* sks = i.data; - pgf_parser_index_token(concr, - sks->tokens, - ccat, lin_idx, prod, - pool); - break; - } - case PGF_SYMBOL_KP: { - PgfSymbolKP* skp = i.data; - pgf_parser_index_token(concr, - skp->default_form, - ccat, lin_idx, prod, - pool); - for (size_t i = 0; i < skp->n_forms; i++) { - pgf_parser_index_token(concr, - skp->forms[i].form, - ccat, lin_idx, prod, - pool); - } - break; - } - case PGF_SYMBOL_CAT: - case PGF_SYMBOL_LIT: - case PGF_SYMBOL_NE: - case PGF_SYMBOL_VAR: - // Nothing to be done here - break; - default: - gu_impossible(); - } + pgf_parser_index_symbol(concr, gu_seq_get(seq, PgfSymbol, 0), + ccat, lin_idx, prod, + pool); } else { pgf_parser_index_epsilon(concr, ccat, lin_idx, prod, diff --git a/src/runtime/c/pgf/parseval.c b/src/runtime/c/pgf/parseval.c index 84d93b346..8b38d252d 100644 --- 
a/src/runtime/c/pgf/parseval.c +++ b/src/runtime/c/pgf/parseval.c @@ -19,19 +19,14 @@ typedef struct { } PgfMetricsLznState; static void -pgf_metrics_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); - - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - PgfToken tok = gu_seq_get(toks, PgfToken, i); - - if (state->ps != NULL) - state->ps = pgf_parser_next_state(state->ps, tok); + + if (state->ps != NULL) + state->ps = pgf_parser_next_state(state->ps, tok); - state->pos++; - } + state->pos++; } static void @@ -128,17 +123,17 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id } static PgfLinFuncs pgf_metrics_lin_funcs1 = { - .symbol_tokens = pgf_metrics_lzn_symbol_tokens, - .expr_literal = pgf_metrics_lzn_expr_literal, - .begin_phrase = pgf_metrics_lzn_begin_phrase, - .end_phrase = pgf_metrics_lzn_end_phrase1 + .symbol_token = pgf_metrics_lzn_symbol_token, + .expr_literal = pgf_metrics_lzn_expr_literal, + .begin_phrase = pgf_metrics_lzn_begin_phrase, + .end_phrase = pgf_metrics_lzn_end_phrase1 }; static PgfLinFuncs pgf_metrics_lin_funcs2 = { - .symbol_tokens = pgf_metrics_lzn_symbol_tokens, - .expr_literal = pgf_metrics_lzn_expr_literal, - .begin_phrase = pgf_metrics_lzn_begin_phrase, - .end_phrase = pgf_metrics_lzn_end_phrase2 + .symbol_token = pgf_metrics_lzn_symbol_token, + .expr_literal = pgf_metrics_lzn_expr_literal, + .begin_phrase = pgf_metrics_lzn_begin_phrase, + .end_phrase = pgf_metrics_lzn_end_phrase2 }; bool diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index 79aac5afd..82ce0bf45 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -196,19 +196,16 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences, } static void -pgf_print_tokens(PgfTokens* tokens, GuOut *out, GuExn *err) 
+pgf_print_token(PgfToken tok, GuOut *out, GuExn *err) { gu_putc('"', out, err); - size_t n_toks = gu_seq_length(tokens); - for (size_t i = 0; i < n_toks; i++) { - if (i > 0) gu_putc(' ', out, err); - - PgfToken tok = gu_seq_get(tokens, PgfToken, i); - gu_string_write(tok, out, err); - } + gu_string_write(tok, out, err); gu_putc('"', out, err); } +static void +pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err); + void pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) { @@ -220,18 +217,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) } case PGF_SYMBOL_KS: { PgfSymbolKS* sks = gu_variant_data(sym); - pgf_print_tokens(sks->tokens, out, err); + pgf_print_token(sks->token, out, err); break; } case PGF_SYMBOL_KP: { PgfSymbolKP* skp = gu_variant_data(sym); gu_puts("pre {", out, err); - pgf_print_tokens(skp->default_form, out, err); + pgf_print_sequence(skp->default_form, out, err); for (size_t i = 0; i < skp->n_forms; i++) { gu_puts("; ", out, err); - pgf_print_tokens(skp->forms[i].form, out, err); + pgf_print_sequence(skp->forms[i].form, out, err); gu_puts(" / ", out, err); size_t n_prefixes = gu_seq_length(skp->forms[i].prefixes); @@ -262,16 +259,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) gu_puts("nonExist", out, err); break; } + case PGF_SYMBOL_BIND: { + gu_puts("BIND", out, err); + break; + } default: gu_impossible(); } } static void -pgf_print_sequence(size_t seqid, PgfSequence* seq, GuOut *out, GuExn *err) +pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err) { - gu_printf(out,err," S%d := ", seqid); - int n_syms = gu_seq_length(seq); for (int i = 0; i < n_syms; i++) { if (i > 0) gu_putc(' ', out, err); @@ -279,8 +278,6 @@ pgf_print_sequence(size_t seqid, PgfSequence* seq, GuOut *out, GuExn *err) PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i); pgf_print_symbol(sym, out, err); } - - gu_putc('\n', out, err); } static void @@ -342,7 +339,10 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr, size_t n_seqs = 
gu_seq_length(concr->sequences); for (size_t i = 0; i < n_seqs; i++) { PgfSequence* seq = gu_seq_get(concr->sequences, PgfSequence*, i); - pgf_print_sequence(i, seq, out, err); + + gu_printf(out,err," S%d := ", i); + pgf_print_sequence(seq, out, err); + gu_putc('\n', out, err); } gu_puts(" categories\n", out, err); diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 95b5a4c04..ebc5050e4 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -586,27 +586,13 @@ pgf_read_printnames(PgfReader* rdr) return printnames; } -static PgfTokens* -pgf_read_tokens(PgfReader* rdr) -{ - size_t len = pgf_read_len(rdr); - gu_return_on_exn(rdr->err, NULL); - - PgfTokens* tokens = gu_new_seq(PgfToken, len, rdr->opool); - for (size_t i = 0; i < len; i++) { - PgfToken token = pgf_read_string(rdr); - gu_return_on_exn(rdr->err, NULL); - - gu_seq_set(tokens, PgfToken, i, token); - } - - return tokens; -} +static PgfSequence* +pgf_read_sequence(PgfReader* rdr); static void pgf_read_alternative(PgfReader* rdr, PgfAlternative* alt) { - alt->form = pgf_read_tokens(rdr); + alt->form = pgf_read_sequence(rdr); gu_return_on_exn(rdr->err,); size_t n_prefixes = pgf_read_len(rdr); @@ -672,12 +658,12 @@ pgf_read_symbol(PgfReader* rdr) gu_new_variant(PGF_SYMBOL_KS, PgfSymbolKS, &sym, rdr->opool); - sym_ks->tokens = pgf_read_tokens(rdr); + sym_ks->token = pgf_read_string(rdr); gu_return_on_exn(rdr->err, gu_null_variant); break; } case PGF_SYMBOL_KP: { - PgfTokens* default_form = pgf_read_tokens(rdr); + PgfSequence* default_form = pgf_read_sequence(rdr); gu_return_on_exn(rdr->err, gu_null_variant); size_t n_forms = pgf_read_len(rdr); @@ -703,6 +689,13 @@ pgf_read_symbol(PgfReader* rdr) gu_return_on_exn(rdr->err, gu_null_variant); break; } + case PGF_SYMBOL_BIND: { + gu_new_variant(PGF_SYMBOL_BIND, + PgfSymbolBIND, + &sym, rdr->opool); + gu_return_on_exn(rdr->err, gu_null_variant); + break; + } default: pgf_read_tag_error(rdr); } diff --git 
a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index 865f98417..3c9dcc265 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -76,10 +76,6 @@ instance Binary Concr where , cnccats=cnccats, totalCats=totalCats
})
-instance Binary Alternative where
- put (Alt v x) = put (v,x)
- get = liftM2 Alt get get
-
instance Binary Expr where
put (EAbs b x exp) = putWord8 0 >> put (b,x,exp)
put (EApp e1 e2) = putWord8 1 >> put (e1,e2)
@@ -153,6 +149,7 @@ instance Binary Symbol where put (SymKS ts) = putWord8 3 >> put ts
put (SymKP d vs) = putWord8 4 >> put (d,vs)
put SymNE = putWord8 5
+ put SymBIND = putWord8 6
get = do tag <- getWord8
case tag of
0 -> liftM2 SymCat get get
@@ -161,6 +158,7 @@ instance Binary Symbol where 3 -> liftM SymKS get
4 -> liftM2 (\d vs -> SymKP d vs) get get
5 -> return SymNE
+ 6 -> return SymBIND
_ -> decodingError
instance Binary PArg where
diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index 58ced6a1e..e86b02778 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -58,9 +58,10 @@ data Symbol = SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex | SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex | SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int - | SymKS [Token] - | SymKP [Token] [Alternative] + | SymKS Token | SymNE -- non exist + | SymBIND -- the special BIND token + | SymKP [Symbol] [([Symbol],[String])] deriving (Eq,Ord,Show) data Production = PApply {-# UNPACK #-} !FunId [PArg] @@ -75,10 +76,6 @@ type FunId = Int type SeqId = Int type BCAddr = Int -data Alternative = - Alt [Token] [String] - deriving (Eq,Ord,Show) - -- merge two PGFs; fails is differens absnames; priority to second arg diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs index 9c47583ad..e6e3c1136 100644 --- a/src/runtime/haskell/PGF/Forest.hs +++ b/src/runtime/haskell/PGF/Forest.hs @@ -80,7 +80,7 @@ bracketedTokn dp f@(Forest abs cnc forest root) = ltable = mkLinTable cnc isTrusted [] funid largs
in ((cat,fid),wildCId,either (const []) id $ getAbsTrees f arg Nothing dp,ltable)
descend forest (PCoerce fid) = render forest (PArg [] fid)
- descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [[LeafKS ts]]))
+ descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [map LeafKS ts]))
getVar (fid,_)
| fid == fidVar = wildCId
diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index cf4c78193..7ff7d9c7a 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -82,7 +82,7 @@ linTree pgf lang e = LInt n -> return (n_fid+1,((cidInt, n_fid),wildCId,[e0],([],ss (show n)))) LFlt f -> return (n_fid+1,((cidFloat, n_fid),wildCId,[e0],([],ss (show f)))) - ss s = listArray (0,0) [[LeafKS [s]]] + ss s = listArray (0,0) [[LeafKS s]] apply :: Maybe CncType -> FId -> Expr -> [CId] -> [CId] -> CId -> [Expr] -> [(FId,(CncType, CId, [Expr], LinTable))] apply mb_cty n_fid e0 ys xs f es = @@ -115,7 +115,7 @@ linTree pgf lang e = let args = [((wildCId, n_fid),wildCId,[e0],([],ss s))] return (n_fid+2,((cat,n_fid+1),wildCId,[e0],mkLinTable cnc (const True) xs funid args)) Nothing - | isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS [s]]]))) + | isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS s]]))) | otherwise -> do PCoerce fid <- maybe [] Set.toList (IntMap.lookup fid (pproductions cnc)) def (Just (cat,fid)) n_fid e0 ys xs s def Nothing n_fid e0 ys xs s = [] diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 2497d53ae..ffec9279f 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -156,9 +156,11 @@ data BracketedString -- that represents the same constituent. 
data BracketedTokn - = LeafKS [Token] - | LeafKP [Token] [Alternative] - | Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty + = Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty + | LeafKS Token + | LeafNE + | LeafBIND + | LeafKP [BracketedTokn] [([BracketedTokn],[String])] deriving Eq type LinTable = ([CId],Array.Array LIndex [BracketedTokn]) @@ -178,21 +180,30 @@ lengthBracketedString (Leaf _) = 1 lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss) untokn :: Maybe String -> BracketedTokn -> (Maybe String,[BracketedString]) -untokn nw (LeafKS ts) = (has_tok nw ts,map Leaf ts) -untokn nw (LeafKP d vs) = let ts = filter (not . null) (sel d vs nw) - in (has_tok nw ts,map Leaf ts) - where - sel d vs Nothing = d - sel d vs (Just w) = - case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of - v:_ -> v - _ -> d -untokn nw (Bracket_ cat fid index fun es bss) = - let (nw',bss') = mapAccumR untokn nw bss - in (nw',[Bracket cat fid index fun es (concat bss')]) - -has_tok nw [] = nw -has_tok nw (t:ts) = Just t +untokn nw bs = + case untokn nw bs of + (nw,Nothing ) -> (nw,[] ) + (nw,Just bss) -> (nw,bss) + where + untokn nw (Bracket_ cat fid index fun es bss) = + let (nw',bss') = mapAccumR untokn nw bss + in case sequence bss' of + Just bss -> (nw',Just [Bracket cat fid index fun es (concat bss)]) + Nothing -> (Nothing, Nothing) + untokn nw (LeafKS t) + | null t = (nw,Just []) + | otherwise = (Just t,Just [Leaf t]) + untokn nw LeafNE = (Nothing, Nothing) + untokn nw (LeafKP d vs) = let (nw',bss') = mapAccumR untokn nw (sel d vs nw) + in case sequence bss' of + Just bss -> (nw',Just (concat bss)) + Nothing -> (Nothing, Nothing) + where + sel d vs Nothing = d + sel d vs (Just w) = + case [v | (v,cs) <- vs, any (\c -> isPrefixOf c w) cs] of + v:_ -> v + _ -> d type CncType = (CId, FId) -- concrete type is 
the abstract type (the category) + the forest id @@ -204,11 +215,13 @@ mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq fi computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,CId,[Expr],LinTable)] -> [BracketedTokn] computeSeq filter seq args = concatMap compute seq where - compute (SymCat d r) = getArg d r - compute (SymLit d r) = getArg d r - compute (SymVar d r) = getVar d r - compute (SymKS ts) = [LeafKS ts] - compute (SymKP ts alts) = [LeafKP ts alts] + compute (SymCat d r) = getArg d r + compute (SymLit d r) = getArg d r + compute (SymVar d r) = getVar d r + compute (SymKS t) = [LeafKS t] + compute SymNE = [LeafNE] + compute SymBIND = [LeafKS "&+"] + compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]] getArg d r | not (null arg_lin) && @@ -218,7 +231,7 @@ computeSeq filter seq args = concatMap compute seq arg_lin = lin ! r (ct@(cat,fid),fun,es,(xs,lin)) = args !! d - getVar d r = [LeafKS [showCId (xs !! r)]] + getVar d r = [LeafKS (showCId (xs !! r))] where (ct,fun,es,(xs,lin)) = args !! d diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs index 2f8fdecc2..894b64dfb 100644 --- a/src/runtime/haskell/PGF/Morphology.hs +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -36,8 +36,8 @@ collectWords pinfo = Map.fromListWith (++) , sym <- elems (sequences pinfo ! 
seqid) , t <- sym2tokns sym] where - sym2tokns (SymKS ts) = ts - sym2tokns (SymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts] + sym2tokns (SymKS t) = [t] + sym2tokns (SymKP ts alts) = concat (map sym2tokns ts ++ [sym2tokns sym | (syms,ps) <- alts, sym <- syms]) sym2tokns _ = [] lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index a339c9add..f04a8b04c 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -221,9 +221,13 @@ splitLexicalRules cnc p_prods = wf ts = (ts,IntSet.singleton funid) - seq2prefix [] = TrieMap.fromList [wf []] - seq2prefix (SymKS ts :syms) = TrieMap.fromList [wf ts] - seq2prefix (SymKP ts alts:syms) = TrieMap.fromList (wf ts : [wf ts | Alt ts ps <- alts]) + seq2prefix [] = TrieMap.fromList [wf []] + seq2prefix (SymKS t :syms) = TrieMap.fromList [wf [t]] + seq2prefix (SymKP syms0 alts:syms) = TrieMap.unionsWith IntSet.union + (seq2prefix (syms0++syms) : + [seq2prefix (syms1 ++ syms) | (syms1,ps) <- alts]) + seq2prefix (SymNE :syms) = TrieMap.empty + seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]] updateConcrete abs cnc = let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc) diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index 7057db3bc..e50f8436e 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -244,14 +244,12 @@ getParseOutput (PState abs cnc chart cnt) ty@(DTyp _ start _) dp = flit _ = Nothing
ftok toks = TrieMap.unionWith Set.union (TrieMap.compose Nothing toks)
- cutAt ppos toks seqid =
+ cutAt ppos toks seqid =
let seq = unsafeAt (sequences cnc) seqid
init = take (ppos-1) (elems seq)
tail = case unsafeAt seq (ppos-1) of
- SymKS ts -> let ts' = reverse (drop (length toks) (reverse ts))
- in if null ts' then [] else [SymKS ts']
- SymKP ts _ -> let ts' = reverse (drop (length toks) (reverse ts))
- in if null ts' then [] else [SymKS ts']
+ SymKS t -> drop (length toks) [SymKS t]
+ SymKP ts _ -> reverse (drop (length toks) (reverse ts))
sym -> []
in init ++ tail
@@ -307,11 +305,18 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha Nothing -> process flit ftok cnc items4 acc' chart{active=insertAC key (Set.singleton item,new_sc) (active chart)}
Just (set,sc) | Set.member item set -> process flit ftok cnc items acc chart
| otherwise -> process flit ftok cnc items2 acc chart{active=insertAC key (Set.insert item set,IntMap.unionWith Set.union new_sc sc) (active chart)}
- SymKS toks -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid args key0) acc
+ SymKS tok -> let !acc' = ftok_ [tok] (Active j (ppos+1) funid seqid args key0) acc
in process flit ftok cnc items acc' chart
- SymKP strs vars
- -> let !acc' = foldl (\acc toks -> ftok_ toks (Active j (ppos+1) funid seqid args key0) acc) acc
- (strs:[strs' | Alt strs' _ <- vars])
+ SymNE -> process flit ftok cnc items acc chart
+ SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc
+ in process flit ftok cnc items acc' chart
+ SymKP syms vars
+ -> let to_tok (SymKS t) = [t]
+ to_tok SymBIND = ["&+"]
+ to_tok _ = []
+
+ !acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc
+ (syms:[syms' | (syms',_) <- vars])
in process flit ftok cnc items acc' chart
SymLit d r -> let PArg hypos fid = args !! d
key = AK fid r
diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index c0529b116..9385e81c4 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -89,10 +89,12 @@ ppPrintName (id,name) = ppSymbol (SymCat d r) = char '<' <> int d <> comma <> int r <> char '>' ppSymbol (SymLit d r) = char '{' <> int d <> comma <> int r <> char '}' ppSymbol (SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char '>' -ppSymbol (SymKS ts) = ppStrs ts -ppSymbol (SymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts))) +ppSymbol (SymKS t) = doubleQuotes (text t) +ppSymbol SymNE = text "nonExist" +ppSymbol SymBIND = text "BIND" +ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts))) -ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps) +ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . 
text) ps) ppStrs ss = doubleQuotes (hsep (map text ss)) diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index 27655166b..b5ff53af0 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -1518,17 +1518,13 @@ typedef struct { } PgfBracketLznState; static void -pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - PgfToken tok = gu_seq_get(toks, PgfToken, i); - PyObject* str = gu2py_string(tok); - PyList_Append(state->list, str); - Py_DECREF(str); - } + PyObject* str = gu2py_string(tok); + PyList_Append(state->list, str); + Py_DECREF(str); } static void @@ -1600,7 +1596,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, } static PgfLinFuncs pgf_bracket_lin_funcs = { - .symbol_tokens = pgf_bracket_lzn_symbol_tokens, + .symbol_token = pgf_bracket_lzn_symbol_token, .expr_literal = pgf_bracket_lzn_expr_literal, .begin_phrase = pgf_bracket_lzn_begin_phrase, .end_phrase = pgf_bracket_lzn_end_phrase |
