From b1402e8bd6a68a891b00a214d6cf184d66defe19 Mon Sep 17 00:00:00 2001 From: aarne Date: Mon, 22 Sep 2003 13:16:55 +0000 Subject: Founding the newly structured GF2.0 cvs archive. --- src/GF/UseGrammar/Custom.hs | 256 ++++++++++++++++++++++++++ src/GF/UseGrammar/Editing.hs | 358 ++++++++++++++++++++++++++++++++++++ src/GF/UseGrammar/GetTree.hs | 46 +++++ src/GF/UseGrammar/Information.hs | 130 +++++++++++++ src/GF/UseGrammar/Linear.hs | 195 ++++++++++++++++++++ src/GF/UseGrammar/MoreCustom.hs | 15 ++ src/GF/UseGrammar/Morphology.hs | 116 ++++++++++++ src/GF/UseGrammar/Paraphrases.hs | 53 ++++++ src/GF/UseGrammar/Parsing.hs | 98 ++++++++++ src/GF/UseGrammar/Randomized.hs | 47 +++++ src/GF/UseGrammar/RealMoreCustom.hs | 122 ++++++++++++ src/GF/UseGrammar/Session.hs | 110 +++++++++++ src/GF/UseGrammar/TeachYourself.hs | 69 +++++++ src/GF/UseGrammar/Tokenize.hs | 130 +++++++++++++ 14 files changed, 1745 insertions(+) create mode 100644 src/GF/UseGrammar/Custom.hs create mode 100644 src/GF/UseGrammar/Editing.hs create mode 100644 src/GF/UseGrammar/GetTree.hs create mode 100644 src/GF/UseGrammar/Information.hs create mode 100644 src/GF/UseGrammar/Linear.hs create mode 100644 src/GF/UseGrammar/MoreCustom.hs create mode 100644 src/GF/UseGrammar/Morphology.hs create mode 100644 src/GF/UseGrammar/Paraphrases.hs create mode 100644 src/GF/UseGrammar/Parsing.hs create mode 100644 src/GF/UseGrammar/Randomized.hs create mode 100644 src/GF/UseGrammar/RealMoreCustom.hs create mode 100644 src/GF/UseGrammar/Session.hs create mode 100644 src/GF/UseGrammar/TeachYourself.hs create mode 100644 src/GF/UseGrammar/Tokenize.hs (limited to 'src/GF/UseGrammar') diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs new file mode 100644 index 000000000..bf84d776b --- /dev/null +++ b/src/GF/UseGrammar/Custom.hs @@ -0,0 +1,256 @@ +module Custom where + +import Operations +import Text +import Tokenize +import qualified Grammar as G +import qualified AbsGFC as A +import qualified GFC as C +import qualified AbsGF as GF +import qualified MMacros as MM +import AbsCompute +import TypeCheck +------import Compile +import ShellState +import Editing +import Paraphrases +import Option +import CF +import CFIdent + +---- import CFtoGrammar +import PPrCF +import PrGrammar + +----import Morphology +-----import GrammarToHaskell +-----import GrammarToCanon (showCanon, showCanonOpt) +-----import qualified GrammarToGFC as GFC + +-- the cf parsing algorithms +import ChartParser -- or some other CF Parser + +import MoreCustom -- either small/ or big/. The one in Small is empty. + +import UseIO + +-- minimal version also used in Hugs. AR 2/12/2002. + +-- databases for customizable commands. AR 21/11/2001 +-- for: grammar parsers, grammar printers, term commands, string commands +-- idea: items added here are usable throughout GF; nothing else need be edited +-- they are often usable through the API: hence API cannot be imported here! + +-- Major redesign 3/4/2002: the first entry in each database is DEFAULT. +-- If no other value is given, the default is selected. +-- Because of this, two invariants have to be preserved: +-- ** no databases may be empty +-- ** additions are made to the end of the database + +-- these are the databases; the comment gives the name of the flag + +-- grammarFormat, "-format=x" or file suffix +customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar) + +-- grammarPrinter, "-printer=x" +customGrammarPrinter :: CustomData (StateGrammar -> String) + +-- syntaxPrinter, "-printer=x" +customSyntaxPrinter :: CustomData (GF.Grammar -> String) + +-- termPrinter, "-printer=x" +customTermPrinter :: CustomData (StateGrammar -> A.Exp -> String) + +-- termCommand, "-transform=x" +customTermCommand :: CustomData (StateGrammar -> A.Exp -> [A.Exp]) + +-- editCommand, "-edit=x" +customEditCommand :: CustomData (StateGrammar -> Action) + +-- filterString, "-filter=x" +customStringCommand :: CustomData (StateGrammar -> String -> String) + +-- useParser, "-parser=x" +customParser :: CustomData (StateGrammar -> CFCat -> CFParser) + +-- useTokenizer, "-lexer=x" +customTokenizer :: CustomData (StateGrammar -> String -> [CFTok]) + +-- useUntokenizer, "-unlexer=x" --- should be from token list to string +customUntokenizer :: CustomData (StateGrammar -> String -> String) + + +-- this is the way of selecting an item +customOrDefault :: Options -> OptFun -> CustomData a -> a +customOrDefault opts optfun db = maybe (defaultCustomVal db) id $ + customAsOptVal opts optfun db + +-- to produce menus of custom operations +customInfo :: CustomData a -> (String, [String]) +customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c)) + +------------------------------- + +type CommandId = String + +strCI :: String -> CommandId +strCI = id + +ciStr :: CommandId -> String +ciStr = id + +ciOpt :: CommandId -> Option +ciOpt = iOpt + +newtype CustomData a = CustomData (String, [(CommandId,a)]) +customData title db = CustomData (title,db) +dbCustomData (CustomData (_,db)) = db +titleCustomData (CustomData (t,_)) = t + +lookupCustom :: CustomData a -> CommandId -> Maybe a +lookupCustom = flip lookup . dbCustomData + +customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a +customAsOptVal opts optfun db = do + arg <- getOptVal opts optfun + lookupCustom db (strCI arg) + +-- take the first entry from the database +defaultCustomVal :: CustomData a -> a +defaultCustomVal (CustomData (s,db)) = + ifNull (error ("empty database:" +++ s)) (snd . head) db + +------------------------------------------------------------------------- +-- and here's the customizable part: + +-- grammar parsers: the ID is also used as file name suffix +customGrammarParser = + customData "Grammar parsers, selected by file name suffix" $ + [ +------ (strCI "gf", compileModule noOptions) -- DEFAULT +-- add your own grammar parsers here + ] + ++ moreCustomGrammarParser + + +customGrammarPrinter = + customData "Grammar printers, selected by option -printer=x" $ + [ +---- (strCI "gf", prt) -- DEFAULT + (strCI "cf", prCF . stateCF) + +{- ---- + (strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT + ,(strCI "canon", showCanon "Lang" . stateGrammarST) + ,(strCI "gfc", GFC.showGFC . stateGrammarST) + ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST) + ,(strCI "morpho", prMorpho . stateMorpho) + ,(strCI "opts", prOpts . stateOptions) +-} +-- add your own grammar printers here +--- also include printing via grammar2syntax! + ] + ++ moreCustomGrammarPrinter + +customSyntaxPrinter = + customData "Syntax printers, selected by option -printer=x" $ + [ +-- add your own grammar printers here + ] + ++ moreCustomSyntaxPrinter + + +customTermPrinter = + customData "Term printers, selected by option -printer=x" $ + [ + (strCI "gf", const prt) -- DEFAULT +-- add your own term printers here + ] + ++ moreCustomTermPrinter + +customTermCommand = + customData "Term transformers, selected by option -transform=x" $ + [ + (strCI "identity", \_ t -> [t]) -- DEFAULT +{- ---- + ,(strCI "compute", \g t -> err (const [t]) return (computeAbsTerm g t)) + ,(strCI "paraphrase", \g t -> mkParaphrases g t) + ,(strCI "typecheck", \g t -> err (const []) return (checkIfValidExp g t)) + ,(strCI "solve", \g t -> editAsTermCommand g + (uniqueRefinements g) t) + ,(strCI "context", \g t -> editAsTermCommand g + (contextRefinements g) t) +-} +--- ,(strCI "delete", \g t -> [MM.mExp0]) +-- add your own term commands here + ] + ++ moreCustomTermCommand + +customEditCommand = + customData "Editor state transformers, selected by option -edit=x" $ + [ + (strCI "identity", const return) -- DEFAULT + ,(strCI "transfer", const return) --- done ad hoc on top level +{- ---- + ,(strCI "typecheck", reCheckState) + ,(strCI "solve", solveAll) + ,(strCI "context", contextRefinements) + ,(strCI "compute", computeSubTree) +-} + ,(strCI "paraphrase", const return) --- done ad hoc on top level +-- add your own edit commands here + ] + ++ moreCustomEditCommand + +customStringCommand = + customData "String filters, selected by option -filter=x" $ + [ + (strCI "identity", const $ id) -- DEFAULT + ,(strCI "erase", const $ const "") + ,(strCI "take100", const $ take 100) + ,(strCI "text", const $ formatAsText) + ,(strCI "code", const $ formatAsCode) +---- ,(strCI "latexfile", const $ mkLatexFile) + ,(strCI "length", const $ show . length) +-- add your own string commands here + ] + ++ moreCustomStringCommand + +customParser = + customData "Parsers, selected by option -parser=x" $ + [ + (strCI "chart", chartParser . stateCF) +-- add your own parsers here + ] + ++ moreCustomParser + +customTokenizer = + customData "Tokenizers, selected by option -lexer=x" $ + [ + (strCI "words", const $ tokWords) + ,(strCI "literals", const $ tokLits) + ,(strCI "vars", const $ tokVars) + ,(strCI "chars", const $ map (tS . singleton)) + ,(strCI "code", const $ lexHaskell) + ,(strCI "text", const $ lexText) +---- ,(strCI "codelit", lexHaskellLiteral . stateIsWord) +---- ,(strCI "textlit", lexTextLiteral . stateIsWord) + ,(strCI "codeC", const $ lexC2M) + ,(strCI "codeCHigh", const $ lexC2M' True) +-- add your own tokenizers here + ] + ++ moreCustomTokenizer + +customUntokenizer = + customData "Untokenizers, selected by option -unlexer=x" $ + [ + (strCI "unwords", const $ id) -- DEFAULT + ,(strCI "text", const $ formatAsText) + ,(strCI "code", const $ formatAsCode) + ,(strCI "textlit", const $ formatAsTextLit) + ,(strCI "codelit", const $ formatAsCodeLit) + ,(strCI "concat", const $ concat . words) + ,(strCI "bind", const $ performBinds) +-- add your own untokenizers here + ] + ++ moreCustomUntokenizer diff --git a/src/GF/UseGrammar/Editing.hs b/src/GF/UseGrammar/Editing.hs new file mode 100644 index 000000000..616ddc7cc --- /dev/null +++ b/src/GF/UseGrammar/Editing.hs @@ -0,0 +1,358 @@ +module Editing where + +import Abstract +import qualified GFC +import TypeCheck +import LookAbs +import AbsCompute + +import Operations +import Zipper + +-- generic tree editing, with some grammar notions assumed. AR 18/8/2001 +-- 19/6/2003 for GFC + +type CGrammar = GFC.CanonGrammar + +type State = Loc TrNode + +-- the "empty" state +initState :: State +initState = tree2loc uTree + +isRootState :: State -> Bool +isRootState s = case actPath s of + Top -> True + _ -> False + +actTree :: State -> Tree +actTree (Loc (t,_)) = t + +actPath :: State -> Path TrNode +actPath (Loc (_,p)) = p + +actVal :: State -> Val +actVal = valNode . nodeTree . actTree + +actCat :: State -> Cat +actCat = errVal undefined . val2cat . actVal ---- undef + +actAtom :: State -> Atom +actAtom = atomTree . actTree + +actExp = tree2exp . actTree + +-- current local bindings +actBinds :: State -> Binds +actBinds = bindsNode . nodeTree . actTree + +-- constraints in current subtree +actConstrs :: State -> Constraints +actConstrs = allConstrsTree . actTree + +-- constraints in the whole tree +allConstrs :: State -> Constraints +allConstrs = allConstrsTree . loc2tree + +-- metas in current subtree +actMetas :: State -> [Meta] +actMetas = metasTree . actTree + +-- metas in the whole tree +allMetas :: State -> [Meta] +allMetas = metasTree . loc2tree + +actTreeBody :: State -> Tree +actTreeBody = bodyTree . actTree + +allPrevBinds :: State -> Binds +allPrevBinds = concatMap bindsNode . traverseCollect . actPath + +allBinds :: State -> Binds +allBinds s = actBinds s ++ allPrevBinds s + +actGen :: State -> Int +actGen = length . allBinds -- symbol generator for VGen + +allPrevVars :: State -> [Var] +allPrevVars = map fst . allPrevBinds + +allVars :: State -> [Var] +allVars = map fst . allBinds + +vGenIndex = length . allBinds + +actIsMeta = atomIsMeta . actAtom + +actMeta :: State -> Err Meta +actMeta = getMetaAtom . actAtom + +-- meta substs are not only on the actual path... +entireMetaSubst :: State -> MetaSubst +entireMetaSubst = concatMap metaSubstsNode . scanTree . loc2tree + +isCompleteTree = null . filter atomIsMeta . map atomNode . scanTree +isCompleteState = isCompleteTree . loc2tree + +initStateCat :: Context -> Cat -> Err State +initStateCat cont cat = do + return $ tree2loc (Tr (mkNode [] mAtom (cat2val cont cat) ([],[]), [])) + +-- this function only concerns the body of an expression... +annotateInState :: CGrammar -> Exp -> State -> Err Tree +annotateInState gr exp state = do + let binds = allBinds state + val = actVal state + annotateIn gr binds exp (Just val) + +-- ...whereas this one works with lambda abstractions +annotateExpInState :: CGrammar -> Exp -> State -> Err Tree +annotateExpInState gr exp state = do + let cont = allPrevBinds state + binds = actBinds state + val = actVal state + typ <- mkProdVal binds val + annotateIn gr binds exp (Just typ) + +treeByExp :: (Exp -> Err Exp) -> CGrammar -> Exp -> State -> Err Tree +treeByExp trans gr exp0 state = do + exp <- trans exp0 + annotateExpInState gr exp state + +-- actions + +type Action = State -> Err State + +newCat :: CGrammar -> Cat -> Action +newCat gr cat@(m,c) _ = do + cont <- lookupCatContext gr m c + testErr (null cont) "start cat must have null context" -- for easier meta refresh + initStateCat cont cat + +newTree :: Tree -> Action +newTree t _ = return $ tree2loc t + +newExpTC :: CGrammar -> Exp -> Action +newExpTC gr t s = annotate gr (refreshMetas [] t) >>= flip newTree s + +goNextMeta, goPrevMeta, goNextNewMeta, goPrevNewMeta, goNextMetaIfCan :: Action + +goNextMeta = repeatUntilErr actIsMeta goAhead -- can be the location itself +goPrevMeta = repeatUntilErr actIsMeta goBack + +goNextNewMeta s = goAhead s >>= goNextMeta -- always goes away from location +goPrevNewMeta s = goBack s >>= goPrevMeta + +goNextMetaIfCan = actionIfPossible goNextMeta + +actionIfPossible a s = return $ errVal s (a s) + +goFirstMeta, goLastMeta :: Action +goFirstMeta s = goNextMeta $ goRoot s +goLastMeta s = goLast s >>= goPrevMeta + +noMoreMetas :: State -> Bool +noMoreMetas = err (const True) (const False) . goNextMeta + +replaceSubTree :: Tree -> Action +replaceSubTree tree state = changeLoc state tree + +refineWithTree :: Bool -> CGrammar -> Tree -> Action +refineWithTree der gr tree state = do + m <- errIn "move pointer to meta" $ actMeta state + state' <- replaceSubTree tree state + let cs0 = allConstrs state' + (cs,ms) = splitConstraints cs0 + v = vClos $ tree2exp (bodyTree tree) + msubst = (m,v) : ms + metaSubstRefinements gr msubst $ mapLoc (performMetaSubstNode msubst) state' + + -- without dep. types, no constraints, no grammar needed - simply: do + -- testErr (actIsMeta state) "move pointer to meta" + -- replaceSubTree tree state + +refineAllNodes :: Action -> Action +refineAllNodes act state = do + let estate0 = goFirstMeta state + case estate0 of + Bad _ -> return state + Ok state0 -> do + (state',n) <- tryRefine 0 state0 + if n==0 + then return state + else actionIfPossible goFirstMeta state' + where + tryRefine n state = err (const $ return (state,n)) return $ do + state' <- goNextMeta state + meta <- actMeta state' + case act state' of + Ok state2 -> tryRefine (n+1) state2 + _ -> err (const $ return (state',n)) return $ do + state2 <- goNextNewMeta state' + tryRefine n state2 + +uniqueRefinements :: CGrammar -> Action +uniqueRefinements = refineAllNodes . uniqueRefine + +metaSubstRefinements :: CGrammar -> MetaSubst -> Action +metaSubstRefinements gr = refineAllNodes . metaSubstRefine gr + +contextRefinements :: CGrammar -> Action +contextRefinements gr = refineAllNodes contextRefine where + contextRefine state = case varRefinementsState state of + [(e,_)] -> refineWithAtom False gr e state + _ -> Bad "no unique refinement in context" + varRefinementsState state = + [r | r@(e,_) <- refinementsState gr state, isVariable e] + +uniqueRefine :: CGrammar -> Action +uniqueRefine gr state = case refinementsState gr state of + [(e,_)] -> refineWithAtom False gr e state + _ -> Bad "no unique refinement" + +metaSubstRefine :: CGrammar -> MetaSubst -> Action +metaSubstRefine gr msubst state = do + m <- errIn "move pointer to meta" $ actMeta state + case lookup m msubst of + Just v -> do + e <- val2expSafe v + refineWithExpTC False gr e state + _ -> Bad "no metavariable substitution available" + +refineWithExpTC :: Bool -> CGrammar -> Exp -> Action +refineWithExpTC der gr exp0 state = do + let oldmetas = allMetas state + exp = refreshMetas oldmetas exp0 + tree0 <- annotateInState gr exp state + let tree = addBinds (actBinds state) $ tree0 + refineWithTree der gr tree state + +refineWithAtom :: Bool -> CGrammar -> Ref -> Action -- function or variable +refineWithAtom der gr at state = do + val <- lookupRef gr (allBinds state) at + typ <- val2exp val + let oldvars = allVars state + exp <- ref2exp oldvars typ at + refineWithExpTC der gr exp state + +-- in this command, we know that the result is well-typed, since computation +-- rules have been type checked and the result is equal + +computeSubTree :: CGrammar -> Action +computeSubTree gr state = do + let exp = tree2exp (actTree state) + tree <- treeByExp (compute gr) gr exp state + replaceSubTree tree state + +-- but here we don't, since the transfer flag isn't type checked, +-- and computing the transfer function is not checked to preserve equality + +transferSubTree :: Maybe Fun -> CGrammar -> Action +transferSubTree Nothing _ s = return s +transferSubTree (Just fun) gr state = do + let exp = mkApp (qq fun) [tree2exp $ actTree state] + tree <- treeByExp (compute gr) gr exp state + state' <- replaceSubTree tree state + reCheckState gr state' + +deleteSubTree :: CGrammar -> Action +deleteSubTree gr state = + if isRootState state + then do + let cat = actCat state + newCat gr cat state + else do + let metas = allMetas state + binds = actBinds state + exp = refreshMetas metas mExp0 + tree <- annotateInState gr exp state + state' <- replaceSubTree (addBinds binds tree) state + reCheckState gr state' --- must be unfortunately done. 20/11/2001 + +wrapWithFun :: CGrammar -> (Fun,Int) -> Action +wrapWithFun gr (f@(m,c),i) state = do + typ <- lookupFunType gr m c + let olds = allPrevVars state + oldmetas = allMetas state + exp0 <- fun2wrap olds ((f,i),typ) (tree2exp (actTreeBody state)) + let exp = refreshMetas oldmetas exp0 + tree0 <- annotateInState gr exp state + let tree = addBinds (actBinds state) $ tree0 + state' <- replaceSubTree tree state + reCheckState gr state' --- must be unfortunately done. 20/11/2001 + +alphaConvert :: CGrammar -> (Var,Var) -> Action +alphaConvert gr (x,x') state = do + let oldvars = allPrevVars state + testErr (notElem x' oldvars) ("clash with previous bindings" +++ show x') + let binds0 = actBinds state + vars0 = map fst binds0 + testErr (notElem x' vars0) ("clash with other bindings" +++ show x') + let binds = [(if z==x then x' else z, t) | (z,t) <- binds0] + vars = map fst binds + exp' <- alphaConv (vars ++ oldvars) (x,x') (tree2exp (actTreeBody state)) + let exp = mkAbs vars exp' + tree <- annotateExpInState gr exp state + replaceSubTree tree state + +changeFunHead :: CGrammar -> Fun -> Action +changeFunHead gr f state = do + let state' = changeNode (changeAtom (const (atomC f))) state + reCheckState gr state' --- must be done because of constraints elsewhere + +peelFunHead :: CGrammar -> Action +peelFunHead gr state = do + state' <- forgetNode state + reCheckState gr state' --- must be done because of constraints elsewhere + +-- an expensive operation +reCheckState :: CGrammar -> State -> Err State +reCheckState gr st = annotate gr (tree2exp (loc2tree st)) >>= return . tree2loc + +-- extract metasubstitutions from constraints and solve them +solveAll :: CGrammar -> State -> Err State +solveAll gr st0 = do + st <- reCheckState gr st0 + let cs0 = allConstrs st + (cs,ms) = splitConstraints cs0 + metaSubstRefinements gr ms $ mapLoc (performMetaSubstNode ms) st + + +-- active refinements + +refinementsState :: CGrammar -> State -> [(Term,Val)] +refinementsState gr state = + let filt = possibleRefVal gr state in + if actIsMeta state + then refsForType filt gr (allBinds state) (actVal state) + else [] + +wrappingsState :: CGrammar -> State -> [((Fun,Int),Type)] +wrappingsState gr state + | actIsMeta state = [] + | isRootState state = funs + | otherwise = [rule | rule@(_,typ) <- funs, possibleRefVal gr state aval typ] + where + funs = funsOnType (possibleRefVal gr state) gr aval + aval = actVal state + +headChangesState :: CGrammar -> State -> [Fun] +headChangesState gr state = errVal [] $ do + f@(m,c) <- funAtom (actAtom state) + typ0 <- lookupFunType gr m c + return [fun | (fun,typ) <- funRulesOf gr, fun /= f, typ == typ0] + --- alpha-conv ! + +canPeelState :: CGrammar -> State -> Bool +canPeelState gr state = errVal False $ do + f@(m,c) <- funAtom (actAtom state) + typ <- lookupFunType gr m c + return $ isInOneType typ + +possibleRefVal :: CGrammar -> State -> Val -> Type -> Bool +possibleRefVal gr state val typ = errVal True $ do --- was False + vtyp <- valType typ + let gen = actGen state + cs <- return [(val, vClos vtyp)] --- eqVal gen val (vClos vtyp) --- only poss cs + return $ possibleConstraints gr cs --- a simple heuristic + diff --git a/src/GF/UseGrammar/GetTree.hs b/src/GF/UseGrammar/GetTree.hs new file mode 100644 index 000000000..9b545c7dd --- /dev/null +++ b/src/GF/UseGrammar/GetTree.hs @@ -0,0 +1,46 @@ +module GetTree where + +import GFC +import Values +import qualified Grammar as G +import Ident +import MMacros +import Macros +import Rename +import TypeCheck +import PGrammar +import ShellState + +import Operations + +-- how to form linearizable trees from strings and from terms of different levels +-- +-- String --> raw Term --> annot, qualif Term --> Tree + +string2tree :: StateGrammar -> String -> Tree +string2tree gr = errVal uTree . string2treeErr gr + +string2treeErr :: StateGrammar -> String -> Err Tree +string2treeErr gr s = do + t <- pTerm s + let t1 = refreshMetas [] t + let t2 = qualifTerm abstr t1 + annotate grc t2 + where + abstr = absId gr + grc = grammar gr + +string2Cat, string2Fun :: StateGrammar -> String -> (Ident,Ident) +string2Cat gr c = (absId gr,identC c) +string2Fun = string2Cat + +strings2Cat, strings2Fun :: String -> (Ident,Ident) +strings2Cat s = (identC m, identC (drop 1 c)) where (m,c) = span (/= '.') s +strings2Fun = strings2Cat + +string2ref :: StateGrammar -> String -> Err G.Term +string2ref _ ('x':'_':ds) = return $ freshAsTerm ds --- hack for generated vars +string2ref gr s = + if elem '.' s + then return $ uncurry G.Q $ strings2Fun s + else return $ G.Vr $ identC s diff --git a/src/GF/UseGrammar/Information.hs b/src/GF/UseGrammar/Information.hs new file mode 100644 index 000000000..569d8ace6 --- /dev/null +++ b/src/GF/UseGrammar/Information.hs @@ -0,0 +1,130 @@ +module Information where + +import Grammar +import Ident +import Modules +import Option +import CF +import PPrCF +import ShellState +import PrGrammar +import Lookup +import qualified GFC +import qualified AbsGFC + +import Operations +import UseIO + +-- information on module, category, function, operation, parameter,... AR 16/9/2003 +-- uses source grammar + +-- the top level function + +showInformation :: Options -> ShellState -> Ident -> IOE () +showInformation opts st c = do + is <- ioeErr $ getInformation opts st c + mapM_ (putStrLnE . prInformation opts c) is + +-- the data type of different kinds of information + +data Information = + IModAbs SourceAbs + | IModRes SourceRes + | IModCnc SourceCnc + | IModule SourceAbs ---- to be deprecated + | ICatAbs Ident Context [Ident] + | ICatCnc Ident Type [CFRule] Term + | IFunAbs Ident Type (Maybe Term) + | IFunCnc Ident Type [CFRule] Term + | IOper Ident Type Term + | IParam Ident [Param] [Term] + | IValue Ident Type + +type CatId = AbsGFC.CIdent +type FunId = AbsGFC.CIdent + +prInformation :: Options -> Ident -> Information -> String +prInformation opts c i = unlines $ prt c : case i of + IModule m -> [ + "module of type" +++ show (mtype m), + "extends" +++ show (extends m), + "opens" +++ show (opens m), + "defines" +++ unwords (map prt (ownConstants (jments m))) + ] + ICatAbs m co _ -> [ + "category in abstract module" +++ prt m, + "context" +++ prContext co + ] + ICatCnc m ty cfs tr -> [ + "category in concrete module" +++ prt m, + "linearization type" +++ prt ty + ] + IFunAbs m ty _ -> [ + "function in abstract module" +++ prt m, + "type" +++ prt ty + ] + IFunCnc m ty cfs tr -> [ + "function in concrete module" +++ prt m, + "linearization" +++ prt tr + --- "linearization type" +++ prt ty + ] + IOper m ty tr -> [ + "operation in resource module" +++ prt m, + "type" +++ prt ty, + "definition" +++ prt tr + ] + IParam m ty ts -> [ + "parameter type in resource module" +++ prt m, + "constructors" +++ unwords (map prParam ty), + "values" +++ unwords (map prt ts) + ] + IValue m ty -> [ + "parameter constructor in resource module" +++ prt m, + "type" +++ show ty + ] + +-- also finds out if an identifier is defined in many places + +getInformation :: Options -> ShellState -> Ident -> Err [Information] +getInformation opts st c = allChecks $ [ + do + m <- lookupModule src c + case m of + ModMod mo -> return $ IModule mo + _ -> prtBad "not a source module" c + ] ++ map lookInSrc ss ++ map lookInCan cs + where + lookInSrc (i,m) = do + j <- lookupInfo m c + case j of + AbsCat (Yes co) _ -> return $ ICatAbs i co [] --- + AbsFun (Yes ty) _ -> return $ IFunAbs i ty Nothing --- + CncCat (Yes ty) _ _ -> do + ---- let cat = ident2CFCat i c + ---- rs <- concat [rs | (c,rs) <- cf, ] + return $ ICatCnc i ty [] ty --- + CncFun _ (Yes tr) _ -> do + rs <- return [] + return $ IFunCnc i tr rs tr --- + ResOper (Yes ty) (Yes tr) -> return $ IOper i ty tr + ResParam (Yes ps) -> do + ts <- allParamValues src (QC i c) + return $ IParam i ps ts + ResValue (Yes ty) -> return $ IValue i ty --- + + _ -> prtBad "nothing available for" i + lookInCan (i,m) = do + Bad "nothing available yet in canonical" + + src = srcModules st + can = canModules st + ss = [(i,m) | (i,ModMod m) <- modules src] + cs = [(i,m) | (i,ModMod m) <- modules can] + cf = concatMap ruleGroupsOfCF $ map snd $ cfs st + +ownConstants :: BinTree (Ident, Info) -> [Ident] +ownConstants = map fst . filter isOwn . tree2list where + isOwn (c,i) = case i of + AnyInd _ _ -> False + _ -> True + diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs new file mode 100644 index 000000000..da1bfce52 --- /dev/null +++ b/src/GF/UseGrammar/Linear.hs @@ -0,0 +1,195 @@ +module Linear where + +import GFC +import AbsGFC +import qualified Abstract as A +import MkGFC (rtQIdent) ---- +import Ident +import PrGrammar +import CMacros +import Look +import Str +import Unlex +----import TypeCheck -- to annotate + +import Operations +import Zipper + +import Monad + +-- Linearization for canonical GF. AR 7/6/2003 + +-- The worker function: linearize a Tree, return +-- a record. Possibly mark subtrees. + +-- NB. Constants in trees are annotated by the name of the abstract module. +-- A concrete module name must be given to find (and choose) linearization rules. + +linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term +linearizeToRecord gr mk m = lin [] where + + lin ts t = errIn ("lint" +++ prt t) $ ---- + if A.isFocusNode (A.nodeTree t) + then liftM markFocus $ lint ts t + else lint ts t + + lint ts t@(Tr (n,xs)) = do + + let binds = A.bindsNode n + at = A.atomNode n + c <- A.val2cat $ A.valNode n + xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs + + r <- case at of + A.AtC f -> look f >>= comp xs' + A.AtL s -> return $ recS $ tK $ prt at + A.AtI i -> return $ recS $ tK $ prt at + A.AtV x -> lookCat c >>= comp [tK (prt at)] + A.AtM m -> lookCat c >>= comp [tK (prt at)] + + return $ mk ts $ mkBinds binds r + + look = lookupLin gr . redirectIdent m . rtQIdent + comp = ccompute gr + mkBinds bs bdy = case bdy of + R fs -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs + + recS t = R [Ass (L (identC "s")) t] ---- + + lookCat = return . errVal defLindef . look + ---- should always be given in the module + +type Marker = [Int] -> Term -> Term + +-- if no marking is wanted, use the following + +noMark :: [Int] -> Term -> Term +noMark = const id + +-- thus the special case: + +linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term +linearizeNoMark gr = linearizeToRecord gr noMark + +-- expand tables in linearized term to full, normal-order tables +-- NB expand from inside-out so that values are not looked up in copies of branches + +expandLinTables :: CanonGrammar -> Term -> Err Term +expandLinTables gr t = case t of + R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs] + T ty rs -> do + rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out + let t' = T ty $ map (uncurry Cas) rs' + vs <- alls ty + ps <- mapM term2patt vs + ts' <- mapM (comp . S t') $ vs + return $ T ty [Cas [p] t | (p,t) <- zip ps ts'] + FV ts -> liftM FV $ mapM exp ts + _ -> return t + where + alls = allParamValues gr + exp = expandLinTables gr + comp = ccompute gr [] + +-- from records, one can get to records of tables of strings + +rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]] +rec2strTables r = do + vs <- allLinValues r + mapM (mapPairsM (mapPairsM strsFromTerm)) vs + +-- from these tables, one may want to extract the ones for the "s" label + +strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]] +strTables2sTables ts = [t | r <- ts, (l,t) <- r, l == linLab0] + +linLab0 :: Label +linLab0 = L (identC "s") + +-- to get lists of token lists is easy +sTables2strs :: [[([Patt],[Str])]] -> [[Str]] +sTables2strs = map snd . concat + +-- from this, to get a list of strings --- customize unlexer +strs2strings :: [[Str]] -> [String] +strs2strings = map unlex + +-- finally, a top-level function to get a string from an expression +linTree2string :: CanonGrammar -> Ident -> A.Tree -> String +linTree2string gr m e = err id id $ do + t <- linearizeNoMark gr m e + r <- expandLinTables gr t + ts <- rec2strTables r + let ss = strs2strings $ sTables2strs $ strTables2sTables ts + ifNull (prtBad "empty linearization of" e) (return . head) ss + + +-- argument is a Tree, value is a list of strs; needed in Parsing + +allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str] +allLinsOfTree gr a e = err (singleton . str) id $ do + e' <- return e ---- annotateExp gr e + r <- linearizeNoMark gr a e' + r' <- expandLinTables gr r + ts <- rec2strTables r' + return $ concat $ sTables2strs $ strTables2sTables ts + +{- +-- the value is a list of strs +allLinStrings :: CanonGrammar -> Tree -> [Str] +allLinStrings gr ft = case allLinsAsStrs gr ft of + Ok ts -> map snd $ concat $ map snd $ concat ts + Bad s -> [str s] + +-- the value is a list of strs, not forgetting their arguments +allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]] +allLinsAsStrs gr ft = do + lpts <- allLinearizations gr ft + return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts + +-- the value is a list of terms of type Str, not forgetting their arguments +allLinearizations :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Term)])]] +allLinearizations gr ft = linearizeTree gr ft >>= allLinValues + +-- to a list of strings +linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String] +linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk + +-- to a list of token lists +linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]] +linearizeToStrss gr mk e = do + R rs <- linearizeToRecord gr mk e ---- + t <- lookupErr linLab0 [(r,s) | Ass r s <- rs] + return $ map strsFromTerm $ allInTable t + + +-- the value is a list of strings, not forgetting their arguments +allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]] +allLinsOfFun gr f = do + t <- lookupLin gr f + allLinValues t + + + +-} + + + + +{- ---- +-- returns printname if one exists; otherwise linearizes with metas +printOrLinearize :: CanonGrammar -> Fun -> String +printOrLinearize gr f = +{- ---- + errVal (prtt f) $ case lookupPrintname cnc f of + Ok s -> return s + _ -> -} + + unlines $ take 1 $ err singleton id $ + do + t <- lookupFunType gr f + f' <- ref2exp [] t (AC f) --- [] + lin f' + where + lin = linearizeToStrings gr (const id) ---- +-} diff --git a/src/GF/UseGrammar/MoreCustom.hs b/src/GF/UseGrammar/MoreCustom.hs new file mode 100644 index 000000000..0ebbb25fb --- /dev/null +++ b/src/GF/UseGrammar/MoreCustom.hs @@ -0,0 +1,15 @@ +module MoreCustom where + +-- All these lists are supposed to be empty! +-- Items should be added to ../Custom.hs instead. + +moreCustomGrammarParser = [] +moreCustomGrammarPrinter = [] +moreCustomSyntaxPrinter = [] +moreCustomTermPrinter = [] +moreCustomTermCommand = [] +moreCustomEditCommand = [] +moreCustomStringCommand = [] +moreCustomParser = [] +moreCustomTokenizer = [] +moreCustomUntokenizer = [] diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs new file mode 100644 index 000000000..102e41340 --- /dev/null +++ b/src/GF/UseGrammar/Morphology.hs @@ -0,0 +1,116 @@ +module Morphology where + +import AbsGFC +import GFC +import PrGrammar + +import Operations + +import Char +import List (sortBy, intersperse) +import Monad (liftM) + +-- construct a morphological analyser from a GF grammar. AR 11/4/2001 + +-- we have found the binary search tree sorted by word forms more efficient +-- than a trie, at least for grammars with 7000 word forms + +type Morpho = BinTree (String,[String]) + +emptyMorpho = NT + +-- with literals +appMorpho :: Morpho -> String -> (String,[String]) +appMorpho m s = (s, ps ++ ms) where + ms = case lookupTree id s m of + Ok vs -> vs + _ -> [] + ps = [] ---- case lookupLiteral s of + ---- Ok (t,_) -> [tagPrt t] + ---- _ -> [] + +-- without literals +appMorphoOnly :: Morpho -> String -> (String,[String]) +appMorphoOnly m s = (s, ms) where + ms = case lookupTree id s m of + Ok vs -> vs + _ -> [] + +-- recognize word, exluding literals +isKnownWord :: Morpho -> String -> Bool +isKnownWord mo = not . null . snd . appMorphoOnly mo + +mkMorpho :: CanonGrammar -> Morpho +mkMorpho gr = emptyMorpho ---- +{- ---- +mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where + mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun + mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun + + -- gather forms of lexical items + allLins fun = errVal [] $ do + ts <- allLinsOfFun gr fun + ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts + return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] + prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps)) + + -- gather syncategorematic words + allSyns fun = errVal [] $ do + tss <- allLinsOfFun gr fun + let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] + return $ concat $ map wordsInTerm ss + prSyn f s = (s, "+" ++ tagPrt f) + + -- all words, Left from lexical rules and Right syncategorematic + allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where + allFuns = allFunsWithValCat ab + lookType = errVal [] . liftM (:[]) . lookupFunType ab + lexRole t = case typeForm t of + Ok ([],_,_) -> Left + _ -> Right +-} + +-- printing full-form lexicon and results + +prMorpho :: Morpho -> String +prMorpho = unlines . map prMorphoAnalysis . tree2list + +prMorphoAnalysis :: (String,[String]) -> String +prMorphoAnalysis (w,fs) = unlines (w:fs) + +prMorphoAnalysisShort :: (String,[String]) -> String +prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where + w' = if null fs then w +++ "*" else "" + +tagPrt :: Print a => a -> String +tagPrt = ("+" ++) . prt --- could look up print name in grammar + +-- print all words recognized + +allMorphoWords :: Morpho -> [String] +allMorphoWords = map fst . tree2list + +-- analyse running text and show results either in short form or on separate lines +morphoTextShort mo = unwords . map (prMorphoAnalysisShort . appMorpho mo) . words +morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words + +-- format used in the Italian Verb Engine +prFullForm :: Morpho -> String +prFullForm = unlines . map prOne . tree2list where + prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) + +-- auxiliaries + +mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b]) +mkMorphoTree = sorted2tree . sortAssocs + +sortAssocs :: (Ord a, Eq b) => [(a,b)] -> [(a,[b])] +sortAssocs = arrange . sortBy (\ (x,_) (y,_) -> compare x y) where + arrange ((x,v):xvs) = arr x [v] xvs + arrange [] = [] + arr y vs xs = case xs of + (x,v):xvs -> if x==y then arr y vvs xvs else (y,vs) : arr x [v] xvs + where vvs = if elem v vs then vs else (v:vs) + _ -> [(y,vs)] + + diff --git a/src/GF/UseGrammar/Paraphrases.hs b/src/GF/UseGrammar/Paraphrases.hs new file mode 100644 index 000000000..f5dc710f9 --- /dev/null +++ b/src/GF/UseGrammar/Paraphrases.hs @@ -0,0 +1,53 @@ +module Paraphrases (mkParaphrases) where + +import Operations +import AbsGFC +import GFC +import Look +import CMacros ---- (mkApp, eqStrIdent) +import AbsCompute +import List (nub) + +-- paraphrases of GF terms. AR 6/10/1998 -- 24/9/1999 -- 5/7/2000 -- 5/6/2002 +-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL) +-- thus inherited from the old GF. Incomplete and inefficient... + +mkParaphrases :: CanonGrammar -> Exp -> [Exp] +mkParaphrases st t = [t] +---- mkParaphrases st = nub . map (beta []) . paraphrases (allDefs st) + +{- ---- +type Definition = (Fun,Trm) + +paraphrases :: [Definition] -> Trm -> [Trm] +paraphrases th t = + t : + paraImmed th t ++ +--- paraMatch th t ++ + case t of + App c a -> [App d b | d <- paraphrases th c, b <- paraphrases th a] + Abs x b -> [Abs x d | d <- paraphrases th b] + c -> [] + +paraImmed :: [Definition] -> Trm -> [Trm] +paraImmed defs t = + [Cn f | (f, u) <- defs, t == u] ++ --- eqTerm + case t of + Cn c -> [u | (f, u) <- defs, eqStrIdent f c] + _ -> [] +-} +{- --- +paraMatch :: [Definition] -> Trm -> [Trm] +paraMatch th@defs t = + [mkApp (Cn f) xx | (PC f zz, u) <- defs, + let (fs,sn) = fullApp u, fs == h, length sn == length zz] ++ + case findAMatch defs t of + Ok (g,b) -> [substTerm [] g b] + _ -> [] + where + (h,xx) = fullApp t + fullApp c = case c of + App f a -> (f', a' ++ [a]) where (f',a') = fullApp f + c -> (c,[]) + +-} diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs new file mode 100644 index 000000000..4cd4f4bc8 --- /dev/null +++ b/src/GF/UseGrammar/Parsing.hs @@ -0,0 +1,98 @@ +module Parsing where + +import CheckM +import qualified AbsGFC as C +import GFC +import MkGFC (trExp) ---- +import CMacros +import Linear +import Str +import CF +import CFIdent +import Ident +import TypeCheck +import Values +--import CFMethod +import Tokenize +import Profile +import Option +import Custom +import ShellState + +import Operations + +import List (nub) +import Monad (liftM) + +-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002 + +parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree] +parseString os sg cat = liftM fst . parseStringMsg os sg cat + +parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String) +parseStringMsg os sg cat s = do + (ts,(_,ss)) <- checkStart $ parseStringC os sg cat s + return (ts,unlines ss) + +parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] +parseStringC opts0 sg cat s = do + let opts = unionOptions opts0 $ stateOptions sg + cf = stateCF sg + gr = stateGrammarST sg + cn = cncId sg + tok = customOrDefault opts useTokenizer customTokenizer sg + parser = customOrDefault opts useParser customParser sg cat + tokens2trms opts sg cn parser (tok s) + +tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] +tokens2trms opts sg cn parser as = do + let res@(trees,info) = parser as + ts0 <- return $ nub (cfParseResults res) + ts <- case () of + _ | null ts0 -> checkWarn "No success in cf parsing" >> return [] + _ | raw -> do + ts1 <- return (map cf2trm0 ts0) ----- should not need annot + mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated + _ -> do + (ts1,_) <- checkErr $ mapErr postParse ts0 + ts2 <- mapM (checkErr . (annotate gr) . trExp) ts1 ---- + if forgive then return ts2 else do + let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2] + ps = [t | (t,ss) <- tsss, + any (compatToks as) (map str2cftoks ss)] + if null ps + then raise $ "Failure in morphology." ++ + if verb + then "\nPossible corrections: " +++++ + unlines (nub (map sstr (concatMap snd tsss))) + else "" + else return ps + + if verb + then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info + else return () + + return $ optIntOrAll opts flagNumber $ nub ts + where + gr = stateGrammarST sg + + raw = oElem rawParse opts + verb = oElem beVerbose opts + forgive = oElem forgiveParse opts + + unknown ts = case filter noMatch ts of + [] -> "where all words are known" + us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals + terminals = map TS $ cfTokens $ stateCF sg + noMatch t = all (not . compatTok t) terminals + + +--- too much type checking in building term info? return FullTerm to save work? + +-- raw parsing: so simple it is for a context-free CF grammar +cf2trm0 :: CFTree -> C.Exp +cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees) + where + cffun2trm (CFFun (fun,_)) = fun + mkApp = foldl C.EApp + mkAppAtom a = mkApp (C.EAtom a) diff --git a/src/GF/UseGrammar/Randomized.hs b/src/GF/UseGrammar/Randomized.hs new file mode 100644 index 000000000..dceb6acc6 --- /dev/null +++ b/src/GF/UseGrammar/Randomized.hs @@ -0,0 +1,47 @@ +module Randomized where + +import Abstract +import Editing + +import Operations +import Zipper + +--- import Arch (myStdGen) --- circular for hbc +import Random --- (mkStdGen, StdGen, randoms) --- bad import for hbc + +-- random generation and refinement. AR 22/8/2001 +-- implemented as sequence of refinement menu selecsions, encoded as integers + +myStdGen = mkStdGen --- + +-- build one random tree; use mx to prevent infinite search +mkRandomTree :: StdGen -> Int -> CGrammar -> QIdent -> Err Tree +mkRandomTree gen mx gr cat = mkTreeFromInts (take mx (randoms gen)) gr cat + +refineRandom :: StdGen -> Int -> CGrammar -> Action +refineRandom gen mx = mkStateFromInts $ take mx $ map abs (randoms gen) + +-- build a tree from a list of integers +mkTreeFromInts :: [Int] -> CGrammar -> QIdent -> Err Tree +mkTreeFromInts ints gr cat = do + st0 <- newCat gr cat initState + state <- mkStateFromInts ints gr st0 + return $ loc2tree state + +mkStateFromInts :: [Int] -> CGrammar -> Action +mkStateFromInts ints gr = mkRandomState ints where + mkRandomState [] state = do + testErr (isCompleteState state) "not completed" + return state + mkRandomState (n:ns) state = do + let refs = refinementsState gr state + testErr (not (null refs)) $ "no refinements available for" +++ + prt (actVal state) + (ref,_) <- (refs !? (n `mod` (length refs))) + state1 <- refineWithAtom False gr ref state + if isCompleteState state1 + then return state1 + else do + state2 <- goNextMeta state1 + mkRandomState ns state2 + diff --git a/src/GF/UseGrammar/RealMoreCustom.hs b/src/GF/UseGrammar/RealMoreCustom.hs new file mode 100644 index 000000000..b9f461a1f --- /dev/null +++ b/src/GF/UseGrammar/RealMoreCustom.hs @@ -0,0 +1,122 @@ +module MoreCustom where + +import Operations +import Text +import Tokenize +import UseGrammar +import qualified UseSyntax as S +import ShellState +import Editing +import Paraphrases +import Option +import CF +import CFIdent --- (CFTok, tS) + +import EBNF +import CFtoGrammar +import PPrCF + +import CFtoHappy +import Morphology +import GrammarToHaskell +import GrammarToCanon (showCanon) +import GrammarToXML +import qualified SyntaxToLatex as L +import GFTex +import MkResource +import SeparateOper + +-- the cf parsing algorithms +import ChartParser -- or some other CF Parser +import Earley -- such as this one +---- import HappyParser -- or this... + +import qualified PPrSRG as SRG +import PPrGSL + +import qualified TransPredCalc as PC + +-- databases for customizable commands. AR 21/11/2001 +-- Extends ../Custom. + +moreCustomGrammarParser = + [ + (strCIm "gfl", S.parseGrammar . extractGFLatex) + ,(strCIm "tex", S.parseGrammar . extractGFLatex) + ,(strCIm "ebnf", pAsGrammar pEBNFasGrammar) + ,(strCIm "cf", pAsGrammar pCFAsGrammar) +-- add your own grammar parsers here + ] + where + -- use a parser with no imports or flags + pAsGrammar p = err Bad (\g -> return (([],noOptions),g)) . p + + +moreCustomGrammarPrinter = + [ + (strCIm "happy", cf2HappyS . stateCF) + ,(strCIm "srg", SRG.prSRG . stateCF) + ,(strCIm "gsl", prGSL . stateCF) + ,(strCIm "gfhs", show . stateGrammarST) + ,(strCIm "haskell", grammar2haskell . st2grammar . stateGrammarST) + ,(strCIm "xml", unlines . prDTD . grammar2dtd . stateAbstract) + ,(strCIm "fullform",prFullForm . stateMorpho) + ,(strCIm "resource",prt . st2grammar . mkResourceGrammar . stateGrammarST) + ,(strCIm "resourcetypes", + prt . operTypeGrammar . st2grammar . mkResourceGrammar . stateGrammarST) + ,(strCIm "resourcedefs", + prt . operDefGrammar . st2grammar . mkResourceGrammar . stateGrammarST) +-- add your own grammar printers here +--- also include printing via grammar2syntax! + ] + +moreCustomSyntaxPrinter = + [ + (strCIm "gf", S.prSyntax) -- DEFAULT + ,(strCIm "latex", L.syntax2latexfile) +-- add your own grammar printers here + ] + +moreCustomTermPrinter = + [ + (strCIm "xml", \g t -> unlines $ prElementX $ term2elemx (stateAbstract g) t) +-- add your own term printers here + ] + +moreCustomTermCommand = + [ + (strCIm "predcalc", \_ t -> PC.transfer t) +-- add your own term commands here + ] + +moreCustomEditCommand = + [ +-- add your own edit commands here + ] + +moreCustomStringCommand = + [ +-- add your own string commands here + ] + +moreCustomParser = + [ + (strCIm "chart", chartParser . stateCF) + ,(strCIm "earley", earleyParser . stateCF) +-- ,(strCIm "happy", const $ lexHaskell) +-- ,(strCIm "td", const $ lexText) +-- add your own parsers here + ] + +moreCustomTokenizer = + [ +-- add your own tokenizers here + ] + +moreCustomUntokenizer = + [ +-- add your own untokenizers here + ] + + +strCIm = id diff --git a/src/GF/UseGrammar/Session.hs b/src/GF/UseGrammar/Session.hs new file mode 100644 index 000000000..bf2dd30ab --- /dev/null +++ b/src/GF/UseGrammar/Session.hs @@ -0,0 +1,110 @@ +module Session where + +import Abstract +import Option +---- import Custom +import Editing + +import Operations + +-- First version 8/2001. Adapted to GFC with modules 19/6/2003. +-- Nothing had to be changed, which is a sign of good modularity. + +-- keep these abstract + +type SState = [(State,[Exp],SInfo)] -- exps are candidate refinements +type SInfo = ([String],(Int,Options)) -- string is message, int is the view + +initSState :: SState +initSState = [(initState, [], (["Select category to start"],(0,noOptions)))] + -- instead of empty + +okInfo n = ([],(n,True)) + +stateSState ((s,_,_):_) = s +candsSState ((_,ts,_):_) = ts +infoSState ((_,_,i):_) = i +msgSState ((_,_,(m,_)):_) = m +viewSState ((_,_,(_,(v,_))):_) = v +optsSState ((_,_,(_,(_,o))):_) = o + +treeSState = actTree . stateSState + + +-- from state to state + +type ECommand = SState -> SState + +-- elementary commands + +-- change state, drop cands, drop message, preserve options +changeState :: State -> ECommand +changeState s ss = changeMsg [] $ (s,[],infoSState ss) : ss + +changeCands :: [Exp] -> ECommand +changeCands ts ss@((s,_,(_,b)):_) = (s,ts,(candInfo ts,b)) : ss -- add new state + +changeMsg :: [String] -> ECommand +changeMsg m ((s,ts,(_,b)):ss) = (s,ts,(m,b)) : ss -- just change message + +changeView :: ECommand +changeView ((s,ts,(m,(v,b))):ss) = (s,ts,(m,(v+1,b))) : ss -- toggle view + +changeStOptions :: (Options -> Options) -> ECommand +changeStOptions f ((s,ts,(m,(v,o))):ss) = (s,ts,(m,(v, f o))) : ss + +noNeedForMsg = changeMsg [] -- everything's all right: no message + +candInfo ts = case length ts of + 0 -> ["no acceptable alternative"] + 1 -> ["just one acceptable alternative"] + n -> [show n +++ "alternatives to select"] + +-- keep SState abstract from this on + +-- editing commands + +action2command :: Action -> ECommand +action2command act state = case act (stateSState state) of + Ok s -> changeState s state + Bad m -> changeMsg [m] state + +action2commandNext :: Action -> ECommand -- move to next meta after execution +action2commandNext act = action2command (\s -> act s >>= goNextMetaIfCan) + +undoCommand :: ECommand +undoCommand ss@[_] = changeMsg ["cannot go back"] ss +undoCommand (_:ss) = changeMsg ["successful undo"] ss + +selectCand :: CGrammar -> Int -> ECommand +selectCand gr i state = err (\m -> changeMsg [m] state) id $ do + exp <- candsSState state !? i + let s = stateSState state + tree <- annotateInState gr exp s + return $ case replaceSubTree tree s of + Ok st' -> changeState st' state + Bad s -> changeMsg [s] state + +refineByExps :: Bool -> CGrammar -> [Exp] -> ECommand +refineByExps der gr trees = case trees of + [t] -> action2commandNext (refineWithExpTC der gr t) + _ -> changeCands trees + +replaceByTrees :: CGrammar -> [Exp] -> ECommand +replaceByTrees gr trees = case trees of + [t] -> action2commandNext (\s -> + annotateExpInState gr t s >>= flip replaceSubTree s) + _ -> changeCands trees + +{- ---- +replaceByEditCommand :: CGrammar -> String -> ECommand +replaceByEditCommand gr co = + action2command $ + maybe return ($ gr) $ + lookupCustom customEditCommand (strCI co) + +replaceByTermCommand :: CGrammar -> String -> Exp -> ECommand +replaceByTermCommand gr co exp = + replaceByTrees gr $ maybe [exp] (\f -> f (abstractOf gr) exp) $ + lookupCustom customTermCommand (strCI co) +-} diff --git a/src/GF/UseGrammar/TeachYourself.hs b/src/GF/UseGrammar/TeachYourself.hs new file mode 100644 index 000000000..9037b9198 --- /dev/null +++ b/src/GF/UseGrammar/TeachYourself.hs @@ -0,0 +1,69 @@ +module TeachYourself where + +import Operations +import UseIO + +import UseGrammar +import Linear (allLinsIfContinuous) +import ShellState +import API +import Option + +import Random --- (randoms) --- bad import for hbc +import Arch (myStdGen) +import System + +-- translation and morphology quiz. AR 10/5/2000 -- 12/4/2002 + +teachTranslation :: Options -> GFGrammar -> GFGrammar -> IO () +teachTranslation opts ig og = do + tts <- transTrainList opts ig og infinity + let qas = [ (q, mkAnswer as) | (q,as) <- tts] + teachDialogue qas "Welcome to GF Translation Quiz." + +transTrainList :: + Options -> GFGrammar -> GFGrammar -> Integer -> IO [(String,[String])] +transTrainList opts ig og number = do + ts <- randomTermsIO opts ig (fromInteger number) + return $ map mkOne $ ts + where + cat = firstCatOpts opts ig + mkOne t = (norml (linearize ig t),map (norml . linearize og) (homonyms ig cat t)) + +teachMorpho :: Options -> GFGrammar -> IO () +teachMorpho opts ig = useIOE () $ do + tts <- morphoTrainList opts ig infinity + let qas = [ (q, mkAnswer as) | (q,as) <- tts] + ioeIO $ teachDialogue qas "Welcome to GF Morphology Quiz." + +morphoTrainList :: Options -> GFGrammar -> Integer -> IOE [(String,[String])] +morphoTrainList opts ig number = do + ts <- ioeIO $ randomTreesIO opts ig (fromInteger number) + gen <- ioeIO $ myStdGen (fromInteger number) + mkOnes gen ts + where + mkOnes gen (t:ts) = do + psss <- ioeErr $ allLinsIfContinuous gr t + let pss = concat psss + let (i,gen') = randomR (0, length pss - 1) gen + (ps,ss) <- ioeErr $ pss !? i + (_,ss0) <- ioeErr $ pss !? 0 + let bas = sstrV $ take 1 ss0 + more <- mkOnes gen' ts + return $ (bas +++ ":" +++ unwords (map prt ps), return (sstrV ss)) : more + mkOnes gen [] = return [] + + gr = stateConcrete ig + +-- compare answer to the list of possible answers, increase score and give feedback +mkAnswer :: [String] -> String -> (Integer, String) +mkAnswer as s = if (elem (norml s) as) + then (1,"Yes.") + else (0,"No, not" +++ s ++ ", but" ++++ unlines as) + +norml = unwords . words + +--- the maximal number of precompiled quiz problems +infinity :: Integer +infinity = 123 + diff --git a/src/GF/UseGrammar/Tokenize.hs b/src/GF/UseGrammar/Tokenize.hs new file mode 100644 index 000000000..dd0879931 --- /dev/null +++ b/src/GF/UseGrammar/Tokenize.hs @@ -0,0 +1,130 @@ +module Tokenize where + +import Operations +---- import UseGrammar (isLiteral,identC) +import CFIdent + +import Char + +-- lexers = tokenizers, to prepare input for GF grammars. AR 4/1/2002 +-- an entry for each is included in Custom.customTokenizer + +-- just words + +tokWords :: String -> [CFTok] +tokWords = map tS . words + +tokLits :: String -> [CFTok] +tokLits = map mkCFTok . words + +tokVars :: String -> [CFTok] +tokVars = map mkCFTokVar . words + +mkCFTok :: String -> CFTok +mkCFTok s = tS s ---- if (isLiteral s) then (mkLit s) else (tS s) + +mkCFTokVar :: String -> CFTok +mkCFTokVar s = case s of + '?':_:_ -> tM s + 'x':'_':_ -> tV s + 'x':[] -> tV s + '$':xs@(_:_) -> if last s == '$' then tV (init xs) else tS s + _ -> tS s + +mkLit :: String -> CFTok +mkLit s = if (all isDigit s) then (tI s) else (tL s) + +mkTL :: String -> CFTok +mkTL s = if (all isDigit s) then (tI s) else (tL ("'" ++ s ++ "'")) + + +-- Haskell lexer, usable for much code + +lexHaskell :: String -> [CFTok] +lexHaskell ss = case lex ss of + [(w@(_:_),ws)] -> tS w : lexHaskell ws + _ -> [] + +-- somewhat shaky text lexer + +lexText :: String -> [CFTok] +lexText = uncap . lx where + + lx s = case s of + p : cs | isMPunct p -> tS [p] : uncap (lx cs) + p : cs | isPunct p -> tS [p] : lx cs + s : cs | isSpace s -> lx cs + _ : _ -> getWord s + _ -> [] + + getWord s = tS w : lx ws where (w,ws) = span isNotSpec s + isMPunct c = elem c ".!?" + isPunct c = elem c ",:;()\"" + isNotSpec c = not (isMPunct c || isPunct c || isSpace c) + uncap (TS (c:cs) : ws) = tC (c:cs) : ws + uncap s = s + +-- lexer for C--, a mini variant of C + +lexC2M :: String -> [CFTok] +lexC2M = lexC2M' False + +lexC2M' :: Bool -> String -> [CFTok] +lexC2M' isHigherOrder s = case s of + '#':cs -> lexC $ dropWhile (/='\n') cs + '/':'*':cs -> lexC $ dropComment cs + c:cs | isSpace c -> lexC cs + c:cs | isAlpha c -> getId s + c:cs | isDigit c -> getLit s + c:d:cs | isSymb [c,d] -> tS [c,d] : lexC cs + c:cs | isSymb [c] -> tS [c] : lexC cs + _ -> [] --- covers end of file and unknown characters + where + lexC = lexC2M' isHigherOrder + getId s = mkT i : lexC cs where (i,cs) = span isIdChar s + getLit s = tI i : lexC cs where (i,cs) = span isDigit s + isIdChar c = isAlpha c || isDigit c || elem c "'_" + isSymb = reservedAnsiCSymbol + dropComment s = case s of + '*':'/':cs -> cs + _:cs -> dropComment cs + _ -> [] + mkT i = if (isRes i) then (tS i) else + if isHigherOrder then (tV i) else (tL ("'" ++ i ++ "'")) + isRes = reservedAnsiC + + +reservedAnsiCSymbol s = case lookupTree show s ansiCtree of + Ok True -> True + _ -> False + +reservedAnsiC s = case lookupTree show s ansiCtree of + Ok False -> True + _ -> False + +-- for an efficient lexer: precompile this! +ansiCtree = buildTree $ [(s,True) | s <- reservedAnsiCSymbols] ++ + [(s,False) | s <- reservedAnsiCWords] + +reservedAnsiCSymbols = words $ + "<<= >>= << >> ++ -- == <= >= *= += -= %= /= &= ^= |= " ++ + "^ { } = , ; + * - ( ) < > & % ! ~" + +reservedAnsiCWords = words $ + "auto break case char const continue default " ++ + "do double else enum extern float for goto if int " ++ + "long register return short signed sizeof static struct switch typedef " ++ + "union unsigned void volatile while " ++ + "main printin putchar" --- these are not ansi-C + +-- turn unknown tokens into string literals; not recursively for literals 123, 'foo' + +unknown2string :: (String -> Bool) -> [CFTok] -> [CFTok] +unknown2string isKnown = map mkOne where + mkOne t@(TS s) = if isKnown s then t else mkTL s + mkOne t@(TC s) = if isKnown s then t else mkTL s + mkOne t = t + +lexTextLiteral isKnown = unknown2string isKnown . lexText +lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell + -- cgit v1.2.3