diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
| commit | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch) | |
| tree | 0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/UseGrammar | |
| parent | fe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff) | |
removed src for 2.9
Diffstat (limited to 'src/GF/UseGrammar')
| -rw-r--r-- | src/GF/UseGrammar/Custom.hs | 494 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Editing.hs | 435 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Generate.hs | 116 | ||||
| -rw-r--r-- | src/GF/UseGrammar/GetTree.hs | 74 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Information.hs | 162 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Linear.hs | 292 | ||||
| -rw-r--r-- | src/GF/UseGrammar/MatchTerm.hs | 50 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Morphology.hs | 140 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Paraphrases.hs | 70 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Parsing.hs | 177 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Randomized.hs | 66 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Session.hs | 181 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Statistics.hs | 44 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Tokenize.hs | 222 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Transfer.hs | 79 | ||||
| -rw-r--r-- | src/GF/UseGrammar/TreeSelections.hs | 77 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Treebank.hs | 251 |
17 files changed, 0 insertions, 2930 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs deleted file mode 100644 index 983b7f683..000000000 --- a/src/GF/UseGrammar/Custom.hs +++ /dev/null @@ -1,494 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Custom --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/16 10:21:21 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.85 $ --- --- A database for customizable GF shell commands. --- --- databases for customizable commands. AR 21\/11\/2001. --- for: grammar parsers, grammar printers, term commands, string commands. --- idea: items added here are usable throughout GF; nothing else need be edited. --- they are often usable through the API: hence API cannot be imported here! --- --- Major redesign 3\/4\/2002: the first entry in each database is DEFAULT. --- If no other value is given, the default is selected. --- Because of this, two invariants have to be preserved: --- --- - no databases may be empty --- --- - additions are made to the end of the database ------------------------------------------------------------------------------ - -module GF.UseGrammar.Custom where - -import GF.Data.Operations -import GF.Text.Text -import GF.UseGrammar.Tokenize -import GF.Grammar.Values -import qualified GF.Grammar.Grammar as G -import qualified GF.Canon.AbsGFC as A -import qualified GF.Canon.GFC as C - -import qualified GF.Devel.GFCCtoJS as JS -import GF.Canon.CanonToGFCC -import qualified GF.Devel.GFCCtoHaskell as CCH - -import qualified GF.Source.AbsGF as GF -import qualified GF.Grammar.MMacros as MM -import GF.Grammar.AbsCompute -import GF.Grammar.TypeCheck -import GF.UseGrammar.Generate -import GF.UseGrammar.MatchTerm -import GF.UseGrammar.Linear (unoptimizeCanon) -------import Compile -import GF.Compile.ShellState -import GF.UseGrammar.Editing -import GF.UseGrammar.Paraphrases -import GF.Infra.Option -import GF.CF.CF -import GF.CF.CFIdent - -import GF.Canon.CanonToGrammar -import GF.CF.PPrCF -import GF.CF.PrLBNF -import GF.Grammar.PrGrammar -import GF.Compile.PrOld -import GF.Canon.MkGFC -import GF.Speech.PrGSL (gslPrinter) -import GF.Speech.PrJSGF (jsgfPrinter) -import GF.Speech.PrSRGS -import GF.Speech.PrSRGS_ABNF -import qualified GF.Speech.SISR as SISR -import GF.Speech.PrSLF -import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) -import GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) -import GF.Speech.GrammarToVoiceXML (grammar2vxml) - -import GF.Data.Zipper - -import GF.UseGrammar.Statistics -import GF.UseGrammar.Morphology -import GF.UseGrammar.Information -import GF.API.GrammarToHaskell -import GF.API.GrammarToTransfer ------import GrammarToCanon (showCanon, showCanonOpt) ------import qualified GrammarToGFC as GFC -import GF.Probabilistic.Probabilistic (prProbs) - --- the cf parsing algorithms -import GF.CF.ChartParser -- OBSOLETE -import qualified GF.Parsing.CF as PCF -import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE - --- grammar conversions -- peb 19/4-04 --- see also customGrammarPrinter -import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE -import qualified GF.Printing.PrintParser as PrtOld -- OBSOLETE -import qualified GF.Infra.Print as Prt -import qualified GF.Conversion.GFC as Cnv -import qualified GF.Conversion.Types as CnvTypes -import qualified GF.Conversion.Haskell as CnvHaskell -import qualified GF.Conversion.Prolog as CnvProlog -import qualified GF.Conversion.TypeGraph as CnvTypeGraph -import GF.Canon.Unparametrize -import GF.Canon.Subexpressions -import GF.Canon.AbsToBNF - -import GF.Canon.GFC -import qualified GF.Canon.MkGFC as MC -import GF.CFGM.PrintCFGrammar (prCanonAsCFGM) -import GF.Visualization.VisualizeGrammar (visualizeCanonGrammar, visualizeSourceGrammar) - -import GF.API.MyParser - -import qualified GF.Infra.Modules as M -import GF.Infra.UseIO - -import Control.Monad -import Data.Char -import Data.Maybe (fromMaybe) - --- character codings -import GF.Text.Unicode -import GF.Text.UTF8 (decodeUTF8) -import GF.Text.Greek (mkGreek) -import GF.Text.Arabic (mkArabic) -import GF.Text.Hebrew (mkHebrew) -import GF.Text.Russian (mkRussian, mkRusKOI8) -import GF.Text.Ethiopic (mkEthiopic) -import GF.Text.Tamil (mkTamil) -import GF.Text.OCSCyrillic (mkOCSCyrillic) -import GF.Text.LatinASupplement (mkLatinASupplement) -import GF.Text.Devanagari (mkDevanagari) -import GF.Text.Hiragana (mkJapanese) -import GF.Text.ExtendedArabic (mkArabic0600) -import GF.Text.ExtendedArabic (mkExtendedArabic) -import GF.Text.ExtraDiacritics (mkExtraDiacritics) - --- minimal version also used in Hugs. AR 2/12/2002. - --- databases for customizable commands. AR 21/11/2001 --- for: grammar parsers, grammar printers, term commands, string commands --- idea: items added here are usable throughout GF; nothing else need be edited --- they are often usable through the API: hence API cannot be imported here! - --- Major redesign 3/4/2002: the first entry in each database is DEFAULT. --- If no other value is given, the default is selected. --- Because of this, two invariants have to be preserved: --- - no databases may be empty --- - additions are made to the end of the database - --- * these are the databases; the comment gives the name of the flag - --- | grammarFormat, \"-format=x\" or file suffix -customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar) - --- | grammarPrinter, \"-printer=x\" -customGrammarPrinter :: CustomData (Options -> StateGrammar -> String) - --- | multiGrammarPrinter, \"-printer=x\" -customMultiGrammarPrinter :: CustomData (Options -> CanonGrammar -> String) - --- | syntaxPrinter, \"-printer=x\" -customSyntaxPrinter :: CustomData (GF.Grammar -> String) - --- | termPrinter, \"-printer=x\" -customTermPrinter :: CustomData (StateGrammar -> Tree -> String) - --- | termCommand, \"-transform=x\" -customTermCommand :: CustomData (StateGrammar -> Tree -> [Tree]) - --- | editCommand, \"-edit=x\" -customEditCommand :: CustomData (StateGrammar -> Action) - --- | filterString, \"-filter=x\" -customStringCommand :: CustomData (StateGrammar -> String -> String) - --- | useParser, \"-parser=x\" -customParser :: CustomData (StateGrammar -> CFCat -> CFParser) - --- | useTokenizer, \"-lexer=x\" -customTokenizer :: CustomData (StateGrammar -> String -> [[CFTok]]) - --- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string -customUntokenizer :: CustomData (StateGrammar -> String -> String) - --- | uniCoding, \"-coding=x\" --- --- contains conversions from different codings to the internal --- unicode coding -customUniCoding :: CustomData (String -> String) - --- | this is the way of selecting an item -customOrDefault :: Options -> OptFun -> CustomData a -> a -customOrDefault opts optfun db = maybe (defaultCustomVal db) id $ - customAsOptVal opts optfun db - --- | to produce menus of custom operations -customInfo :: CustomData a -> (String, [String]) -customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c)) - -------------------------------- --- * types and stuff - -type CommandId = String - -strCI :: String -> CommandId -strCI = id - -ciStr :: CommandId -> String -ciStr = id - -ciOpt :: CommandId -> Option -ciOpt = iOpt - -newtype CustomData a = CustomData (String, [(CommandId,a)]) - -customData :: String -> [(CommandId, a)] -> CustomData a -customData title db = CustomData (title,db) - -dbCustomData :: CustomData a -> [(CommandId, a)] -dbCustomData (CustomData (_,db)) = db - -titleCustomData :: CustomData a -> String -titleCustomData (CustomData (t,_)) = t - -lookupCustom :: CustomData a -> CommandId -> Maybe a -lookupCustom = flip lookup . dbCustomData - -customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a -customAsOptVal opts optfun db = do - arg <- getOptVal opts optfun - lookupCustom db (strCI arg) - --- | take the first entry from the database -defaultCustomVal :: CustomData a -> a -defaultCustomVal (CustomData (s,db)) = - ifNull (error ("empty database:" +++ s)) (snd . head) db - -------------------------------------------------------------------------- --- * and here's the customizable part: - --- grammar parsers: the ID is also used as file name suffix -customGrammarParser = - customData "Grammar parsers, selected by file name suffix" $ - [ ------- (strCI "gf", compileModule noOptions) -- DEFAULT --- add your own grammar parsers here - ] - - -customGrammarPrinter = - customData "Grammar printers, selected by option -printer=x" $ - [ - (strCI "gfc", \_ -> prCanon . stateGrammarST) -- DEFAULT - ,(strCI "gf", \_ -> err id prGrammar . canon2sourceGrammar . stateGrammarST) - ,(strCI "cf", \_ -> prCF . stateCF) - ,(strCI "old", \_ -> printGrammarOld . stateGrammarST) - ,(strCI "gsl", gslPrinter) - ,(strCI "jsgf", jsgfPrinter Nothing) - ,(strCI "jsgf_sisr_old", jsgfPrinter (Just SISR.SISROld)) - ,(strCI "srgs_xml", srgsXmlPrinter Nothing False) - ,(strCI "srgs_xml_non_rec", srgsXmlNonRecursivePrinter) - ,(strCI "srgs_xml_prob", srgsXmlPrinter Nothing True) - ,(strCI "srgs_xml_sisr_old", srgsXmlPrinter (Just SISR.SISROld) False) - ,(strCI "srgs_abnf", srgsAbnfPrinter Nothing False) - ,(strCI "srgs_abnf_non_rec", srgsAbnfNonRecursivePrinter) - ,(strCI "srgs_abnf_sisr_old", srgsAbnfPrinter (Just SISR.SISROld) False) - ,(strCI "vxml", grammar2vxml) - ,(strCI "slf", slfPrinter) - ,(strCI "slf_graphviz", slfGraphvizPrinter) - ,(strCI "slf_sub", slfSubPrinter) - ,(strCI "slf_sub_graphviz", slfSubGraphvizPrinter) - ,(strCI "fa_graphviz", faGraphvizPrinter) - ,(strCI "fa_c", faCPrinter) - ,(strCI "regexp", regexpPrinter) - ,(strCI "regexps", multiRegexpPrinter) - ,(strCI "regular", regularPrinter) - ,(strCI "plbnf", \_ -> prLBNF True) - ,(strCI "lbnf", \_ -> prLBNF False) - ,(strCI "bnf", \_ -> prBNF False) - ,(strCI "absbnf", \_ -> abstract2bnf . stateGrammarST) - ,(strCI "haskell", \_ -> grammar2haskell . stateGrammarST) - ,(strCI "gfcc_haskell", \opts -> CCH.grammar2haskell . - canon2gfcc opts . stateGrammarST) - ,(strCI "haskell_gadt", \_ -> grammar2haskellGADT . stateGrammarST) - ,(strCI "transfer", \_ -> grammar2transfer . stateGrammarST) - ,(strCI "morpho", \_ -> prMorpho . stateMorpho) - ,(strCI "fullform",\_ -> prFullForm . stateMorpho) - ,(strCI "opts", \_ -> prOpts . stateOptions) - ,(strCI "words", \_ -> unwords . stateGrammarWords) - ,(strCI "printnames", \_ -> C.prPrintnamesGrammar . stateGrammarST) - ,(strCI "stat", \_ -> prStatistics . stateGrammarST) - ,(strCI "probs", \_ -> prProbs . stateProbs) - ,(strCI "unpar", \_ -> prCanon . unparametrizeCanon . stateGrammarST) - ,(strCI "subs", \_ -> prSubtermStat . stateGrammarST) - -{- ---- - (strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT - ,(strCI "canon", showCanon "Lang" . stateGrammarST) - ,(strCI "gfc", GFC.showGFC . stateGrammarST) - ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST) --} - --- add your own grammar printers here - --- grammar conversions: - ,(strCI "mcfg", \_ -> Prt.prt . stateMCFG) - ,(strCI "fcfg", \_ -> Prt.prt . fst . stateFCFG) - ,(strCI "cfg", \_ -> Prt.prt . stateCFG) - ,(strCI "pinfo", \_ -> Prt.prt . statePInfo) - ,(strCI "abstract", \_ -> Prt.prtAfter "\n" . Cnv.gfc2abstract . stateGrammarLang) - - ,(strCI "functiongraph",\_ -> CnvTypeGraph.prtFunctionGraph . Cnv.gfc2simple noOptions . stateGrammarLang) - ,(strCI "typegraph", \_ -> CnvTypeGraph.prtTypeGraph . Cnv.gfc2simple noOptions . stateGrammarLang) - - ,(strCI "gfc-haskell", \_ -> CnvHaskell.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts) - ,(strCI "mcfg-haskell", \_ -> CnvHaskell.prtMGrammar . stateMCFG) - ,(strCI "cfg-haskell", \_ -> CnvHaskell.prtCGrammar . stateCFG) - ,(strCI "gfc-prolog", \_ -> CnvProlog.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts) - ,(strCI "mcfg-prolog", \_ -> CnvProlog.prtMGrammar . stateMCFG) - ,(strCI "cfg-prolog", \_ -> CnvProlog.prtCGrammar . stateCFG) - --- obsolete, or only for testing: - ,(strCI "abs-skvatt", \_ -> Cnv.abstract2skvatt . Cnv.gfc2abstract . stateGrammarLang) - ,(strCI "cfg-skvatt", \_ -> Cnv.cfg2skvatt . stateCFG) - ,(strCI "simple", \_ -> Prt.prt . uncurry Cnv.gfc2simple . stateGrammarLangOpts) - ,(strCI "mcfg-erasing", \_ -> Prt.prt . fst . snd . uncurry Cnv.convertGFC . stateGrammarLangOpts) --- ,(strCI "mcfg-old", PrtOld.prt . CnvOld.mcfg . statePInfoOld) --- ,(strCI "cfg-old", PrtOld.prt . CnvOld.cfg . statePInfoOld) - ] - where stateGrammarLangOpts s = (stateOptions s, stateGrammarLang s) - -customMultiGrammarPrinter = - customData "Printers for multiple grammars, selected by option -printer=x" $ - [ - (strCI "gfcm", const MC.prCanon) - ,(strCI "gfcc", canon2gfccPr) - ,(strCI "js", \opts -> JS.gfcc2js . canon2gfcc opts) - ,(strCI "header", const (MC.prCanonMGr . unoptimizeCanon)) - ,(strCI "cfgm", prCanonAsCFGM) - ,(strCI "graph", visualizeCanonGrammar) - ,(strCI "missing", const missingLinCanonGrammar) - --- to prolog format: - ,(strCI "gfc-prolog", CnvProlog.prtSMulti) - ,(strCI "mcfg-prolog", CnvProlog.prtMMulti) - ,(strCI "cfg-prolog", CnvProlog.prtCMulti) - ] - - -customSyntaxPrinter = - customData "Syntax printers, selected by option -printer=x" $ - [ --- add your own grammar printers here - ] - - -customTermPrinter = - customData "Term printers, selected by option -printer=x" $ - [ - (strCI "gf", const prt) -- DEFAULT --- add your own term printers here - ] - -customTermCommand = - customData "Term transformers, selected by option -transform=x" $ - [ - (strCI "identity", \_ t -> [t]) -- DEFAULT - ,(strCI "compute", \g t -> let gr = grammar g in - err (const [t]) return - (exp2termCommand gr (computeAbsTerm gr) t)) - ,(strCI "nodup", \_ t -> if (hasDupIdent $ tree2exp t) then [] else [t]) - ,(strCI "nodupatom", \_ t -> if (hasDupAtom $ tree2exp t) then [] else [t]) - ,(strCI "paraphrase", \g t -> let gr = grammar g in - exp2termlistCommand gr (mkParaphrases gr) t) - - ,(strCI "generate", \g t -> let gr = grammar g - cat = actCat $ tree2loc t --- not needed - in - [tr | t <- generateTrees noOptions gr cat 2 Nothing (Just t), - Ok tr <- [annotate gr $ MM.qualifTerm (absId g) t]]) - ,(strCI "typecheck", \g t -> err (const []) (return . loc2tree) - (reCheckStateReject (grammar g) (tree2loc t))) - ,(strCI "solve", \g t -> err (const []) (return . loc2tree) - (solveAll (grammar g) (tree2loc t) - >>= rejectUnsolvable)) - ,(strCI "context", \g t -> err (const [t]) (return . loc2tree) - (contextRefinements (grammar g) (tree2loc t))) - ,(strCI "reindex", \g t -> let gr = grammar g in - err (const [t]) return - (exp2termCommand gr (return . MM.reindexTerm) t)) ---- ,(strCI "delete", \g t -> [MM.mExp0]) --- add your own term commands here - ] - -customEditCommand = - customData "Editor state transformers, selected by option -edit=x" $ - [ - (strCI "identity", const return) -- DEFAULT - ,(strCI "typecheck", \g -> reCheckState (grammar g)) - ,(strCI "solve", \g -> solveAll (grammar g)) - ,(strCI "context", \g -> contextRefinements (grammar g)) - ,(strCI "compute", \g -> computeSubTree (grammar g)) - ,(strCI "paraphrase", const return) --- done ad hoc on top level - ,(strCI "generate", const return) --- done ad hoc on top level - ,(strCI "transfer", const return) --- done ad hoc on top level --- add your own edit commands here - ] - -customStringCommand = - customData "String filters, selected by option -filter=x" $ - [ - (strCI "identity", const $ id) -- DEFAULT - ,(strCI "erase", const $ const "") - ,(strCI "take100", const $ take 100) - ,(strCI "text", const $ formatAsText) - ,(strCI "code", const $ formatAsCode) ----- ,(strCI "latexfile", const $ mkLatexFile) - ,(strCI "length", const $ show . length) --- add your own string commands here - ] - -customParser = - customData "Parsers, selected by option -parser=x" $ - [ - (strCI "chart", PCFOld.parse "ibn" . stateCF) -- DEPRECATED - ,(strCI "bottomup", PCF.parse "gb" . stateCF) - ,(strCI "topdown", PCF.parse "gt" . stateCF) --- commented for now, since there's a bug in the incremental algorithm: --- ,(strCI "incremental", PCF.parse "ib" . stateCF) --- ,(strCI "incremental-bottomup", PCF.parse "ib" . stateCF) --- ,(strCI "incremental-topdown", PCF.parse "it" . stateCF) - ,(strCI "old", chartParser . stateCF) -- DEPRECATED - ,(strCI "myparser", myParser) --- add your own parsers here - ] - -customTokenizer = - let sg = singleton in - customData "Tokenizers, selected by option -lexer=x" $ - [ - (strCI "words", const $ sg . tokWords) - ,(strCI "literals", const $ sg . tokLits) - ,(strCI "vars", const $ sg . tokVars) - ,(strCI "chars", const $ sg . map (tS . singleton)) - ,(strCI "code", const $ sg . lexHaskell) - ,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr)) - ,(strCI "textvars", \gr -> sg . (lexTextVar $ stateIsWord gr)) - ,(strCI "text", const $ sg . lexText) - ,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr)) - ,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr)) - ,(strCI "textlit", \gr -> sg . (lexTextLiteral $ stateIsWord gr)) - ,(strCI "codeC", const $ sg . lexC2M) - ,(strCI "ignore", \gr -> sg . lexIgnore (stateIsWord gr) . tokLits) - ,(strCI "subseqs", \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits) - ,(strCI "codeCHigh", const $ sg . lexC2M' True) --- add your own tokenizers here - ] - -customUntokenizer = - customData "Untokenizers, selected by option -unlexer=x" $ - [ - (strCI "unwords", const $ id) -- DEFAULT - ,(strCI "text", const $ formatAsText) - ,(strCI "html", const $ formatAsHTML) - ,(strCI "latex", const $ formatAsLatex) - ,(strCI "code", const $ formatAsCode) - ,(strCI "concat", const $ filter (not . isSpace)) - ,(strCI "textlit", const $ formatAsTextLit) - ,(strCI "codelit", const $ formatAsCodeLit) - ,(strCI "concat", const $ concatRemSpace) - ,(strCI "glue", const $ performBinds) - ,(strCI "finnish", const $ performBindsFinnish) - ,(strCI "reverse", const $ reverse) - ,(strCI "bind", const $ performBinds) -- backward compat --- add your own untokenizers here - ] - -customUniCoding = - customData "Alphabet codings, selected by option -coding=x" $ - [ - (strCI "latin1", id) -- DEFAULT - ,(strCI "utf8", decodeUTF8) - ,(strCI "greek", treat [] mkGreek) - ,(strCI "hebrew", mkHebrew) - ,(strCI "arabic", mkArabic) - ,(strCI "russian", treat [] mkRussian) - ,(strCI "russianKOI8", mkRusKOI8) - ,(strCI "ethiopic", mkEthiopic) - ,(strCI "tamil", mkTamil) - ,(strCI "OCScyrillic", mkOCSCyrillic) - ,(strCI "devanagari", mkDevanagari) - ,(strCI "latinasupplement", mkLatinASupplement) - ,(strCI "japanese", mkJapanese) - ,(strCI "arabic0600", mkArabic0600) - ,(strCI "extendedarabic", mkExtendedArabic) - ,(strCI "extradiacritics", mkExtraDiacritics) - ] diff --git a/src/GF/UseGrammar/Editing.hs b/src/GF/UseGrammar/Editing.hs deleted file mode 100644 index 762562eb0..000000000 --- a/src/GF/UseGrammar/Editing.hs +++ /dev/null @@ -1,435 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Editing --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:45 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.14 $ --- --- generic tree editing, with some grammar notions assumed. AR 18\/8\/2001. --- 19\/6\/2003 for GFC ------------------------------------------------------------------------------ - -module GF.UseGrammar.Editing where - -import GF.Grammar.Abstract -import qualified GF.Canon.GFC as GFC -import GF.Grammar.TypeCheck -import GF.Grammar.LookAbs -import GF.Grammar.AbsCompute -import GF.Grammar.Macros (errorCat) - -import GF.Data.Operations -import GF.Data.Zipper - --- generic tree editing, with some grammar notions assumed. AR 18/8/2001 --- 19/6/2003 for GFC - -type CGrammar = GFC.CanonGrammar - -type State = Loc TrNode - --- | the "empty" state -initState :: State -initState = tree2loc uTree - -isRootState :: State -> Bool -isRootState s = case actPath s of - Top -> True - _ -> False - -actTree :: State -> Tree -actTree (Loc (t,_)) = t - -actPath :: State -> Path TrNode -actPath (Loc (_,p)) = p - -actVal :: State -> Val -actVal = valNode . nodeTree . actTree - -actCat :: State -> Cat -actCat = errVal errorCat . val2cat . actVal ---- undef - -actAtom :: State -> Atom -actAtom = atomTree . actTree - -actFun :: State -> Err Fun -actFun s = case actAtom s of - AtC f -> return f - t -> prtBad "active atom: expected function, found" t - -actExp :: State -> Exp -actExp = tree2exp . actTree - --- | current local bindings -actBinds :: State -> Binds -actBinds = bindsNode . nodeTree . actTree - --- | constraints in current subtree -actConstrs :: State -> Constraints -actConstrs = allConstrsTree . actTree - --- | constraints in the whole tree -allConstrs :: State -> Constraints -allConstrs = allConstrsTree . loc2tree - --- | metas in current subtree -actMetas :: State -> [Meta] -actMetas = metasTree . actTree - --- | metas in the whole tree -allMetas :: State -> [Meta] -allMetas = metasTree . loc2tree - -actTreeBody :: State -> Tree -actTreeBody = bodyTree . actTree - -allPrevBinds :: State -> Binds -allPrevBinds = concatMap bindsNode . traverseCollect . actPath - -allBinds :: State -> Binds -allBinds s = actBinds s ++ allPrevBinds s - -actGen :: State -> Int -actGen = length . allBinds -- symbol generator for VGen - -allPrevVars :: State -> [Var] -allPrevVars = map fst . allPrevBinds - -allVars :: State -> [Var] -allVars = map fst . allBinds - -vGenIndex :: State -> Int -vGenIndex = length . allBinds - -actIsMeta :: State -> Bool -actIsMeta = atomIsMeta . actAtom - -actMeta :: State -> Err Meta -actMeta = getMetaAtom . actAtom - --- | meta substs are not only on the actual path... -entireMetaSubst :: State -> MetaSubst -entireMetaSubst = concatMap metaSubstsNode . scanTree . loc2tree - -isCompleteTree :: Tree -> Bool -isCompleteTree = null . filter atomIsMeta . map atomNode . scanTree - -isCompleteState :: State -> Bool -isCompleteState = isCompleteTree . loc2tree - -initStateCat :: Context -> Cat -> Err State -initStateCat cont cat = do - return $ tree2loc (Tr (mkNode [] mAtom (cat2val cont cat) ([],[]), [])) - --- | this function only concerns the body of an expression... -annotateInState :: CGrammar -> Exp -> State -> Err Tree -annotateInState gr exp state = do - let binds = allBinds state - val = actVal state - annotateIn gr binds exp (Just val) - --- | ...whereas this one works with lambda abstractions -annotateExpInState :: CGrammar -> Exp -> State -> Err Tree -annotateExpInState gr exp state = do - let cont = allPrevBinds state - binds = actBinds state - val = actVal state - typ <- mkProdVal binds val - annotateIn gr binds exp (Just typ) - -treeByExp :: (Exp -> Err Exp) -> CGrammar -> Exp -> State -> Err Tree -treeByExp trans gr exp0 state = do - exp <- trans exp0 - annotateExpInState gr exp state - --- * actions - -type Action = State -> Err State - -newCat :: CGrammar -> Cat -> Action -newCat gr cat@(m,c) _ = do - cont <- lookupCatContext gr m c - testErr (null cont) "start cat must have null context" -- for easier meta refresh - initStateCat cont cat - -newFun :: CGrammar -> Fun -> Action -newFun gr fun@(m,c) _ = do - typ <- lookupFunType gr m c - cat <- valCat typ - st1 <- newCat gr cat initState - refineWithAtom True gr (qq fun) st1 - -newTree :: Tree -> Action -newTree t _ = return $ tree2loc t - -newExpTC :: CGrammar -> Exp -> Action -newExpTC gr t s = annotate gr (refreshMetas [] t) >>= flip newTree s - -goNextMeta, goPrevMeta, goNextNewMeta, goPrevNewMeta, goNextMetaIfCan :: Action - -goNextMeta = repeatUntilErr actIsMeta goAhead -- can be the location itself -goPrevMeta = repeatUntilErr actIsMeta goBack - -goNextNewMeta s = goAhead s >>= goNextMeta -- always goes away from location -goPrevNewMeta s = goBack s >>= goPrevMeta - -goNextMetaIfCan = actionIfPossible goNextMeta - -actionIfPossible :: Action -> Action -actionIfPossible a s = return $ errVal s (a s) - -goFirstMeta, goLastMeta :: Action -goFirstMeta s = goNextMeta $ goRoot s -goLastMeta s = goLast s >>= goPrevMeta - -noMoreMetas :: State -> Bool -noMoreMetas = err (const True) (const False) . goNextMeta - -replaceSubTree :: Tree -> Action -replaceSubTree tree state = changeLoc state tree - -refineOrReplaceWithTree :: Bool -> CGrammar -> Tree -> Action -refineOrReplaceWithTree der gr tree state = case actMeta state of - Ok m -> refineWithTreeReal der gr tree m state - _ -> do - let tree1 = addBinds (actBinds state) $ tree - state' <- replaceSubTree tree1 state - reCheckState gr state' - -refineWithTree :: Bool -> CGrammar -> Tree -> Action -refineWithTree der gr tree state = do - m <- errIn "move pointer to meta" $ actMeta state - refineWithTreeReal der gr tree m state - -refineWithTreeReal :: Bool -> CGrammar -> Tree -> Meta -> Action -refineWithTreeReal der gr tree m state = do - state' <- replaceSubTree tree state - let cs0 = allConstrs state' - (cs,ms) = splitConstraints gr cs0 - v = vClos $ tree2exp (bodyTree tree) - msubst = (m,v) : ms - metaSubstRefinements gr msubst $ - mapLoc (reduceConstraintsNode gr . performMetaSubstNode msubst) state' - - -- without dep. types, no constraints, no grammar needed - simply: do - -- testErr (actIsMeta state) "move pointer to meta" - -- replaceSubTree tree state - -refineAllNodes :: Action -> Action -refineAllNodes act state = do - let estate0 = goFirstMeta state - case estate0 of - Bad _ -> return state - Ok state0 -> do - (state',n) <- tryRefine 0 state0 - if n==0 - then return state - else actionIfPossible goFirstMeta state' - where - tryRefine n state = err (const $ return (state,n)) return $ do - state' <- goNextMeta state - meta <- actMeta state' - case act state' of - Ok state2 -> tryRefine (n+1) state2 - _ -> err (const $ return (state',n)) return $ do - state2 <- goNextNewMeta state' - tryRefine n state2 - -uniqueRefinements :: CGrammar -> Action -uniqueRefinements = refineAllNodes . uniqueRefine - -metaSubstRefinements :: CGrammar -> MetaSubst -> Action -metaSubstRefinements gr = refineAllNodes . metaSubstRefine gr - -contextRefinements :: CGrammar -> Action -contextRefinements gr = refineAllNodes contextRefine where - contextRefine state = case varRefinementsState state of - [(e,_)] -> refineWithAtom False gr e state - _ -> Bad "no unique refinement in context" - varRefinementsState state = - [r | r@(e,_) <- refinementsState gr state, isVariable e] - -uniqueRefine :: CGrammar -> Action -uniqueRefine gr state = case refinementsState gr state of - [(e,(_,True))] -> Bad "only circular refinement" - [(e,_)] -> refineWithAtom False gr e state - _ -> Bad "no unique refinement" - -metaSubstRefine :: CGrammar -> MetaSubst -> Action -metaSubstRefine gr msubst state = do - m <- errIn "move pointer to meta" $ actMeta state - case lookup m msubst of - Just v -> do - e <- val2expSafe v - refineWithExpTC False gr e state - _ -> Bad "no metavariable substitution available" - -refineWithExpTC :: Bool -> CGrammar -> Exp -> Action -refineWithExpTC der gr exp0 state = do - let oldmetas = allMetas state - exp = refreshMetas oldmetas exp0 - tree0 <- annotateInState gr exp state - let tree = addBinds (actBinds state) $ tree0 - refineWithTree der gr tree state - -refineWithAtom :: Bool -> CGrammar -> Ref -> Action -- function or variable -refineWithAtom der gr at state = do - val <- lookupRef gr (allBinds state) at - typ <- val2exp val - let oldvars = allVars state - exp <- ref2exp oldvars typ at - refineWithExpTC der gr exp state - --- | in this command, we know that the result is well-typed, since computation --- rules have been type checked and the result is equal -computeSubTree :: CGrammar -> Action -computeSubTree gr state = do - let exp = tree2exp (actTree state) - tree <- treeByExp (compute gr) gr exp state - replaceSubTree tree state - --- | but here we don't, since the transfer flag isn't type checked, --- and computing the transfer function is not checked to preserve equality -transferSubTree :: Maybe Fun -> CGrammar -> Action -transferSubTree Nothing _ s = return s -transferSubTree (Just fun) gr state = do - let exp = mkApp (qq fun) [tree2exp $ actTree state] - tree <- treeByExp (compute gr) gr exp state - state' <- replaceSubTree tree state - reCheckState gr state' - -deleteSubTree :: CGrammar -> Action -deleteSubTree gr state = - if isRootState state - then do - let cat = actCat state - newCat gr cat state - else do - let metas = allMetas state - binds = actBinds state - exp = refreshMetas metas mExp0 - tree <- annotateInState gr exp state - state' <- replaceSubTree (addBinds binds tree) state - reCheckState gr state' --- must be unfortunately done. 20/11/2001 - -wrapWithFun :: CGrammar -> (Fun,Int) -> Action -wrapWithFun gr (f@(m,c),i) state = do - typ <- lookupFunType gr m c - let olds = allPrevVars state - oldmetas = allMetas state - exp0 <- fun2wrap olds ((f,i),typ) (tree2exp (actTreeBody state)) - let exp = refreshMetas oldmetas exp0 - tree0 <- annotateInState gr exp state - let tree = addBinds (actBinds state) $ tree0 - state' <- replaceSubTree tree state - reCheckState gr state' --- must be unfortunately done. 20/11/2001 - -alphaConvert :: CGrammar -> (Var,Var) -> Action -alphaConvert gr (x,x') state = do - let oldvars = allPrevVars state - testErr (notElem x' oldvars) ("clash with previous bindings" +++ show x') - let binds0 = actBinds state - vars0 = map fst binds0 - testErr (notElem x' vars0) ("clash with other bindings" +++ show x') - let binds = [(if z==x then x' else z, t) | (z,t) <- binds0] - vars = map fst binds - exp' <- alphaConv (vars ++ oldvars) (x,x') (tree2exp (actTreeBody state)) - let exp = mkAbs vars exp' - tree <- annotateExpInState gr exp state - replaceSubTree tree state - -changeFunHead :: CGrammar -> Fun -> Action -changeFunHead gr f state = do - let state' = changeNode (changeAtom (const (atomC f))) state - reCheckState gr state' --- must be done because of constraints elsewhere - -peelFunHead :: CGrammar -> (Fun,Int) -> Action -peelFunHead gr (f@(m,c),i) state = do - tree0 <- nthSubtree i $ actTree state - let tree = addBinds (actBinds state) $ tree0 - state' <- replaceSubTree tree state - reCheckState gr state' --- must be unfortunately done. 20/11/2001 - --- | an expensive operation -reCheckState :: CGrammar -> State -> Err State -reCheckState gr st = annotate gr (tree2exp (loc2tree st)) >>= return . tree2loc - --- | a variant that returns Bad instead of a tree with unsolvable constraints -reCheckStateReject :: CGrammar -> State -> Err State -reCheckStateReject gr st = do - st' <- reCheckState gr st - rejectUnsolvable st' - -rejectUnsolvable :: State -> Err State -rejectUnsolvable st = case (constrsNode $ nodeTree $ actTree st) of - [] -> return st - cs -> Bad $ "Unsolvable constraints:" +++ prConstraints cs - --- | extract metasubstitutions from constraints and solve them -solveAll :: CGrammar -> State -> Err State -solveAll gr st = solve st >>= solve where - solve st0 = do ---- why need twice? - st <- reCheckState gr st0 - let cs0 = allConstrs st - (cs,ms) = splitConstraints gr cs0 - metaSubstRefinements gr ms $ - mapLoc (reduceConstraintsNode gr . performMetaSubstNode ms) st - --- * active refinements - -refinementsState :: CGrammar -> State -> [(Term,(Val,Bool))] -refinementsState gr state = - let filt = possibleRefVal gr state in - if actIsMeta state - then refsForType filt gr (allBinds state) (actVal state) - else [] - -wrappingsState :: CGrammar -> State -> [((Fun,Int),Type)] -wrappingsState gr state - | actIsMeta state = [] - | isRootState state = funs - | otherwise = [rule | rule@(_,typ) <- funs, possibleRefVal gr state aval typ] - where - funs = funsOnType (possibleRefVal gr state) gr aval - aval = actVal state - -peelingsState :: CGrammar -> State -> [(Fun,Int)] -peelingsState gr state - | actIsMeta state = [] - | isRootState state = - err (const []) (\f -> [(f,i) | i <- [0 .. arityTree tree - 1]]) $ actFun state - | otherwise = - err (const []) - (\f -> [fi | (fi@(g,_),typ) <- funs, - possibleRefVal gr state aval typ,g==f]) $ actFun state - where - funs = funsOnType (possibleRefVal gr state) gr aval - aval = actVal state - tree = actTree state - -headChangesState :: CGrammar -> State -> [Fun] -headChangesState gr state = errVal [] $ do - f@(m,c) <- funAtom (actAtom state) - typ0 <- lookupFunType gr m c - return [fun | (fun,typ) <- funRulesOf gr, fun /= f, typ == typ0] - --- alpha-conv ! - -possibleRefVal :: CGrammar -> State -> Val -> Type -> Bool -possibleRefVal gr state val typ = errVal True $ do --- was False - vtyp <- valType typ - let gen = actGen state - cs <- return [(val, vClos vtyp)] --- eqVal gen val (vClos vtyp) --- only poss cs - return $ possibleConstraints gr cs --- a simple heuristic - -possibleTreeVal :: CGrammar -> State -> Tree -> Bool -possibleTreeVal gr state tree = errVal True $ do --- was False - let aval = actVal state - let gval = valTree tree - let gen = actGen state - cs <- return [(aval, gval)] --- eqVal gen val (vClos vtyp) --- only poss cs - return $ possibleConstraints gr cs --- a simple heuristic - diff --git a/src/GF/UseGrammar/Generate.hs b/src/GF/UseGrammar/Generate.hs deleted file mode 100644 index 5f07e0b85..000000000 --- a/src/GF/UseGrammar/Generate.hs +++ /dev/null @@ -1,116 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Generate --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/10/12 12:38:30 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.16 $ --- --- Generate all trees of given category and depth. AR 30\/4\/2004 --- --- (c) Aarne Ranta 2004 under GNU GPL --- --- Purpose: to generate corpora. We use simple types and don't --- guarantee the correctness of bindings\/dependences. ------------------------------------------------------------------------------ - -module GF.UseGrammar.Generate (generateTrees,generateAll) where - -import GF.Canon.GFC -import GF.Grammar.LookAbs -import GF.Grammar.PrGrammar -import GF.Grammar.Macros -import GF.Grammar.Values -import GF.Grammar.Grammar (Cat) -import GF.Grammar.SGrammar -import GF.Data.Operations -import GF.Data.Zipper -import GF.Infra.Option -import Data.List - --- Generate all trees of given category and depth. AR 30/4/2004 --- (c) Aarne Ranta 2004 under GNU GPL --- --- Purpose: to generate corpora. We use simple types and don't --- guarantee the correctness of bindings/dependences. - - --- | the main function takes an abstract syntax and returns a list of trees -generateTrees :: - Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp] -generateTrees opts gr cat n mn mt = map str2tr $ generate gr' opts cat' n mn mt' - where - gr' = gr2sgr opts emptyProbs gr - cat' = prt $ snd cat - mt' = maybe Nothing (return . tr2str) mt ---- ifm = oElem withMetas opts - ifm = oElem showOld opts - -generateAll :: Options -> (Exp -> IO ()) -> GFCGrammar -> Cat -> IO () -generateAll opts io gr cat = mapM_ (io . str2tr) $ num $ gen cat' - where - num = optIntOrAll opts flagNumber - gr' = gr2sgr opts emptyProbs gr - cat' = prt $ snd cat - gen c = generate gr' opts c 10 Nothing Nothing - - - ------------------------------------------- --- do the main thing with a simpler data structure --- the first Int gives tree depth, the second constrains subtrees --- chosen for each branch. A small number, such as 2, is a good choice --- if the depth is large (more than 3) --- If a tree is given as argument, generation concerns its metavariables. - -generate :: SGrammar -> Options -> SCat -> Int -> Maybe Int -> Maybe STree -> [STree] -generate gr opts cat i mn mt = case mt of - Nothing -> gen opts cat - Just t -> genM t - where ---- now use ifm to choose between two algorithms - gen opts cat - | oElem (iOpt "mem") opts = concat $ errVal [] $ lookupTree id cat $ allTrees -- -old - | oElem (iOpt "nonub") opts = concatMap (\i -> gener i cat) [0..i-1] -- some duplicates - | otherwise = nub $ concatMap (\i -> gener i cat) [0..i-1] -- new - - gener 0 c = [SApp (f, []) | (f,([],_)) <- funs c] - gener i c = [ - tr | - (f,(cs,_)) <- funs c, - let alts = map (gener (i-1)) cs, - ts <- combinations alts, - let tr = SApp (f, ts) --- depth tr >= i -- NO! - ] - - allTrees = genAll i - - -- dynamic generation - genAll :: Int -> BinTree SCat [[STree]] - genAll i = iter i genNext (mapTree (\ (c,_) -> (c,[[]])) gr) - - iter 0 f tr = tr - iter n f tr = iter (n-1) f (f tr) - - genNext tr = mapTree (genNew tr) tr - - genNew tr (cat,ts) = let size = length ts in - (cat, [SApp (f, xs) | - (f,(cs,_)) <- funs cat, - xs <- combinations (map look cs), - let fxs = SApp (f, xs), - depth fxs == size] - : ts) - where - look c = concat $ errVal [] $ lookupTree id c tr - - funs cat = maybe id take mn $ errVal [] $ lookupTree id cat gr - - genM t = case t of - SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)] - SMeta k -> gen opts k - _ -> [t] diff --git a/src/GF/UseGrammar/GetTree.hs b/src/GF/UseGrammar/GetTree.hs deleted file mode 100644 index e980a3d95..000000000 --- a/src/GF/UseGrammar/GetTree.hs +++ /dev/null @@ -1,74 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : GetTree --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/09/15 16:22:02 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.9 $ --- --- how to form linearizable trees from strings and from terms of different levels --- --- 'String' --> raw 'Term' --> annot, qualif 'Term' --> 'Tree' ------------------------------------------------------------------------------ - -module GF.UseGrammar.GetTree where - -import GF.Canon.GFC -import GF.Grammar.Values -import qualified GF.Grammar.Grammar as G -import GF.Infra.Ident -import GF.Grammar.MMacros -import GF.Grammar.Macros -import GF.Compile.Rename -import GF.Grammar.TypeCheck -import GF.Grammar.AbsCompute (beta) -import GF.Compile.PGrammar -import GF.Compile.ShellState - -import GF.Data.Operations - -import Data.Char - --- how to form linearizable trees from strings and from terms of different levels --- --- String --> raw Term --> annot, qualif Term --> Tree - -string2tree :: StateGrammar -> String -> Tree -string2tree gr = errVal uTree . string2treeErr gr - -string2treeErr :: StateGrammar -> String -> Err Tree -string2treeErr _ "" = Bad "empty string" -string2treeErr gr s = do - t <- pTerm s - let t0 = beta [] t - let t1 = refreshMetas [] t0 - let t2 = qualifTerm abstr t1 - annotate grc t2 - where - abstr = absId gr - grc = grammar gr - -string2Cat, string2Fun :: StateGrammar -> String -> (Ident,Ident) -string2Cat gr c = (absId gr,identC c) -string2Fun = string2Cat - -strings2Cat, strings2Fun :: String -> (Ident,Ident) -strings2Cat s = (identC m, identC (drop 1 c)) where (m,c) = span (/= '.') s -strings2Fun = strings2Cat - -string2ref :: StateGrammar -> String -> Err G.Term -string2ref gr s = case s of - 'x':'_':ds -> return $ freshAsTerm ds --- hack for generated vars - '"':_:_ -> return $ G.K $ init $ tail s - _:_ | all isDigit s -> return $ G.EInt $ read s - _ | elem '.' s -> return $ uncurry G.Q $ strings2Fun s - _ -> return $ G.Vr $ identC s - -string2cat :: StateGrammar -> String -> Err G.Cat -string2cat gr s = - if elem '.' s - then return $ strings2Fun s - else return $ curry id (absId gr) (identC s) diff --git a/src/GF/UseGrammar/Information.hs b/src/GF/UseGrammar/Information.hs deleted file mode 100644 index 4526980d6..000000000 --- a/src/GF/UseGrammar/Information.hs +++ /dev/null @@ -1,162 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Information --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/10/05 20:02:20 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.7 $ --- --- information on module, category, function, operation, parameter,... --- AR 16\/9\/2003. --- uses source grammar ------------------------------------------------------------------------------ - -module GF.UseGrammar.Information ( - showInformation, - missingLinCanonGrammar - ) where - -import GF.Grammar.Grammar -import GF.Infra.Ident -import GF.Infra.Modules -import GF.Infra.Option -import GF.CF.CF -import GF.CF.PPrCF -import GF.Compile.ShellState -import GF.Grammar.PrGrammar -import GF.Grammar.Lookup -import GF.Grammar.Macros (zIdent) -import qualified GF.Canon.GFC as GFC -import qualified GF.Canon.AbsGFC as AbsGFC - -import GF.Data.Operations -import GF.Infra.UseIO - --- information on module, category, function, operation, parameter,... AR 16/9/2003 --- uses source grammar - --- | the top level function -showInformation :: Options -> ShellState -> Ident -> IOE () -showInformation opts st c = do - is <- ioeErr $ getInformation opts st c - if null is - then putStrLnE "Identifier not in scope" - else mapM_ (putStrLnE . prInformationM c) is - where - prInformationM c (i,m) = prInformation opts c i ++ "file:" +++ m ++ "\n" - --- | the data type of different kinds of information -data Information = - IModAbs SourceAbs - | IModRes SourceRes - | IModCnc SourceCnc - | IModule SourceAbs -- ^ to be deprecated - | ICatAbs Ident Context [Ident] - | ICatCnc Ident Type [CFRule] Term - | IFunAbs Ident Type (Maybe Term) - | IFunCnc Ident Type [CFRule] Term - | IOper Ident Type Term - | IParam Ident [Param] [Term] - | IValue Ident Type - -type CatId = AbsGFC.CIdent -type FunId = AbsGFC.CIdent - -prInformation :: Options -> Ident -> Information -> String -prInformation opts c i = unlines $ prt c : case i of - IModule m -> [ - "module of type" +++ show (mtype m), - "extends" +++ show (extends m), - "opens" +++ show (opens m), - "defines" +++ unwords (map prt (ownConstants (jments m))) - ] - ICatAbs m co _ -> [ - "category in abstract module" +++ prt m, - if null co then "not a dependent type" - else "dependent type with context" +++ prContext co - ] - ICatCnc m ty cfs tr -> [ - "category in concrete module" +++ prt m, - "linearization type" +++ prt ty - ] - IFunAbs m ty _ -> [ - "function in abstract module" +++ prt m, - "type" +++ prt ty - ] - IFunCnc m ty cfs tr -> [ - "function in concrete module" +++ prt m, - "linearization" +++ prt tr - --- "linearization type" +++ prt ty - ] - IOper m ty tr -> [ - "operation in resource module" +++ prt m, - "type" +++ prt ty, - "definition" +++ prt tr - ] - IParam m ty ts -> [ - "parameter type in resource module" +++ prt m, - "constructors" +++ unwords (map prParam ty), - "values" +++ unwords (map prt ts) - ] - IValue m ty -> [ - "parameter constructor in resource module" +++ prt m, - "type" +++ show ty - ] - --- | also finds out if an identifier is defined in many places -getInformation :: Options -> ShellState -> Ident -> Err [(Information,FilePath)] -getInformation opts st c = allChecks $ [ - do - m <- lookupModule src c - case m of - ModMod mo -> returnm c $ IModule mo - _ -> prtBad "not a source module" c - ] ++ map lookInSrc ss ++ map lookInCan cs - where - lookInSrc (i,m) = do - j <- lookupInfo m c - case j of - AbsCat (Yes co) _ -> returnm i $ ICatAbs i co [] --- - AbsFun (Yes ty) _ -> returnm i $ IFunAbs i ty Nothing --- - CncCat (Yes ty) _ _ -> do - ---- let cat = ident2CFCat i c - ---- rs <- concat [rs | (c,rs) <- cf, ] - returnm i $ ICatCnc i ty [] ty --- - CncFun _ (Yes tr) _ -> do - rs <- return [] - returnm i $ IFunCnc i tr rs tr --- - ResOper (Yes ty) (Yes tr) -> returnm i $ IOper i ty tr - ResParam (Yes (ps,_)) -> do - ts <- allParamValues src (QC i c) - returnm i $ IParam i ps ts - ResValue (Yes (ty,_)) -> returnm i $ IValue i ty --- - - _ -> prtBad "nothing available for" i - lookInCan (i,m) = do - Bad "nothing available yet in canonical" - - returnm m i = return (i, pathOfModule st m) - - src = srcModules st - can = canModules st - ss = [(i,m) | (i,ModMod m) <- modules src] - cs = [(i,m) | (i,ModMod m) <- modules can] - cf = concatMap ruleGroupsOfCF $ map snd $ cfs st - -ownConstants :: BinTree Ident Info -> [Ident] -ownConstants = map fst . filter isOwn . tree2list where - isOwn (c,i) = case i of - AnyInd _ _ -> False - _ -> True - -missingLinCanonGrammar :: GFC.CanonGrammar -> String -missingLinCanonGrammar cgr = - unlines $ concat [prt_ c : missing js | (c,js) <- concretes] where - missing js = map ((" " ++) . prt_) $ filter (not . flip isInBinTree js) abstract - abstract = err (const []) (map fst . tree2list . jments) $ lookupModMod cgr absId - absId = maybe (zIdent "") id $ greatestAbstract cgr - concretes = [(cnc,jments mo) | - cnc <- allConcretes cgr absId, Ok mo <- [lookupModMod cgr cnc]] diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs deleted file mode 100644 index c9b94ccb0..000000000 --- a/src/GF/UseGrammar/Linear.hs +++ /dev/null @@ -1,292 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Linear --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/14 16:03:41 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.19 $ --- --- Linearization for canonical GF. AR 7\/6\/2003 ------------------------------------------------------------------------------ - -module GF.UseGrammar.Linear where - -import GF.Canon.GFC -import GF.Canon.AbsGFC -import qualified GF.Grammar.Abstract as A -import GF.Canon.MkGFC (rtQIdent) ---- -import GF.Infra.Ident -import GF.Grammar.PrGrammar -import GF.Canon.CMacros -import GF.Canon.Look -import GF.Grammar.LookAbs -import GF.Grammar.MMacros -import GF.Grammar.TypeCheck (annotate) ---- -import GF.Data.Str -import GF.Text.Text -----import TypeCheck -- to annotate - -import GF.Data.Operations -import GF.Data.Zipper -import qualified GF.Infra.Modules as M - -import Control.Monad -import Data.List (intersperse) - --- Linearization for canonical GF. AR 7/6/2003 - --- | The worker function: linearize a Tree, return --- a record. Possibly mark subtrees. --- --- NB. Constants in trees are annotated by the name of the abstract module. --- A concrete module name must be given to find (and choose) linearization rules. --- --- - If no marking is wanted, 'noMark' :: 'Marker'. --- --- - For xml marking, use 'markXML' :: 'Marker' -linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term -linearizeToRecord gr mk m = lin [] where - - lin ts t@(Tr (n,xs)) = errIn ("linearization of" +++ prt t) $ do - - let binds = A.bindsNode n - at = A.atomNode n - fmk = markSubtree mk n ts (A.isFocusNode n) - c <- A.val2cat $ A.valNode n - xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs - - r <- case at of - A.AtC f -> lookf c t f >>= comp xs' - A.AtI i -> return $ recInt i - A.AtL s -> return $ recS $ tK $ prt at - A.AtF i -> return $ recS $ tK $ prt at - A.AtV x -> lookCat c >>= comp [tK (prt_ at)] - A.AtM m -> lookCat c >>= comp [tK (prt_ at)] - - r' <- case r of -- to see stg in case the result is variants {} - FV [] -> lookCat c >>= comp [tK (prt_ t)] - _ -> return r - - return $ fmk $ mkBinds binds r' - - look = lookupLin gr . redirectIdent m . rtQIdent - comp = ccompute gr - mkBinds bs bdy = case bdy of - R fs -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs - FV rs -> FV $ map (mkBinds bs) rs - - recS t = R [Ass (L (identC "s")) t] ---- - - recInt i = R [ - Ass (L (identC "last")) (EInt (rem i 10)), - Ass (L (identC "s")) (tK $ show i), - Ass (L (identC "size")) (EInt (if i > 9 then 1 else 0)) - ] - - lookCat = return . errVal defLindef . look - ---- should always be given in the module - - -- to show missing linearization as term - lookf c t f = case look f of - Ok h -> return h - _ -> lookCat c >>= comp [tK (prt_ t)] - - --- | thus the special case: -linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term -linearizeNoMark gr = linearizeToRecord gr noMark - --- | expand tables in linearized term to full, normal-order tables --- --- NB expand from inside-out so that values are not looked up in copies of branches - -expandLinTables :: CanonGrammar -> Term -> Err Term -expandLinTables gr t = case t of - R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs] - T ty rs -> do - rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out - let t' = T ty $ map (uncurry Cas) rs' - vs <- alls ty - ps <- mapM term2patt vs - ts' <- mapM (comp . S t') $ vs - return $ T ty [Cas [p] t | (p,t) <- zip ps ts'] - V ty ts0 -> do - ts <- mapM exp ts0 -- expand from inside-out - vs <- alls ty - ps <- mapM term2patt vs - return $ T ty [Cas [p] t | (p,t) <- zip ps ts] - FV ts -> liftM FV $ mapM exp ts - _ -> composOp exp t - where - alls = allParamValues gr - exp = expandLinTables gr - comp = ccompute gr [] - --- Do this for an entire grammar: - -unoptimizeCanon :: CanonGrammar -> CanonGrammar -unoptimizeCanon g@(M.MGrammar ms) = M.MGrammar $ map (unoptimizeCanonMod g) ms - -unoptimizeCanonMod :: CanonGrammar -> CanonModule -> CanonModule -unoptimizeCanonMod g = convMod where - convMod (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os defs)) = - (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os (mapTree convDef defs))) - convMod mm = mm - convDef (c,CncCat ty df pr) = (c,CncCat ty (convT df) (convT pr)) - convDef (f,CncFun c xs li pr) = (f,CncFun c xs (convT li) (convT pr)) - convDef cd = cd - convT = err error id . exp - -- a version of expandLinTables that does not destroy share optimization - exp t = case t of - R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs] - T ty rs@[Cas [_] _] -> do - rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out - let t' = T ty $ map (uncurry Cas) rs' - vs <- alls ty - ps <- mapM term2patt vs - ts' <- mapM (comp . S t') $ vs - return $ T ty [Cas [p] t | (p,t) <- zip ps ts'] - V ty ts0 -> do - ts <- mapM exp ts0 -- expand from inside-out - vs <- alls ty - ps <- mapM term2patt vs - return $ T ty [Cas [p] t | (p,t) <- zip ps ts] - FV ts -> liftM FV $ mapM exp ts - I _ -> comp t - _ -> composOp exp t - where - alls = allParamValues g - comp = ccompute g [] - - --- | from records, one can get to records of tables of strings -rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]] -rec2strTables r = do - vs <- allLinValues r - mapM (mapPairsM (mapPairsM strsFromTerm)) vs - --- | from these tables, one may want to extract the ones for the "s" label -strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]] -strTables2sTables ts = [t | r <- ts, (l,t) <- r, l == linLab0] - -linLab0 :: Label -linLab0 = L (identC "s") - --- | to get lists of token lists is easy -sTables2strs :: [[([Patt],[Str])]] -> [[Str]] -sTables2strs = map snd . concat - --- | from this, to get a list of strings -strs2strings :: [[Str]] -> [String] -strs2strings = map unlex - --- | this is just unwords; use an unlexer from Text to postprocess -unlex :: [Str] -> String -unlex = concat . map sstr . take 1 ---- - --- | finally, a top-level function to get a string from an expression -linTree2string :: Marker -> CanonGrammar -> Ident -> A.Tree -> String -linTree2string mk gr m e = head $ linTree2strings mk gr m e -- never empty - --- | you can also get many strings -linTree2strings :: Marker -> CanonGrammar -> Ident -> A.Tree -> [String] -linTree2strings mk gr m e = err return id $ do - t <- linearizeToRecord gr mk m e - r <- expandLinTables gr t - ts <- rec2strTables r - let ss = strs2strings $ sTables2strs $ strTables2sTables ts - ifNull (prtBad "empty linearization of" e) return ss -- thus never empty - --- | argument is a Tree, value is a list of strs; needed in Parsing -allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str] -allLinsOfTree gr a e = err (singleton . str) id $ do - e' <- return e ---- annotateExp gr e - r <- linearizeNoMark gr a e' - r' <- expandLinTables gr r - ts <- rec2strTables r' - return $ concat $ sTables2strs $ strTables2sTables ts - --- | the value is a list of structures arranged as records of tables of terms -allLinsAsRec :: CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],Term)])]] -allLinsAsRec gr c t = linearizeNoMark gr c t >>= expandLinTables gr >>= allLinValues - --- | the value is a list of structures arranged as records of tables of strings --- only taking into account string fields --- True: sep. by /, False: sep by \n -allLinTables :: - Bool -> CanonGrammar ->Ident ->A.Tree ->Err [[(Label,[([Patt],[String])])]] -allLinTables slash gr c t = do - r' <- allLinsAsRec gr c t - mapM (mapM getS) r' - where - getS (lab,pss) = liftM (curry id lab) $ mapM gets pss - gets (ps,t) = liftM (curry id ps . cc . map str2strings) $ strsFromTerm t - cc = concat . intersperse [if slash then "/" else "\n"] - --- | the value is a list of strings gathered from all fields - -allLinBranchFields :: CanonGrammar -> Ident -> A.Tree -> Err [String] -allLinBranchFields gr c trm = do - r <- linearizeNoMark gr c trm >>= expandLinTables gr - return [s | (_,t) <- allLinBranches r, s <- gets t] - where - gets t = concat [cc (map str2strings s) | Ok s <- [strsFromTerm t]] - cc = concat . intersperse ["/"] - -prLinTable :: Bool -> [[(Label,[([Patt],[String])])]] -> [String] -prLinTable pars = concatMap prOne . concat where - prOne (lab,pss) = (if pars then ((prt lab) :) else id) (map pr pss) ---- - pr (ps,ss) = (if pars then ((unwords (map prt_ ps) +++ ":") +++) - else id) (unwords ss) - -{- --- the value is a list of strs -allLinStrings :: CanonGrammar -> Tree -> [Str] -allLinStrings gr ft = case allLinsAsStrs gr ft of - Ok ts -> map snd $ concat $ map snd $ concat ts - Bad s -> [str s] - --- the value is a list of strs, not forgetting their arguments -allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]] -allLinsAsStrs gr ft = do - lpts <- allLinearizations gr ft - return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts - - --- to a list of strings -linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String] -linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk - --- to a list of token lists -linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]] -linearizeToStrss gr mk e = do - R rs <- linearizeToRecord gr mk e ---- - t <- lookupErr linLab0 [(r,s) | Ass r s <- rs] - return $ map strsFromTerm $ allInTable t --} - --- | the value is a list of strings, not forgetting their arguments -allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]] -allLinsOfFun gr f = do - t <- lookupLin gr f - allAllLinValues t --- all fields, not only s. 11/12/2005 - - --- | returns printname if one exists; otherwise linearizes with metas -printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String -printOrLinearize gr c f@(m, d) = errVal (prt fq) $ - case lookupPrintname gr (CIQ c d) of - Ok t -> do - ss <- strsFromTerm t - let s = strs2strings [ss] - return $ ifNull (prt fq) head s - _ -> do - ty <- lookupFunType gr m d - f' <- ref2exp [] ty (A.QC m d) - tr <- annotate gr f' - return $ linTree2string noMark gr c tr - where - fq = CIQ m d diff --git a/src/GF/UseGrammar/MatchTerm.hs b/src/GF/UseGrammar/MatchTerm.hs deleted file mode 100644 index 9acffd44c..000000000 --- a/src/GF/UseGrammar/MatchTerm.hs +++ /dev/null @@ -1,50 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : MatchTerm --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- --- functions for matching with terms. AR 16/3/2006 ------------------------------------------------------------------------------ - -module GF.UseGrammar.MatchTerm where - -import GF.Data.Operations -import GF.Data.Zipper - -import GF.Grammar.Grammar -import GF.Grammar.PrGrammar -import GF.Infra.Ident -import GF.Grammar.Values -import GF.Grammar.Macros -import GF.Grammar.MMacros - -import Control.Monad -import Data.List - --- test if a term has duplicated idents, either any or just atoms - -hasDupIdent, hasDupAtom :: Exp -> Bool -hasDupIdent = (>1) . maximum . map length . group . sort . allConstants True -hasDupAtom = (>1) . maximum . map length . group . sort . allConstants False - --- test if a certain ident occurs in term - -grepIdent :: Ident -> Exp -> Bool -grepIdent c = elem c . allConstants True - --- form the list of all constants, optionally ignoring all but atoms - -allConstants :: Bool -> Exp -> [Ident] -allConstants alsoApp = err (const []) snd . flip appSTM [] . collect where - collect e = case e of - Q _ c -> add c e - QC _ c -> add c e - Cn c -> add c e - App f a | not alsoApp -> case f of - App g b -> collect b >> collect a - _ -> collect a - _ -> composOp collect e - add c e = updateSTM (c:) >> return e diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs deleted file mode 100644 index 3aeb08dc7..000000000 --- a/src/GF/UseGrammar/Morphology.hs +++ /dev/null @@ -1,140 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Morphology --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:49 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.8 $ --- --- Morphological analyser constructed from a GF grammar. --- --- we first found the binary search tree sorted by word forms more efficient --- than a trie, at least for grammars with 7000 word forms --- (18\/11\/2003) but this may change since we have to use a trie --- for decompositions and also want to use it in the parser ------------------------------------------------------------------------------ - -module GF.UseGrammar.Morphology where - -import GF.Canon.AbsGFC -import GF.Canon.GFC -import GF.Grammar.PrGrammar -import GF.Canon.CMacros -import GF.Canon.Look -import GF.Grammar.LookAbs -import GF.Infra.Ident -import qualified GF.Grammar.Macros as M -import GF.UseGrammar.Linear - -import GF.Data.Operations -import GF.Data.Glue - -import Data.Char -import Data.List (sortBy, intersperse) -import Control.Monad (liftM) -import GF.Data.Trie2 - --- construct a morphological analyser from a GF grammar. AR 11/4/2001 - --- we first found the binary search tree sorted by word forms more efficient --- than a trie, at least for grammars with 7000 word forms --- (18\/11\/2003) but this may change since we have to use a trie --- for decompositions and also want to use it in the parser - -type Morpho = Trie Char String - -emptyMorpho :: Morpho -emptyMorpho = emptyTrie - -appMorpho :: Morpho -> String -> (String,[String]) -appMorpho = appMorphoOnly ----- add lookup for literals - --- without literals -appMorphoOnly :: Morpho -> String -> (String,[String]) -appMorphoOnly m s = trieLookup m s - --- recognize word, exluding literals -isKnownWord :: Morpho -> String -> Bool -isKnownWord mo = not . null . snd . appMorphoOnly mo - -mkMorpho :: CanonGrammar -> Ident -> Morpho -mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where - - comp = ccompute gr [] -- to undo 'values' optimization - - mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun - mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun - - -- gather forms of lexical items - allLins fun@(m,f) = errVal [] $ do - ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues - ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts - return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] - prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)]) - - -- gather syncategorematic words - allSyns fun@(m,f) = errVal [] $ do - tss <- allLinsOfFun gr (CIQ a f) - let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] - return $ concat $ map wordsInTerm ss - prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f]) - - -- all words, Left from lexical rules and Right syncategorematic - allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where - allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]] - lexRole t = case M.typeForm t of - Ok ([],_,_) -> Left - _ -> Right - --- printing full-form lexicon and results - -prMorpho :: Morpho -> String -prMorpho = unlines . map prMorphoAnalysis . collapse - -prMorphoAnalysis :: (String,[String]) -> String -prMorphoAnalysis (w,fs0) = - let fs = filter (not . null) fs0 in - if null fs then w ++++ "*" else unlines (w:fs) - -prMorphoAnalysisShort :: (String,[String]) -> String -prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where - w' = if null fs then w +++ "*" else "" - -tagPrt :: Print a => (a,a) -> String -tagPrt (m,c) = "+" ++ prt c --- module name - --- | print all words recognized -allMorphoWords :: Morpho -> [String] -allMorphoWords = map fst . collapse - --- analyse running text and show results either in short form or on separate lines - --- | analyse running text and show just the word, with "*" if not found -morphoTextStatus :: Morpho -> String -> String -morphoTextStatus mo = unlines . map (prMark . appMorpho mo) . words where - prMark (w,fs) = if null fs then "*" +++ w else w - --- | analyse running text and show results in short form, one word per line -morphoTextShort :: Morpho -> String -> String -morphoTextShort mo = unlines . map (prMorphoAnalysisShort . appMorpho mo) . words - --- | analyse running text and show results on separate lines -morphoText :: Morpho -> String -> String -morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words - --- format used in the Italian Verb Engine -prFullForm :: Morpho -> String -prFullForm = unlines . map prOne . collapse where - prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) - --- using Huet's unglueing method to find word boundaries ----- it would be much better to use a trie also for morphological analysis, ----- so this is for the sake of experiment ----- Moreover, we should specify the cases in which this happens - not all words - -decomposeWords :: Morpho -> String -> [String] -decomposeWords mo s = errVal (words s) $ decomposeSimple mo s diff --git a/src/GF/UseGrammar/Paraphrases.hs b/src/GF/UseGrammar/Paraphrases.hs deleted file mode 100644 index d04f22aa6..000000000 --- a/src/GF/UseGrammar/Paraphrases.hs +++ /dev/null @@ -1,70 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Paraphrases --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:49 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.6 $ --- --- paraphrases of GF terms. AR 6\/10\/1998 -- 24\/9\/1999 -- 5\/7\/2000 -- 5\/6\/2002 --- --- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL) --- --- thus inherited from the old GF. Incomplete and inefficient... ------------------------------------------------------------------------------ - -module GF.UseGrammar.Paraphrases (mkParaphrases) where - -import GF.Grammar.Abstract -import GF.Grammar.PrGrammar -import GF.Grammar.LookAbs -import GF.Grammar.AbsCompute - -import GF.Data.Operations - -import Data.List (nub) - --- paraphrases of GF terms. AR 6/10/1998 -- 24/9/1999 -- 5/7/2000 -- 5/6/2002 --- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL) --- thus inherited from the old GF. Incomplete and inefficient... - -mkParaphrases :: GFCGrammar -> Term -> [Term] -mkParaphrases st = nub . map (beta []) . paraphrases (allDefs st) - -type Definition = (Fun,Term) - -paraphrases :: [Definition] -> Term -> [Term] -paraphrases th t = - paraImmed th t ++ ---- paraMatch th t ++ - case t of - App c a -> [App d b | d <- paraphrases th c, b <- paraphrases th a] - Abs x b -> [Abs x d | d <- paraphrases th b] - c -> [] - ++ [t] - -paraImmed :: [Definition] -> Term -> [Term] -paraImmed defs t = - [Q m f | ((m,f), u) <- defs, t == u] ++ --- eqTerm - case t of - ---- Cn c -> [u | (f, u) <- defs, eqStrIdent f c] - _ -> [] - -{- --- -paraMatch :: [Definition] -> Trm -> [Trm] -paraMatch th@defs t = - [mkApp (Cn f) xx | (PC f zz, u) <- defs, - let (fs,sn) = fullApp u, fs == h, length sn == length zz] ++ - case findAMatch defs t of - Ok (g,b) -> [substTerm [] g b] - _ -> [] - where - (h,xx) = fullApp t - fullApp c = case c of - App f a -> (f', a' ++ [a]) where (f',a') = fullApp f - c -> (c,[]) - --} diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs deleted file mode 100644 index 2ca057410..000000000 --- a/src/GF/UseGrammar/Parsing.hs +++ /dev/null @@ -1,177 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Parsing --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/06/02 10:23:52 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.25 $ --- --- (Description of the module) ------------------------------------------------------------------------------ - -module GF.UseGrammar.Parsing where - -import GF.Infra.CheckM -import qualified GF.Canon.AbsGFC as C -import GF.Canon.GFC -import GF.Canon.MkGFC (trExp) ---- -import GF.Canon.CMacros -import GF.Grammar.MMacros (refreshMetas) -import GF.UseGrammar.Linear -import GF.Data.Str -import GF.CF.CF -import GF.CF.CFIdent -import GF.Infra.Ident -import GF.Grammar.TypeCheck -import GF.Grammar.Values ---import CFMethod -import GF.UseGrammar.Tokenize -import GF.UseGrammar.Morphology (isKnownWord) -import GF.CF.Profile -import GF.Infra.Option -import GF.UseGrammar.Custom -import GF.Compile.ShellState - -import GF.CF.PPrCF (prCFTree) --- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE -import qualified GF.Parsing.GFC as New - -import GF.Data.Operations - -import Data.List (nub,sortBy) -import Data.Char (toLower) -import Control.Monad (liftM) - --- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002 - -parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree] -parseString os sg cat = liftM fst . parseStringMsg os sg cat - -parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String) -parseStringMsg os sg cat s = do - case checkStart $ parseStringC os sg cat s of - Ok (ts,(_,ss)) -> return (ts, unlines $ reverse ss) - Bad s -> return ([],s) - -parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] -parseStringC opts0 sg cat s - | oElem (iOpt "old") opts0 || - (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do - let opts = unionOptions opts0 $ stateOptions sg - cf = stateCF sg - gr = stateGrammarST sg - cn = cncId sg - toks = customOrDefault opts useTokenizer customTokenizer sg s - parser = customOrDefault opts useParser customParser sg cat - if oElem (iOpt "cut") opts - then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks - else mapM (tokens2trms opts sg cn parser) toks >>= return . concat - ----- | or [oElem p opts0 | ----- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do - - | otherwise = do - let opts = unionOptions opts0 $ stateOptions sg - algorithm | oElem newCParser opts0 = "c" - | oElem newMParser opts0 = "m" - | oElem newFParser opts0 = "f" - | otherwise = "f" -- default algorithm: FCFG - strategy = maybe "bottomup" id $ getOptVal opts useParser - -- -parser=bottomup/topdown - tokenizer = customOrDefault opts useTokenizer customTokenizer sg - toks = case tokenizer s of - t:_ -> t - _ -> [] ---- no support for undet. tok. - unknowns = - [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w] - where - unk w = not $ isKnownWord (morpho sg) w - uncap (c:cs) = toLower c : cs - uncap s = s - - case unknowns of - _:_ | oElem (iOpt "trynextlang") opts -> return [] - _:_ -> fail $ "Unknown words:" +++ unwords unknowns - _ -> do - - ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks - ts' <- checkErr $ - allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts - return $ optIntOrAll opts flagNumber ts' - - -tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] -tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info - where result = parser toks - info = snd result - trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2)) - -trees2trms :: - Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree] -trees2trms opts sg cn as ts0 info = do - let s = unwords $ map prCFTok as - ts <- case () of - _ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return [] - _ | raw -> do - ts1 <- return (map cf2trm0 ts0) ----- should not need annot - checks [ - mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails - ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return [] - ] - _ -> do - let num = optIntOrN opts flagRawtrees 999999 - let (ts01,rest) = splitAt num ts0 - if null rest then return () - else raise ("Warning: only" +++ show num +++ "raw parses out of" +++ - show (length ts0) +++ - "considered; use -rawtrees=<Int> to see more" - ) - (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01 - if null ts1 then raise ss else return () - ts2 <- checkErr $ - allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ---- - if forgive then return ts2 else do - let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2] - ps = [t | (t,ss) <- tsss, - any (compatToks as) (map str2cftoks ss)] - if null ps - then raise $ "Failure in morphology." ++ - if verb - then "\nPossible corrections: " +++++ - unlines (nub (map sstr (concatMap snd tsss))) - else "" - else return ps - if verb - then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info - else return () - - return $ optIntOrAll opts flagNumber $ nub ts - where - gr = stateGrammarST sg - - raw = oElem rawParse opts - verb = oElem beVerbose opts - forgive = oElem forgiveParse opts - ----- Operatins.allChecks :: ErrorMonad m => [m a] -> m [a] - -unknownWords sg ts = case filter noMatch [t | t@(TS _) <- ts] of - [] -> "where all words are known" - us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals - where - terminals = map TS $ stateGrammarWords sg - noMatch t = all (not . compatTok t) terminals - - ---- too much type checking in building term info? return FullTerm to save work? - --- | raw parsing: so simple it is for a context-free CF grammar -cf2trm0 :: CFTree -> C.Exp -cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees) - where - cffun2trm (CFFun (fun,_)) = fun - mkApp = foldl C.EApp - mkAppAtom a = mkApp (C.EAtom a) diff --git a/src/GF/UseGrammar/Randomized.hs b/src/GF/UseGrammar/Randomized.hs deleted file mode 100644 index c1c77edb2..000000000 --- a/src/GF/UseGrammar/Randomized.hs +++ /dev/null @@ -1,66 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Randomized --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:51 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.8 $ --- --- random generation and refinement. AR 22\/8\/2001. --- implemented as sequence of refinement menu selecsions, encoded as integers ------------------------------------------------------------------------------ - -module GF.UseGrammar.Randomized where - -import GF.Grammar.Abstract -import GF.UseGrammar.Editing - -import GF.Data.Operations -import GF.Data.Zipper - ---- import Arch (myStdGen) --- circular for hbc -import System.Random --- (mkStdGen, StdGen, randoms) --- bad import for hbc - --- random generation and refinement. AR 22/8/2001 --- implemented as sequence of refinement menu selecsions, encoded as integers - -myStdGen :: Int -> StdGen -myStdGen = mkStdGen --- - --- | build one random tree; use mx to prevent infinite search -mkRandomTree :: StdGen -> Int -> CGrammar -> Either Cat Fun -> Err Tree -mkRandomTree gen mx gr cat = mkTreeFromInts (take mx (randoms gen)) gr cat - -refineRandom :: StdGen -> Int -> CGrammar -> Action -refineRandom gen mx = mkStateFromInts $ take mx $ map abs (randoms gen) - --- | build a tree from a list of integers -mkTreeFromInts :: [Int] -> CGrammar -> Either Cat Fun -> Err Tree -mkTreeFromInts ints gr catfun = do - st0 <- either (\cat -> newCat gr cat initState) - (\fun -> newFun gr fun initState) - catfun - state <- mkStateFromInts ints gr st0 - return $ loc2tree state - -mkStateFromInts :: [Int] -> CGrammar -> Action -mkStateFromInts ints gr z = mkRandomState ints z >>= reCheckState gr where - mkRandomState [] state = do - testErr (isCompleteState state) "not completed" - return state - mkRandomState (n:ns) state = do - let refs = refinementsState gr state - refs0 = map (not . snd . snd) refs - testErr (not (null refs0)) $ "no nonrecursive refinements available for" +++ - prt (actVal state) - (ref,_) <- (refs !? (n `mod` (length refs))) - state1 <- refineWithAtom False gr ref state - if isCompleteState state1 - then return state1 - else do - state2 <- goNextMeta state1 - mkRandomState ns state2 - diff --git a/src/GF/UseGrammar/Session.hs b/src/GF/UseGrammar/Session.hs deleted file mode 100644 index e54d0e3fb..000000000 --- a/src/GF/UseGrammar/Session.hs +++ /dev/null @@ -1,181 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Session --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/08/17 15:13:55 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.12 $ --- --- (Description of the module) ------------------------------------------------------------------------------ - -module GF.UseGrammar.Session where - -import GF.Grammar.Abstract -import GF.Infra.Option -import GF.UseGrammar.Custom -import GF.UseGrammar.Editing -import GF.Compile.ShellState ---- grammar - -import GF.Data.Operations -import GF.Data.Zipper (keepPosition) --- - --- First version 8/2001. Adapted to GFC with modules 19/6/2003. --- Nothing had to be changed, which is a sign of good modularity. - --- keep these abstract - --- | 'Exp'-list: candidate refinements,clipboard -type SState = [(State,([Exp],[Clip]),SInfo)] - --- | 'String' is message, 'Int' is the view -type SInfo = ([String],(Int,Options)) - -initSState :: SState -initSState = [(initState, ([],[]), (["Select 'New' category to start"],(0,noOptions)))] - -- instead of empty - -type Clip = Tree ---- (Exp,Type) - --- | (peb): Something wrong with this definition?? --- Shouldn't the result type be 'SInfo'? --- --- > okInfo :: Int -> SInfo == ([String], (Int, Options)) -okInfo :: n -> ([s], (n, Bool)) -okInfo n = ([],(n,True)) - -stateSState :: SState -> State -candsSState :: SState -> [Exp] -clipSState :: SState -> [Clip] -infoSState :: SState -> SInfo -msgSState :: SState -> [String] -viewSState :: SState -> Int -optsSState :: SState -> Options - -stateSState ((s,_,_):_) = s -candsSState ((_,(ts,_),_):_)= ts -clipSState ((_,(_,ts),_):_)= ts -infoSState ((_,_,i):_) = i -msgSState ((_,_,(m,_)):_) = m -viewSState ((_,_,(_,(v,_))):_) = v -optsSState ((_,_,(_,(_,o))):_) = o - -treeSState :: SState -> Tree -treeSState = actTree . stateSState - - --- | from state to state -type ECommand = SState -> SState - --- * elementary commands - --- ** change state, drop cands, drop message, preserve options - -changeState :: State -> ECommand -changeState s ss = changeMsg [] $ (s,([],clipSState ss),infoSState ss) : ss - -changeCands :: [Exp] -> ECommand -changeCands ts ss@((s,(_,cb),(_,b)):_) = (s,(ts,cb),(candInfo ts,b)) : ss - -addtoClip :: Clip -> ECommand -addtoClip t ss@((s,(ts,cb),(i,b)):_) = (s,(ts,t:cb),(i,b)) : ss - -removeClip :: Int -> ECommand -removeClip n ss@((s,(ts,cb),(i,b)):_) = (s,(ts, drop n cb),(i,b)) : ss - -changeMsg :: [String] -> ECommand -changeMsg m ((s,ts,(_,b)):ss) = (s,ts,(m,b)) : ss -- just change message -changeMsg m _ = (s,ts,(m,b)) : [] where [(s,ts,(_,b))] = initSState - -changeView :: ECommand -changeView ((s,ts,(m,(v,b))):ss) = (s,ts,(m,(v+1,b))) : ss -- toggle view - -withMsg :: [String] -> ECommand -> ECommand -withMsg m c = changeMsg m . c - -changeStOptions :: (Options -> Options) -> ECommand -changeStOptions f ((s,ts,(m,(v,o))):ss) = (s,ts,(m,(v, f o))) : ss - -noNeedForMsg :: ECommand -noNeedForMsg = changeMsg [] -- everything's all right: no message - -candInfo :: [Exp] -> [String] -candInfo ts = case length ts of - 0 -> ["no acceptable alternative"] - 1 -> ["just one acceptable alternative"] - n -> [show n +++ "alternatives to select"] - --- * keep SState abstract from this on - --- ** editing commands - -action2command :: Action -> ECommand -action2command act state = case act (stateSState state) of - Ok s -> changeState s state - Bad m -> changeMsg [m] state - -action2commandNext :: Action -> ECommand -- move to next meta after execution -action2commandNext act = action2command (\s -> act s >>= goNextMetaIfCan) - -action2commandKeep :: Action -> ECommand -- keep old position after execution -action2commandKeep act = action2command (\s -> keepPosition act s) - -undoCommand :: Int -> ECommand -undoCommand n ss = - let k = length ss in - if k < n - then changeMsg ["cannot go all the way back"] [last ss] - else changeMsg ["successful undo"] (drop n ss) - -selectCand :: CGrammar -> Int -> ECommand -selectCand gr i state = err (\m -> changeMsg [m] state) id $ do - exp <- candsSState state !? i - let s = stateSState state - tree <- annotateInState gr exp s - return $ case replaceSubTree tree s of - Ok st' -> changeState st' state - Bad s -> changeMsg [s] state - -refineByExps :: Bool -> CGrammar -> [Exp] -> ECommand -refineByExps der gr trees = case trees of - [t] -> action2commandNext (refineWithExpTC der gr t) - _ -> changeCands trees - -refineByTrees :: Bool -> CGrammar -> [Tree] -> ECommand -refineByTrees der gr trees = case trees of - [t] -> action2commandNext (refineOrReplaceWithTree der gr t) - _ -> changeCands $ map tree2exp trees - -replaceByTrees :: CGrammar -> [Exp] -> ECommand -replaceByTrees gr trees = case trees of - [t] -> action2commandNext (\s -> - annotateExpInState gr t s >>= flip replaceSubTree s) - _ -> changeCands trees - -replaceByEditCommand :: StateGrammar -> String -> ECommand -replaceByEditCommand gr co = - action2commandKeep $ - maybe return ($ gr) $ - lookupCustom customEditCommand (strCI co) - -replaceByTermCommand :: Bool -> StateGrammar -> String -> Tree -> ECommand ---- -replaceByTermCommand der gr co exp = - let g = grammar gr in - refineByTrees der g $ maybe [exp] (\f -> f gr exp) $ - lookupCustom customTermCommand (strCI co) - -possClipsSState :: StateGrammar -> SState -> [(Int,Clip)] -possClipsSState gr s = filter poss $ zip [0..] (clipSState s) - where - poss = possibleTreeVal cgr st . snd - st = stateSState s - cgr = grammar gr - -getNumberedClip :: Int -> SState -> Err Clip -getNumberedClip i s = if length cs > i then return (cs !! i) - else Bad "not enough clips" - where - cs = clipSState s diff --git a/src/GF/UseGrammar/Statistics.hs b/src/GF/UseGrammar/Statistics.hs deleted file mode 100644 index 46e4fcc3b..000000000 --- a/src/GF/UseGrammar/Statistics.hs +++ /dev/null @@ -1,44 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Statistics --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/09/04 11:45:38 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.1 $ --- --- statistics on canonical grammar: amounts of generated code --- AR 4\/9\/2005. --- uses canonical grammar ------------------------------------------------------------------------------ - -module GF.UseGrammar.Statistics (prStatistics) where - -import GF.Infra.Modules -import GF.Infra.Option -import GF.Grammar.PrGrammar -import GF.Canon.GFC -import GF.Canon.MkGFC - -import GF.Data.Operations - -import Data.List (sortBy) - --- | the top level function -prStatistics :: CanonGrammar -> String -prStatistics can = unlines $ [ - show (length mods) ++ "\t\t modules", - show chars ++ "\t\t gfc size", - "", - "Top 40 definitions" - ] ++ - [show d ++ "\t\t " ++ f | (d,f) <- tops] - where - tops = take 40 $ reverse $ sortBy (\ (i,_) (j,_) -> compare i j) defs - defs = [(length (prt (info2def j)), name m j) | (m,j) <- infos] - infos = [(m,j) | (m,ModMod mo) <- mods, j <- tree2list (jments mo)] - name m (f,_) = prt m ++ "." ++ prt f - mods = modules can - chars = length $ prCanon can diff --git a/src/GF/UseGrammar/Tokenize.hs b/src/GF/UseGrammar/Tokenize.hs deleted file mode 100644 index 9f1ab5449..000000000 --- a/src/GF/UseGrammar/Tokenize.hs +++ /dev/null @@ -1,222 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Tokenize --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/09/29 13:20:08 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.14 $ --- --- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002. --- an entry for each is included in 'Custom.customTokenizer' ------------------------------------------------------------------------------ - -module GF.UseGrammar.Tokenize ( tokWords, - tokLits, - tokVars, - lexHaskell, - lexHaskellLiteral, - lexHaskellVar, - lexText, - lexTextVar, - lexC2M, lexC2M', - lexTextLiteral, - lexIgnore, - wordsLits - ) where - -import GF.Data.Operations ----- import UseGrammar (isLiteral,identC) -import GF.CF.CFIdent - -import Data.Char - --- lexers = tokenizers, to prepare input for GF grammars. AR 4/1/2002 --- an entry for each is included in Custom.customTokenizer - --- | just words -tokWords :: String -> [CFTok] -tokWords = map tS . words - -tokLits :: String -> [CFTok] -tokLits = map mkCFTok . mergeStr . wordsLits where - mergeStr ss = case ss of - w@(c:cs):rest | elem c "\'\"" && c /= last w -> getStr [w] rest - w :rest -> w : mergeStr rest - [] -> [] - getStr v ss = case ss of - w@(_:_):rest | elem (last w) "\'\"" -> (unwords (reverse (w:v))) : mergeStr rest - w :rest -> getStr (w:v) rest - [] -> reverse v - -tokVars :: String -> [CFTok] -tokVars = map mkCFTokVar . wordsLits - -isFloat s = case s of - c:cs | isDigit c -> isFloat cs - '.':cs@(_:_) -> all isDigit cs - _ -> False - -isString s = case s of - c:cs@(_:_) -> (c == '\'' && d == '\'') || (c == '"' && d == '"') where d = last cs - _ -> False - - -mkCFTok :: String -> CFTok -mkCFTok s = case s of - '"' :cs@(_:_) | last cs == '"' -> tL $ init cs - '\'':cs@(_:_) | last cs == '\'' -> tL $ init cs --- 's Gravenhage - _:_ | isFloat s -> tF s - _:_ | all isDigit s -> tI s - _ -> tS s - -mkCFTokVar :: String -> CFTok -mkCFTokVar s = case s of - '?':_:_ -> tM s --- "?" --- compat with prCF - 'x':'_':_ -> tV s - 'x':[] -> tV s - '$':xs@(_:_) -> if last s == '$' then tV (init xs) else tS s - _ -> tS s - -mkTokVars :: (String -> [CFTok]) -> String -> [CFTok] -mkTokVars tok = map tv . tok where - tv (TS s) = mkCFTokVar s - tv t = t - -mkLit :: String -> CFTok -mkLit s - | isFloat s = tF s - | all isDigit s = tI s - | otherwise = tL s - --- obsolete -mkTL :: String -> CFTok -mkTL s - | isFloat s = tF s - | all isDigit s = tI s - | otherwise = tL ("'" ++ s ++ "'") - - --- | Haskell lexer, usable for much code -lexHaskell :: String -> [CFTok] -lexHaskell ss = case lex ss of - [(w@(_:_),ws)] -> tS w : lexHaskell ws - _ -> [] - --- | somewhat shaky text lexer -lexText :: String -> [CFTok] -lexText = uncap . lx where - - lx s = case s of - '?':'?':cs -> tS "??" : lx cs - p : cs | isMPunct p -> tS [p] : uncap (lx cs) - p : cs | isPunct p -> tS [p] : lx cs - s : cs | isSpace s -> lx cs - _ : _ -> getWord s - _ -> [] - - getWord s = tS w : lx ws where (w,ws) = span isNotSpec s - isMPunct c = elem c ".!?" - isPunct c = elem c ",:;()\"" - isNotSpec c = not (isMPunct c || isPunct c || isSpace c) - uncap (TS (c:cs) : ws) = tC (c:cs) : ws - uncap s = s - --- | lexer for C--, a mini variant of C -lexC2M :: String -> [CFTok] -lexC2M = lexC2M' False - -lexC2M' :: Bool -> String -> [CFTok] -lexC2M' isHigherOrder s = case s of - '#':cs -> lexC $ dropWhile (/='\n') cs - '/':'*':cs -> lexC $ dropComment cs - c:cs | isSpace c -> lexC cs - c:cs | isAlpha c -> getId s - c:cs | isDigit c -> getLit s - c:d:cs | isSymb [c,d] -> tS [c,d] : lexC cs - c:cs | isSymb [c] -> tS [c] : lexC cs - _ -> [] --- covers end of file and unknown characters - where - lexC = lexC2M' isHigherOrder - getId s = mkT i : lexC cs where (i,cs) = span isIdChar s - getLit s = tI i : lexC cs where (i,cs) = span isDigit s ---- Float! - isIdChar c = isAlpha c || isDigit c || elem c "'_" - isSymb = reservedAnsiCSymbol - dropComment s = case s of - '*':'/':cs -> cs - _:cs -> dropComment cs - _ -> [] - mkT i = if (isRes i) then (tS i) else - if isHigherOrder then (tV i) else (tL ("'" ++ i ++ "'")) - isRes = reservedAnsiC - - -reservedAnsiCSymbol s = case lookupTree show s ansiCtree of - Ok True -> True - _ -> False - -reservedAnsiC s = case lookupTree show s ansiCtree of - Ok False -> True - _ -> False - --- | for an efficient lexer: precompile this! -ansiCtree = buildTree $ [(s,True) | s <- reservedAnsiCSymbols] ++ - [(s,False) | s <- reservedAnsiCWords] - -reservedAnsiCSymbols = words $ - "<<= >>= << >> ++ -- == <= >= *= += -= %= /= &= ^= |= " ++ - "^ { } = , ; + * - ( ) < > & % ! ~" - -reservedAnsiCWords = words $ - "auto break case char const continue default " ++ - "do double else enum extern float for goto if int " ++ - "long register return short signed sizeof static struct switch typedef " ++ - "union unsigned void volatile while " ++ - "main printin putchar" --- these are not ansi-C - --- | turn unknown tokens into string literals; not recursively for literals 123, 'foo' -unknown2string :: (String -> Bool) -> [CFTok] -> [CFTok] -unknown2string isKnown = map mkOne where - mkOne t@(TS s) - | isKnown s = t - | isFloat s = tF s - | all isDigit s = tI s - | otherwise = tL s - mkOne t@(TC s) = if isKnown s then t else mkLit s - mkOne t = t - -unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok] -unknown2var isKnown = map mkOne where - mkOne t@(TS "??") = if isKnown "??" then t else tM "??" - mkOne t@(TS s) - | isKnown s = t - | isFloat s = tF s - | isString s = tL (init (tail s)) - | all isDigit s = tI s - | otherwise = tV s - mkOne t@(TC s) = if isKnown s then t else tV s - mkOne t = t - -lexTextLiteral, lexHaskellLiteral, lexHaskellVar :: (String -> Bool) -> String -> [CFTok] - -lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText -lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell - -lexHaskellVar isKnown = unknown2var isKnown . lexHaskell -lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText - - -eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs) -eitherUpper isKnown w = isKnown w - --- ignore unknown tokens (e.g. keyword spotting) - -lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok] -lexIgnore isKnown = concatMap mkOne where - mkOne t@(TS s) - | isKnown s = [t] - | otherwise = [] - mkOne t = [t] - diff --git a/src/GF/UseGrammar/Transfer.hs b/src/GF/UseGrammar/Transfer.hs deleted file mode 100644 index 5d62f4385..000000000 --- a/src/GF/UseGrammar/Transfer.hs +++ /dev/null @@ -1,79 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Transfer --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:53 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.5 $ --- --- linearize, parse, etc, by transfer. AR 9\/10\/2003 ------------------------------------------------------------------------------ - -module GF.UseGrammar.Transfer where - -import GF.Grammar.Grammar -import GF.Grammar.Values -import GF.Grammar.AbsCompute -import qualified GF.Canon.GFC as GFC -import GF.Grammar.LookAbs -import GF.Grammar.MMacros -import GF.Grammar.Macros -import GF.Grammar.PrGrammar -import GF.Grammar.TypeCheck - -import GF.Infra.Ident -import GF.Data.Operations - -import qualified Transfer.Core.Abs as T - -import Control.Monad - - --- transfer is done in T.Exp - we only need these conversions. - -exp2core :: Ident -> Exp -> T.Exp -exp2core f = T.EApp (T.EVar (var f)) . exp2c where - exp2c e = case e of - App f a -> T.EApp (exp2c f) (exp2c a) - Abs x b -> T.EAbs (T.PVVar (var x)) (exp2c b) ---- should be syntactic abstr - Q _ c -> T.EVar (var c) - QC _ c -> T.EVar (var c) - K s -> T.EStr s - EInt i -> T.EInteger $ toInteger i - Meta m -> T.EMeta (T.TMeta (prt m)) ---- which meta symbol? - Vr x -> T.EVar (var x) ---- should be syntactic var - - var x = T.CIdent $ prt x - -core2exp :: T.Exp -> Exp -core2exp e = case e of - T.EApp f a -> App (core2exp f) (core2exp a) - T.EAbs (T.PVVar x) b -> Abs (var x) (core2exp b) ---- only from syntactic abstr - T.EVar c -> Vr (var c) -- GF annotates to Q or QC - T.EStr s -> K s - T.EInteger i -> EInt $ fromInteger i - T.EMeta _ -> uExp -- meta symbol 0, refreshed by GF - where - var :: T.CIdent -> Ident - var (T.CIdent x) = zIdent x - - - --- The following are now obsolete (30/11/2005) --- linearize, parse, etc, by transfer. AR 9/10/2003 - -doTransfer :: GFC.CanonGrammar -> Ident -> Tree -> Err Tree -doTransfer gr tra t = do - cat <- liftM snd $ val2cat $ valTree t - f <- lookupTransfer gr tra cat - e <- compute gr $ App f $ tree2exp t - annotate gr e - -useByTransfer :: (Tree -> Err a) -> GFC.CanonGrammar -> Ident -> (Tree -> Err a) -useByTransfer lin gr tra t = doTransfer gr tra t >>= lin - -mkByTransfer :: (a -> Err [Tree]) -> GFC.CanonGrammar -> Ident -> (a -> Err [Tree]) -mkByTransfer parse gr tra s = parse s >>= mapM (doTransfer gr tra) diff --git a/src/GF/UseGrammar/TreeSelections.hs b/src/GF/UseGrammar/TreeSelections.hs deleted file mode 100644 index 9bf2711be..000000000 --- a/src/GF/UseGrammar/TreeSelections.hs +++ /dev/null @@ -1,77 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : TreeSelections --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- choose shallowest trees, and remove an overload resolution prefix ------------------------------------------------------------------------------ - -module GF.UseGrammar.TreeSelections ( - - getOverloadResults, smallestTrs, sizeTr, depthTr - - ) where - -import GF.Grammar.Abstract -import GF.Grammar.Macros - -import GF.Data.Operations -import GF.Data.Zipper -import Data.List - --- AR 2/7/2007 --- The top-level function takes a set of trees (typically parses) --- and returns the list of those trees that have the minimum size. --- In addition, the overload prefix "ovrld123_", is removed --- from each constructor in which it appears. This is used for --- showing the library API constructors in a parsable grammar. --- TODO: access the generic functions smallestTrs, sizeTr, depthTr from shell - -getOverloadResults :: [Tree] -> [Tree] -getOverloadResults = smallestTrs sizeTr . map (mkOverload "ovrld") - --- NB: this does not always give the desired result, since --- some genuine alternatives may be deeper: now we will exclude the --- latter of --- --- mkCl this_NP love_V2 (mkNP that_NP here_Adv) --- mkCl this_NP (mkVP (mkVP love_V2 that_NP) here_Adv) --- --- A perfect method would know the definitional equivalences of constructors. --- --- Notice also that size is a better measure than depth, because: --- 1. Global depth does not exclude the latter of --- --- mkCl (mkNP he_Pron) love_V2 that_NP --- mkCl (mkNP he_Pron) (mkVP love_V2 that_NP) --- --- 2. Length is needed to exclude the latter of --- --- mkS (mkCl (mkNP he_Pron) love_V2 that_NP) --- mkS presentTense (mkCl (mkNP he_Pron) love_V2 that_NP) --- - -smallestTrs :: (Tr a -> Int) -> [Tr a] -> [Tr a] -smallestTrs size ts = map fst $ filter ((==mx) . snd) tds where - tds = [(t, size t) | t <- ts] - mx = minimum $ map snd tds - -depthTr :: Tr a -> Int -depthTr (Tr (_, ts)) = case ts of - [] -> 1 - _ -> 1 + (maximum $ map depthTr ts) - -sizeTr :: Tr a -> Int -sizeTr (Tr (_, ts)) = 1 + sum (map sizeTr ts) - --- remove from each constant a prefix starting with "pref", up to first "_" --- example format: ovrld123_mkNP - -mkOverload :: String -> Tree -> Tree -mkOverload pref = mapTr (changeAtom overAtom) where - overAtom a = case a of - AtC (m, IC f) | isPrefixOf pref f -> - AtC (m, IC (tail (dropWhile (/='_') f))) - _ -> a diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs deleted file mode 100644 index 841a9c6dc..000000000 --- a/src/GF/UseGrammar/Treebank.hs +++ /dev/null @@ -1,251 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Treebank --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- Generate multilingual treebanks. AR 8\/2\/2006 --- --- (c) Aarne Ranta 2006 under GNU GPL --- --- Purpose: to generate treebanks. ------------------------------------------------------------------------------ - -module GF.UseGrammar.Treebank ( - mkMultiTreebank, - mkUniTreebank, - multi2uniTreebank, - uni2multiTreebank, - testMultiTreebank, - treesTreebank, - getTreebank, - getUniTreebank, - readUniTreebanks, - readMultiTreebank, - lookupTreebank, - assocsTreebank, - isWordInTreebank, - printAssoc, - mkCompactTreebank - ) where - -import GF.Compile.ShellState -import GF.UseGrammar.Linear -- (linTree2string) -import GF.UseGrammar.Custom -import GF.UseGrammar.GetTree (string2tree) -import GF.Grammar.TypeCheck (annotate) -import GF.Canon.CMacros (noMark) -import GF.Grammar.Grammar (Trm) -import GF.Grammar.MMacros (exp2tree) -import GF.Grammar.Macros (zIdent) -import GF.Grammar.PrGrammar (prt_,prt) -import GF.Grammar.Values (tree2exp) -import GF.Data.Operations -import GF.Infra.Option -import GF.Infra.Ident (Ident) -import GF.Infra.UseIO -import qualified GF.Grammar.Abstract as A -import qualified Data.Map as M -import qualified Data.Set as S -import qualified Data.List as L -import Control.Monad (liftM) -import System.FilePath - --- Generate a treebank with a multilingual grammar. AR 8/2/2006 --- (c) Aarne Ranta 2006 under GNU GPL - --- keys are trees; format: XML file -type MultiTreebank = [(String,[(String,String)])] -- tree,lang,lin - --- keys are strings; format: string TAB tree TAB ... TAB tree -type UniTreebank = Treebank -- M.Map String [String] -- string,tree - --- both formats can be read from both kinds of files -readUniTreebanks :: FilePath -> IO [(Ident,UniTreebank)] -readUniTreebanks file = do - s <- readFileIf file - return $ if isMultiTreebank s - then multi2uniTreebank $ getTreebank $ lines s - else - let tb = getUniTreebank $ lines s - in [(zIdent (dropExtension file),tb)] - -readMultiTreebank :: FilePath -> IO MultiTreebank -readMultiTreebank file = do - s <- readFileIf file - return $ if isMultiTreebank s - then getTreebank $ lines s - else uni2multiTreebank (zIdent (dropExtension file)) $ getUniTreebank $ lines s - -isMultiTreebank :: String -> Bool -isMultiTreebank s = take 10 s == "<treebank>" - -multi2uniTreebank :: MultiTreebank -> [(Ident,UniTreebank)] -multi2uniTreebank mt@((_,lls):_) = [(zIdent la, mkTb la) | (la,_) <- lls] where - mkTb la = M.fromListWith (++) [(s,[t]) | (t,lls) <- mt, (l,s) <- lls, l==la] -multi2uniTreebank [] = [] - -uni2multiTreebank :: Ident -> UniTreebank -> MultiTreebank -uni2multiTreebank la tb = - [(t,[(prt_ la, s)]) | (s,ts) <- assocsTreebank tb, t <- ts] - --- | the main functions - --- builds a treebank where trees are the keys, and writes a file (opt. XML) -mkMultiTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res -mkMultiTreebank opts sh com trees - | oElem (iOpt "compact") opts = mkCompactTreebank opts sh trees -mkMultiTreebank opts sh com trees = - putInXML opts "treebank" comm (concatMap mkItem tris) where - mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs) --- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs) - mkTree t = putInXML opts "tree" [] (puts $ showTree t) - mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize opts sh lg t) - - langs = [prt_ l | l <- allLanguages sh] - comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr - abstr = "" --- "Abs" ---- - cat i = " number=" ++ show (show i) --- " cat=" ++ show "S" ---- - lang lg = " lang=" ++ show (prt_ (zIdent lg)) - tris = zip trees [1..] - --- builds a unilingual treebank where strings are the keys into an internal treebank - -mkUniTreebank :: Options -> ShellState -> Language -> [A.Tree] -> Treebank -mkUniTreebank opts sh lg trees = M.fromListWith (++) [(lin t, [prt_ t]) | t <- trees] - where - lang = prt_ lg - lin t = linearize opts sh lang t - --- reads a treebank and linearizes its trees again, printing all differences -testMultiTreebank :: Options -> ShellState -> String -> Res -testMultiTreebank opts sh = putInXML opts "testtreebank" [] . - concatMap testOne . - getTreebanks . lines - where - testOne (e,lang,str0) = do - let tr = annot gr e - let str = linearize opts sh lang tr - if str == str0 then ret else putInXML opts "diff" [] $ concat [ - putInXML opts "tree" [] (puts $ showTree tr), - putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0, - putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str - ] - gr = firstStateGrammar sh - --- writes all the trees of the treebank -treesTreebank :: Options -> String -> [String] -treesTreebank _ = terms . getTreebank . lines where - terms ts = [t | (t,_) <- ts] - --- string vs. IO -type Res = [String] -- IO () -puts :: String -> Res -puts = return -- putStrLn -ret = [] -- return () --- - --- here strings are keys -assocsTreebank :: UniTreebank -> [(String,[String])] -assocsTreebank = M.assocs - -isWordInTreebank :: UniTreebank -> String -> Bool -isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb))) - -printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts] - -getTreebanks :: [String] -> [(String,String,String)] -getTreebanks = concatMap grps . getTreebank where - grps (t,lls) = [(t,x,y) | (x,y) <- lls] - -getTreebank :: [String] -> MultiTreebank -getTreebank ll = case ll of - l:ls@(_:_:_) -> - let (l1,l2) = getItem ls - (tr,lins) = getTree l1 - lglins = getLins lins - in (tr,lglins) : getTreebank l2 - _ -> [] - where - getItem = span ((/="</item") . take 6) - - getTree (_:ss) = - let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2) - - getLins (beg:str:end:ss) = (getLang beg, str):getLins ss - getLins _ = [] - - getLang = takeWhile (/='"') . tail . dropWhile (/='"') - -getUniTreebank :: [String] -> UniTreebank -getUniTreebank ls = M.fromListWith (++) [(s, ts) | s:ts <- map chop ls] where - chop = chunks '\t' - -lookupTreebank :: Treebank -> String -> [String] -lookupTreebank tb s = maybe [] id $ M.lookup s tb - -annot :: StateGrammar -> String -> A.Tree -annot gr s = errVal (error "illegal tree") $ do - let t = tree2exp $ string2tree gr s - annotate (grammar gr) t - -putInXML :: Options -> String -> String -> Res -> Res -putInXML opts tag attrs io = - (ifXML $ puts $ tagXML $ tag ++ attrs) ++ - io ++ - (ifXML $ puts $ tagXML $ '/':tag) - where - ifXML c = if oElem showXML opts then c else [] - - -tagXML :: String -> String -tagXML s = "<" ++ s ++ ">" - --- print the treebank in a compact format: --- first a sorted list of all words, referrable by index --- then the linearization of each tree, as sequences of word indices --- this format is usable in embedded translation systems. - -mkCompactTreebank :: Options -> ShellState -> [A.Tree] -> [String] -mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh - -printCompactTreebank :: (MultiTreebank,[String]) -> [String] -printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where - ws = L.sort $ L.nub $ concat $ map (concatMap (words . snd) . snd) tb - - linss = map (unwords . pad) linss0 - linss0 = map (map (show . encode) . words) allExs - allExs = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]] - encode w = maybe undefined id $ M.lookup w wmap - wmap = M.fromAscList $ zip ws [1..] - stat = unwords $ map show [length ws, length lgs, length tb, smax] - langs = unwords lgs - smax = maximum $ map length linss0 - pad ws = ws ++ replicate (smax - length ws) "0" - --- [(String,[(String,String)])] -- tree,lang,lin -mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String]) -mkJustMultiTreebank opts sh ts = - ([(prt_ t, [(la, lin la t) | la <- langs]) | t <- ts],langs) where - langs = map prt_ $ allLanguages sh - lin = linearize opts sh - - ---- these handy functions are borrowed from EmbedAPI - -linearize opts mgr lang = lin where - sgr = stateGrammarOfLangOpt False mgr zlang - cgr = canModules mgr - zlang = zIdent lang - untok = customOrDefault (addOptions opts (stateOptions sgr)) useUntokenizer customUntokenizer sgr - lin - | oElem showRecord opts = err id id . liftM prt . linearizeNoMark cgr zlang - | oElem tableLin opts = - err id id . liftM (unlines . map untok . prLinTable True) . allLinTables True cgr zlang - | oElem showAll opts = - err id id . liftM (unlines . map untok . prLinTable False) . allLinTables False cgr zlang - - | otherwise = untok . linTree2string noMark cgr zlang - -showTree t = prt_ $ tree2exp t |
