diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/UseGrammar | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/UseGrammar')
| -rw-r--r-- | src-3.0/GF/UseGrammar/Custom.hs | 494 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Editing.hs | 435 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Generate.hs | 116 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/GetTree.hs | 74 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Information.hs | 162 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Linear.hs | 292 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/MatchTerm.hs | 50 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Morphology.hs | 140 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Paraphrases.hs | 70 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Parsing.hs | 177 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Randomized.hs | 66 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Session.hs | 181 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Statistics.hs | 44 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Tokenize.hs | 222 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Transfer.hs | 79 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/TreeSelections.hs | 77 | ||||
| -rw-r--r-- | src-3.0/GF/UseGrammar/Treebank.hs | 251 |
17 files changed, 2930 insertions, 0 deletions
diff --git a/src-3.0/GF/UseGrammar/Custom.hs b/src-3.0/GF/UseGrammar/Custom.hs new file mode 100644 index 000000000..983b7f683 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Custom.hs @@ -0,0 +1,494 @@ +---------------------------------------------------------------------- +-- | +-- Module : Custom +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/16 10:21:21 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.85 $ +-- +-- A database for customizable GF shell commands. +-- +-- databases for customizable commands. AR 21\/11\/2001. +-- for: grammar parsers, grammar printers, term commands, string commands. +-- idea: items added here are usable throughout GF; nothing else need be edited. +-- they are often usable through the API: hence API cannot be imported here! +-- +-- Major redesign 3\/4\/2002: the first entry in each database is DEFAULT. +-- If no other value is given, the default is selected. +-- Because of this, two invariants have to be preserved: +-- +-- - no databases may be empty +-- +-- - additions are made to the end of the database +----------------------------------------------------------------------------- + +module GF.UseGrammar.Custom where + +import GF.Data.Operations +import GF.Text.Text +import GF.UseGrammar.Tokenize +import GF.Grammar.Values +import qualified GF.Grammar.Grammar as G +import qualified GF.Canon.AbsGFC as A +import qualified GF.Canon.GFC as C + +import qualified GF.Devel.GFCCtoJS as JS +import GF.Canon.CanonToGFCC +import qualified GF.Devel.GFCCtoHaskell as CCH + +import qualified GF.Source.AbsGF as GF +import qualified GF.Grammar.MMacros as MM +import GF.Grammar.AbsCompute +import GF.Grammar.TypeCheck +import GF.UseGrammar.Generate +import GF.UseGrammar.MatchTerm +import GF.UseGrammar.Linear (unoptimizeCanon) +------import Compile +import GF.Compile.ShellState +import GF.UseGrammar.Editing +import GF.UseGrammar.Paraphrases +import GF.Infra.Option +import GF.CF.CF +import GF.CF.CFIdent + +import GF.Canon.CanonToGrammar +import GF.CF.PPrCF +import GF.CF.PrLBNF +import GF.Grammar.PrGrammar +import GF.Compile.PrOld +import GF.Canon.MkGFC +import GF.Speech.PrGSL (gslPrinter) +import GF.Speech.PrJSGF (jsgfPrinter) +import GF.Speech.PrSRGS +import GF.Speech.PrSRGS_ABNF +import qualified GF.Speech.SISR as SISR +import GF.Speech.PrSLF +import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) +import GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) +import GF.Speech.GrammarToVoiceXML (grammar2vxml) + +import GF.Data.Zipper + +import GF.UseGrammar.Statistics +import GF.UseGrammar.Morphology +import GF.UseGrammar.Information +import GF.API.GrammarToHaskell +import GF.API.GrammarToTransfer +-----import GrammarToCanon (showCanon, showCanonOpt) +-----import qualified GrammarToGFC as GFC +import GF.Probabilistic.Probabilistic (prProbs) + +-- the cf parsing algorithms +import GF.CF.ChartParser -- OBSOLETE +import qualified GF.Parsing.CF as PCF +import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE + +-- grammar conversions -- peb 19/4-04 +-- see also customGrammarPrinter +import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE +import qualified GF.Printing.PrintParser as PrtOld -- OBSOLETE +import qualified GF.Infra.Print as Prt +import qualified GF.Conversion.GFC as Cnv +import qualified GF.Conversion.Types as CnvTypes +import qualified GF.Conversion.Haskell as CnvHaskell +import qualified GF.Conversion.Prolog as CnvProlog +import qualified GF.Conversion.TypeGraph as CnvTypeGraph +import GF.Canon.Unparametrize +import GF.Canon.Subexpressions +import GF.Canon.AbsToBNF + +import GF.Canon.GFC +import qualified GF.Canon.MkGFC as MC +import GF.CFGM.PrintCFGrammar (prCanonAsCFGM) +import GF.Visualization.VisualizeGrammar (visualizeCanonGrammar, visualizeSourceGrammar) + +import GF.API.MyParser + +import qualified GF.Infra.Modules as M +import GF.Infra.UseIO + +import Control.Monad +import Data.Char +import Data.Maybe (fromMaybe) + +-- character codings +import GF.Text.Unicode +import GF.Text.UTF8 (decodeUTF8) +import GF.Text.Greek (mkGreek) +import GF.Text.Arabic (mkArabic) +import GF.Text.Hebrew (mkHebrew) +import GF.Text.Russian (mkRussian, mkRusKOI8) +import GF.Text.Ethiopic (mkEthiopic) +import GF.Text.Tamil (mkTamil) +import GF.Text.OCSCyrillic (mkOCSCyrillic) +import GF.Text.LatinASupplement (mkLatinASupplement) +import GF.Text.Devanagari (mkDevanagari) +import GF.Text.Hiragana (mkJapanese) +import GF.Text.ExtendedArabic (mkArabic0600) +import GF.Text.ExtendedArabic (mkExtendedArabic) +import GF.Text.ExtraDiacritics (mkExtraDiacritics) + +-- minimal version also used in Hugs. AR 2/12/2002. + +-- databases for customizable commands. AR 21/11/2001 +-- for: grammar parsers, grammar printers, term commands, string commands +-- idea: items added here are usable throughout GF; nothing else need be edited +-- they are often usable through the API: hence API cannot be imported here! + +-- Major redesign 3/4/2002: the first entry in each database is DEFAULT. +-- If no other value is given, the default is selected. +-- Because of this, two invariants have to be preserved: +-- - no databases may be empty +-- - additions are made to the end of the database + +-- * these are the databases; the comment gives the name of the flag + +-- | grammarFormat, \"-format=x\" or file suffix +customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar) + +-- | grammarPrinter, \"-printer=x\" +customGrammarPrinter :: CustomData (Options -> StateGrammar -> String) + +-- | multiGrammarPrinter, \"-printer=x\" +customMultiGrammarPrinter :: CustomData (Options -> CanonGrammar -> String) + +-- | syntaxPrinter, \"-printer=x\" +customSyntaxPrinter :: CustomData (GF.Grammar -> String) + +-- | termPrinter, \"-printer=x\" +customTermPrinter :: CustomData (StateGrammar -> Tree -> String) + +-- | termCommand, \"-transform=x\" +customTermCommand :: CustomData (StateGrammar -> Tree -> [Tree]) + +-- | editCommand, \"-edit=x\" +customEditCommand :: CustomData (StateGrammar -> Action) + +-- | filterString, \"-filter=x\" +customStringCommand :: CustomData (StateGrammar -> String -> String) + +-- | useParser, \"-parser=x\" +customParser :: CustomData (StateGrammar -> CFCat -> CFParser) + +-- | useTokenizer, \"-lexer=x\" +customTokenizer :: CustomData (StateGrammar -> String -> [[CFTok]]) + +-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string +customUntokenizer :: CustomData (StateGrammar -> String -> String) + +-- | uniCoding, \"-coding=x\" +-- +-- contains conversions from different codings to the internal +-- unicode coding +customUniCoding :: CustomData (String -> String) + +-- | this is the way of selecting an item +customOrDefault :: Options -> OptFun -> CustomData a -> a +customOrDefault opts optfun db = maybe (defaultCustomVal db) id $ + customAsOptVal opts optfun db + +-- | to produce menus of custom operations +customInfo :: CustomData a -> (String, [String]) +customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c)) + +------------------------------- +-- * types and stuff + +type CommandId = String + +strCI :: String -> CommandId +strCI = id + +ciStr :: CommandId -> String +ciStr = id + +ciOpt :: CommandId -> Option +ciOpt = iOpt + +newtype CustomData a = CustomData (String, [(CommandId,a)]) + +customData :: String -> [(CommandId, a)] -> CustomData a +customData title db = CustomData (title,db) + +dbCustomData :: CustomData a -> [(CommandId, a)] +dbCustomData (CustomData (_,db)) = db + +titleCustomData :: CustomData a -> String +titleCustomData (CustomData (t,_)) = t + +lookupCustom :: CustomData a -> CommandId -> Maybe a +lookupCustom = flip lookup . dbCustomData + +customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a +customAsOptVal opts optfun db = do + arg <- getOptVal opts optfun + lookupCustom db (strCI arg) + +-- | take the first entry from the database +defaultCustomVal :: CustomData a -> a +defaultCustomVal (CustomData (s,db)) = + ifNull (error ("empty database:" +++ s)) (snd . head) db + +------------------------------------------------------------------------- +-- * and here's the customizable part: + +-- grammar parsers: the ID is also used as file name suffix +customGrammarParser = + customData "Grammar parsers, selected by file name suffix" $ + [ +------ (strCI "gf", compileModule noOptions) -- DEFAULT +-- add your own grammar parsers here + ] + + +customGrammarPrinter = + customData "Grammar printers, selected by option -printer=x" $ + [ + (strCI "gfc", \_ -> prCanon . stateGrammarST) -- DEFAULT + ,(strCI "gf", \_ -> err id prGrammar . canon2sourceGrammar . stateGrammarST) + ,(strCI "cf", \_ -> prCF . stateCF) + ,(strCI "old", \_ -> printGrammarOld . stateGrammarST) + ,(strCI "gsl", gslPrinter) + ,(strCI "jsgf", jsgfPrinter Nothing) + ,(strCI "jsgf_sisr_old", jsgfPrinter (Just SISR.SISROld)) + ,(strCI "srgs_xml", srgsXmlPrinter Nothing False) + ,(strCI "srgs_xml_non_rec", srgsXmlNonRecursivePrinter) + ,(strCI "srgs_xml_prob", srgsXmlPrinter Nothing True) + ,(strCI "srgs_xml_sisr_old", srgsXmlPrinter (Just SISR.SISROld) False) + ,(strCI "srgs_abnf", srgsAbnfPrinter Nothing False) + ,(strCI "srgs_abnf_non_rec", srgsAbnfNonRecursivePrinter) + ,(strCI "srgs_abnf_sisr_old", srgsAbnfPrinter (Just SISR.SISROld) False) + ,(strCI "vxml", grammar2vxml) + ,(strCI "slf", slfPrinter) + ,(strCI "slf_graphviz", slfGraphvizPrinter) + ,(strCI "slf_sub", slfSubPrinter) + ,(strCI "slf_sub_graphviz", slfSubGraphvizPrinter) + ,(strCI "fa_graphviz", faGraphvizPrinter) + ,(strCI "fa_c", faCPrinter) + ,(strCI "regexp", regexpPrinter) + ,(strCI "regexps", multiRegexpPrinter) + ,(strCI "regular", regularPrinter) + ,(strCI "plbnf", \_ -> prLBNF True) + ,(strCI "lbnf", \_ -> prLBNF False) + ,(strCI "bnf", \_ -> prBNF False) + ,(strCI "absbnf", \_ -> abstract2bnf . stateGrammarST) + ,(strCI "haskell", \_ -> grammar2haskell . stateGrammarST) + ,(strCI "gfcc_haskell", \opts -> CCH.grammar2haskell . + canon2gfcc opts . stateGrammarST) + ,(strCI "haskell_gadt", \_ -> grammar2haskellGADT . stateGrammarST) + ,(strCI "transfer", \_ -> grammar2transfer . stateGrammarST) + ,(strCI "morpho", \_ -> prMorpho . stateMorpho) + ,(strCI "fullform",\_ -> prFullForm . stateMorpho) + ,(strCI "opts", \_ -> prOpts . stateOptions) + ,(strCI "words", \_ -> unwords . stateGrammarWords) + ,(strCI "printnames", \_ -> C.prPrintnamesGrammar . stateGrammarST) + ,(strCI "stat", \_ -> prStatistics . stateGrammarST) + ,(strCI "probs", \_ -> prProbs . stateProbs) + ,(strCI "unpar", \_ -> prCanon . unparametrizeCanon . stateGrammarST) + ,(strCI "subs", \_ -> prSubtermStat . stateGrammarST) + +{- ---- + (strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT + ,(strCI "canon", showCanon "Lang" . stateGrammarST) + ,(strCI "gfc", GFC.showGFC . stateGrammarST) + ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST) +-} + +-- add your own grammar printers here + +-- grammar conversions: + ,(strCI "mcfg", \_ -> Prt.prt . stateMCFG) + ,(strCI "fcfg", \_ -> Prt.prt . fst . stateFCFG) + ,(strCI "cfg", \_ -> Prt.prt . stateCFG) + ,(strCI "pinfo", \_ -> Prt.prt . statePInfo) + ,(strCI "abstract", \_ -> Prt.prtAfter "\n" . Cnv.gfc2abstract . stateGrammarLang) + + ,(strCI "functiongraph",\_ -> CnvTypeGraph.prtFunctionGraph . Cnv.gfc2simple noOptions . stateGrammarLang) + ,(strCI "typegraph", \_ -> CnvTypeGraph.prtTypeGraph . Cnv.gfc2simple noOptions . stateGrammarLang) + + ,(strCI "gfc-haskell", \_ -> CnvHaskell.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts) + ,(strCI "mcfg-haskell", \_ -> CnvHaskell.prtMGrammar . stateMCFG) + ,(strCI "cfg-haskell", \_ -> CnvHaskell.prtCGrammar . stateCFG) + ,(strCI "gfc-prolog", \_ -> CnvProlog.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts) + ,(strCI "mcfg-prolog", \_ -> CnvProlog.prtMGrammar . stateMCFG) + ,(strCI "cfg-prolog", \_ -> CnvProlog.prtCGrammar . stateCFG) + +-- obsolete, or only for testing: + ,(strCI "abs-skvatt", \_ -> Cnv.abstract2skvatt . Cnv.gfc2abstract . stateGrammarLang) + ,(strCI "cfg-skvatt", \_ -> Cnv.cfg2skvatt . stateCFG) + ,(strCI "simple", \_ -> Prt.prt . uncurry Cnv.gfc2simple . stateGrammarLangOpts) + ,(strCI "mcfg-erasing", \_ -> Prt.prt . fst . snd . uncurry Cnv.convertGFC . stateGrammarLangOpts) +-- ,(strCI "mcfg-old", PrtOld.prt . CnvOld.mcfg . statePInfoOld) +-- ,(strCI "cfg-old", PrtOld.prt . CnvOld.cfg . statePInfoOld) + ] + where stateGrammarLangOpts s = (stateOptions s, stateGrammarLang s) + +customMultiGrammarPrinter = + customData "Printers for multiple grammars, selected by option -printer=x" $ + [ + (strCI "gfcm", const MC.prCanon) + ,(strCI "gfcc", canon2gfccPr) + ,(strCI "js", \opts -> JS.gfcc2js . canon2gfcc opts) + ,(strCI "header", const (MC.prCanonMGr . unoptimizeCanon)) + ,(strCI "cfgm", prCanonAsCFGM) + ,(strCI "graph", visualizeCanonGrammar) + ,(strCI "missing", const missingLinCanonGrammar) + +-- to prolog format: + ,(strCI "gfc-prolog", CnvProlog.prtSMulti) + ,(strCI "mcfg-prolog", CnvProlog.prtMMulti) + ,(strCI "cfg-prolog", CnvProlog.prtCMulti) + ] + + +customSyntaxPrinter = + customData "Syntax printers, selected by option -printer=x" $ + [ +-- add your own grammar printers here + ] + + +customTermPrinter = + customData "Term printers, selected by option -printer=x" $ + [ + (strCI "gf", const prt) -- DEFAULT +-- add your own term printers here + ] + +customTermCommand = + customData "Term transformers, selected by option -transform=x" $ + [ + (strCI "identity", \_ t -> [t]) -- DEFAULT + ,(strCI "compute", \g t -> let gr = grammar g in + err (const [t]) return + (exp2termCommand gr (computeAbsTerm gr) t)) + ,(strCI "nodup", \_ t -> if (hasDupIdent $ tree2exp t) then [] else [t]) + ,(strCI "nodupatom", \_ t -> if (hasDupAtom $ tree2exp t) then [] else [t]) + ,(strCI "paraphrase", \g t -> let gr = grammar g in + exp2termlistCommand gr (mkParaphrases gr) t) + + ,(strCI "generate", \g t -> let gr = grammar g + cat = actCat $ tree2loc t --- not needed + in + [tr | t <- generateTrees noOptions gr cat 2 Nothing (Just t), + Ok tr <- [annotate gr $ MM.qualifTerm (absId g) t]]) + ,(strCI "typecheck", \g t -> err (const []) (return . loc2tree) + (reCheckStateReject (grammar g) (tree2loc t))) + ,(strCI "solve", \g t -> err (const []) (return . loc2tree) + (solveAll (grammar g) (tree2loc t) + >>= rejectUnsolvable)) + ,(strCI "context", \g t -> err (const [t]) (return . loc2tree) + (contextRefinements (grammar g) (tree2loc t))) + ,(strCI "reindex", \g t -> let gr = grammar g in + err (const [t]) return + (exp2termCommand gr (return . MM.reindexTerm) t)) +--- ,(strCI "delete", \g t -> [MM.mExp0]) +-- add your own term commands here + ] + +customEditCommand = + customData "Editor state transformers, selected by option -edit=x" $ + [ + (strCI "identity", const return) -- DEFAULT + ,(strCI "typecheck", \g -> reCheckState (grammar g)) + ,(strCI "solve", \g -> solveAll (grammar g)) + ,(strCI "context", \g -> contextRefinements (grammar g)) + ,(strCI "compute", \g -> computeSubTree (grammar g)) + ,(strCI "paraphrase", const return) --- done ad hoc on top level + ,(strCI "generate", const return) --- done ad hoc on top level + ,(strCI "transfer", const return) --- done ad hoc on top level +-- add your own edit commands here + ] + +customStringCommand = + customData "String filters, selected by option -filter=x" $ + [ + (strCI "identity", const $ id) -- DEFAULT + ,(strCI "erase", const $ const "") + ,(strCI "take100", const $ take 100) + ,(strCI "text", const $ formatAsText) + ,(strCI "code", const $ formatAsCode) +---- ,(strCI "latexfile", const $ mkLatexFile) + ,(strCI "length", const $ show . length) +-- add your own string commands here + ] + +customParser = + customData "Parsers, selected by option -parser=x" $ + [ + (strCI "chart", PCFOld.parse "ibn" . stateCF) -- DEPRECATED + ,(strCI "bottomup", PCF.parse "gb" . stateCF) + ,(strCI "topdown", PCF.parse "gt" . stateCF) +-- commented for now, since there's a bug in the incremental algorithm: +-- ,(strCI "incremental", PCF.parse "ib" . stateCF) +-- ,(strCI "incremental-bottomup", PCF.parse "ib" . stateCF) +-- ,(strCI "incremental-topdown", PCF.parse "it" . stateCF) + ,(strCI "old", chartParser . stateCF) -- DEPRECATED + ,(strCI "myparser", myParser) +-- add your own parsers here + ] + +customTokenizer = + let sg = singleton in + customData "Tokenizers, selected by option -lexer=x" $ + [ + (strCI "words", const $ sg . tokWords) + ,(strCI "literals", const $ sg . tokLits) + ,(strCI "vars", const $ sg . tokVars) + ,(strCI "chars", const $ sg . map (tS . singleton)) + ,(strCI "code", const $ sg . lexHaskell) + ,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr)) + ,(strCI "textvars", \gr -> sg . (lexTextVar $ stateIsWord gr)) + ,(strCI "text", const $ sg . lexText) + ,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr)) + ,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr)) + ,(strCI "textlit", \gr -> sg . (lexTextLiteral $ stateIsWord gr)) + ,(strCI "codeC", const $ sg . lexC2M) + ,(strCI "ignore", \gr -> sg . lexIgnore (stateIsWord gr) . tokLits) + ,(strCI "subseqs", \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits) + ,(strCI "codeCHigh", const $ sg . lexC2M' True) +-- add your own tokenizers here + ] + +customUntokenizer = + customData "Untokenizers, selected by option -unlexer=x" $ + [ + (strCI "unwords", const $ id) -- DEFAULT + ,(strCI "text", const $ formatAsText) + ,(strCI "html", const $ formatAsHTML) + ,(strCI "latex", const $ formatAsLatex) + ,(strCI "code", const $ formatAsCode) + ,(strCI "concat", const $ filter (not . isSpace)) + ,(strCI "textlit", const $ formatAsTextLit) + ,(strCI "codelit", const $ formatAsCodeLit) + ,(strCI "concat", const $ concatRemSpace) + ,(strCI "glue", const $ performBinds) + ,(strCI "finnish", const $ performBindsFinnish) + ,(strCI "reverse", const $ reverse) + ,(strCI "bind", const $ performBinds) -- backward compat +-- add your own untokenizers here + ] + +customUniCoding = + customData "Alphabet codings, selected by option -coding=x" $ + [ + (strCI "latin1", id) -- DEFAULT + ,(strCI "utf8", decodeUTF8) + ,(strCI "greek", treat [] mkGreek) + ,(strCI "hebrew", mkHebrew) + ,(strCI "arabic", mkArabic) + ,(strCI "russian", treat [] mkRussian) + ,(strCI "russianKOI8", mkRusKOI8) + ,(strCI "ethiopic", mkEthiopic) + ,(strCI "tamil", mkTamil) + ,(strCI "OCScyrillic", mkOCSCyrillic) + ,(strCI "devanagari", mkDevanagari) + ,(strCI "latinasupplement", mkLatinASupplement) + ,(strCI "japanese", mkJapanese) + ,(strCI "arabic0600", mkArabic0600) + ,(strCI "extendedarabic", mkExtendedArabic) + ,(strCI "extradiacritics", mkExtraDiacritics) + ] diff --git a/src-3.0/GF/UseGrammar/Editing.hs b/src-3.0/GF/UseGrammar/Editing.hs new file mode 100644 index 000000000..762562eb0 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Editing.hs @@ -0,0 +1,435 @@ +---------------------------------------------------------------------- +-- | +-- Module : Editing +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:45 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.14 $ +-- +-- generic tree editing, with some grammar notions assumed. AR 18\/8\/2001. +-- 19\/6\/2003 for GFC +----------------------------------------------------------------------------- + +module GF.UseGrammar.Editing where + +import GF.Grammar.Abstract +import qualified GF.Canon.GFC as GFC +import GF.Grammar.TypeCheck +import GF.Grammar.LookAbs +import GF.Grammar.AbsCompute +import GF.Grammar.Macros (errorCat) + +import GF.Data.Operations +import GF.Data.Zipper + +-- generic tree editing, with some grammar notions assumed. AR 18/8/2001 +-- 19/6/2003 for GFC + +type CGrammar = GFC.CanonGrammar + +type State = Loc TrNode + +-- | the "empty" state +initState :: State +initState = tree2loc uTree + +isRootState :: State -> Bool +isRootState s = case actPath s of + Top -> True + _ -> False + +actTree :: State -> Tree +actTree (Loc (t,_)) = t + +actPath :: State -> Path TrNode +actPath (Loc (_,p)) = p + +actVal :: State -> Val +actVal = valNode . nodeTree . actTree + +actCat :: State -> Cat +actCat = errVal errorCat . val2cat . actVal ---- undef + +actAtom :: State -> Atom +actAtom = atomTree . actTree + +actFun :: State -> Err Fun +actFun s = case actAtom s of + AtC f -> return f + t -> prtBad "active atom: expected function, found" t + +actExp :: State -> Exp +actExp = tree2exp . actTree + +-- | current local bindings +actBinds :: State -> Binds +actBinds = bindsNode . nodeTree . actTree + +-- | constraints in current subtree +actConstrs :: State -> Constraints +actConstrs = allConstrsTree . actTree + +-- | constraints in the whole tree +allConstrs :: State -> Constraints +allConstrs = allConstrsTree . loc2tree + +-- | metas in current subtree +actMetas :: State -> [Meta] +actMetas = metasTree . actTree + +-- | metas in the whole tree +allMetas :: State -> [Meta] +allMetas = metasTree . loc2tree + +actTreeBody :: State -> Tree +actTreeBody = bodyTree . actTree + +allPrevBinds :: State -> Binds +allPrevBinds = concatMap bindsNode . traverseCollect . actPath + +allBinds :: State -> Binds +allBinds s = actBinds s ++ allPrevBinds s + +actGen :: State -> Int +actGen = length . allBinds -- symbol generator for VGen + +allPrevVars :: State -> [Var] +allPrevVars = map fst . allPrevBinds + +allVars :: State -> [Var] +allVars = map fst . allBinds + +vGenIndex :: State -> Int +vGenIndex = length . allBinds + +actIsMeta :: State -> Bool +actIsMeta = atomIsMeta . actAtom + +actMeta :: State -> Err Meta +actMeta = getMetaAtom . actAtom + +-- | meta substs are not only on the actual path... +entireMetaSubst :: State -> MetaSubst +entireMetaSubst = concatMap metaSubstsNode . scanTree . loc2tree + +isCompleteTree :: Tree -> Bool +isCompleteTree = null . filter atomIsMeta . map atomNode . scanTree + +isCompleteState :: State -> Bool +isCompleteState = isCompleteTree . loc2tree + +initStateCat :: Context -> Cat -> Err State +initStateCat cont cat = do + return $ tree2loc (Tr (mkNode [] mAtom (cat2val cont cat) ([],[]), [])) + +-- | this function only concerns the body of an expression... +annotateInState :: CGrammar -> Exp -> State -> Err Tree +annotateInState gr exp state = do + let binds = allBinds state + val = actVal state + annotateIn gr binds exp (Just val) + +-- | ...whereas this one works with lambda abstractions +annotateExpInState :: CGrammar -> Exp -> State -> Err Tree +annotateExpInState gr exp state = do + let cont = allPrevBinds state + binds = actBinds state + val = actVal state + typ <- mkProdVal binds val + annotateIn gr binds exp (Just typ) + +treeByExp :: (Exp -> Err Exp) -> CGrammar -> Exp -> State -> Err Tree +treeByExp trans gr exp0 state = do + exp <- trans exp0 + annotateExpInState gr exp state + +-- * actions + +type Action = State -> Err State + +newCat :: CGrammar -> Cat -> Action +newCat gr cat@(m,c) _ = do + cont <- lookupCatContext gr m c + testErr (null cont) "start cat must have null context" -- for easier meta refresh + initStateCat cont cat + +newFun :: CGrammar -> Fun -> Action +newFun gr fun@(m,c) _ = do + typ <- lookupFunType gr m c + cat <- valCat typ + st1 <- newCat gr cat initState + refineWithAtom True gr (qq fun) st1 + +newTree :: Tree -> Action +newTree t _ = return $ tree2loc t + +newExpTC :: CGrammar -> Exp -> Action +newExpTC gr t s = annotate gr (refreshMetas [] t) >>= flip newTree s + +goNextMeta, goPrevMeta, goNextNewMeta, goPrevNewMeta, goNextMetaIfCan :: Action + +goNextMeta = repeatUntilErr actIsMeta goAhead -- can be the location itself +goPrevMeta = repeatUntilErr actIsMeta goBack + +goNextNewMeta s = goAhead s >>= goNextMeta -- always goes away from location +goPrevNewMeta s = goBack s >>= goPrevMeta + +goNextMetaIfCan = actionIfPossible goNextMeta + +actionIfPossible :: Action -> Action +actionIfPossible a s = return $ errVal s (a s) + +goFirstMeta, goLastMeta :: Action +goFirstMeta s = goNextMeta $ goRoot s +goLastMeta s = goLast s >>= goPrevMeta + +noMoreMetas :: State -> Bool +noMoreMetas = err (const True) (const False) . goNextMeta + +replaceSubTree :: Tree -> Action +replaceSubTree tree state = changeLoc state tree + +refineOrReplaceWithTree :: Bool -> CGrammar -> Tree -> Action +refineOrReplaceWithTree der gr tree state = case actMeta state of + Ok m -> refineWithTreeReal der gr tree m state + _ -> do + let tree1 = addBinds (actBinds state) $ tree + state' <- replaceSubTree tree1 state + reCheckState gr state' + +refineWithTree :: Bool -> CGrammar -> Tree -> Action +refineWithTree der gr tree state = do + m <- errIn "move pointer to meta" $ actMeta state + refineWithTreeReal der gr tree m state + +refineWithTreeReal :: Bool -> CGrammar -> Tree -> Meta -> Action +refineWithTreeReal der gr tree m state = do + state' <- replaceSubTree tree state + let cs0 = allConstrs state' + (cs,ms) = splitConstraints gr cs0 + v = vClos $ tree2exp (bodyTree tree) + msubst = (m,v) : ms + metaSubstRefinements gr msubst $ + mapLoc (reduceConstraintsNode gr . performMetaSubstNode msubst) state' + + -- without dep. types, no constraints, no grammar needed - simply: do + -- testErr (actIsMeta state) "move pointer to meta" + -- replaceSubTree tree state + +refineAllNodes :: Action -> Action +refineAllNodes act state = do + let estate0 = goFirstMeta state + case estate0 of + Bad _ -> return state + Ok state0 -> do + (state',n) <- tryRefine 0 state0 + if n==0 + then return state + else actionIfPossible goFirstMeta state' + where + tryRefine n state = err (const $ return (state,n)) return $ do + state' <- goNextMeta state + meta <- actMeta state' + case act state' of + Ok state2 -> tryRefine (n+1) state2 + _ -> err (const $ return (state',n)) return $ do + state2 <- goNextNewMeta state' + tryRefine n state2 + +uniqueRefinements :: CGrammar -> Action +uniqueRefinements = refineAllNodes . uniqueRefine + +metaSubstRefinements :: CGrammar -> MetaSubst -> Action +metaSubstRefinements gr = refineAllNodes . metaSubstRefine gr + +contextRefinements :: CGrammar -> Action +contextRefinements gr = refineAllNodes contextRefine where + contextRefine state = case varRefinementsState state of + [(e,_)] -> refineWithAtom False gr e state + _ -> Bad "no unique refinement in context" + varRefinementsState state = + [r | r@(e,_) <- refinementsState gr state, isVariable e] + +uniqueRefine :: CGrammar -> Action +uniqueRefine gr state = case refinementsState gr state of + [(e,(_,True))] -> Bad "only circular refinement" + [(e,_)] -> refineWithAtom False gr e state + _ -> Bad "no unique refinement" + +metaSubstRefine :: CGrammar -> MetaSubst -> Action +metaSubstRefine gr msubst state = do + m <- errIn "move pointer to meta" $ actMeta state + case lookup m msubst of + Just v -> do + e <- val2expSafe v + refineWithExpTC False gr e state + _ -> Bad "no metavariable substitution available" + +refineWithExpTC :: Bool -> CGrammar -> Exp -> Action +refineWithExpTC der gr exp0 state = do + let oldmetas = allMetas state + exp = refreshMetas oldmetas exp0 + tree0 <- annotateInState gr exp state + let tree = addBinds (actBinds state) $ tree0 + refineWithTree der gr tree state + +refineWithAtom :: Bool -> CGrammar -> Ref -> Action -- function or variable +refineWithAtom der gr at state = do + val <- lookupRef gr (allBinds state) at + typ <- val2exp val + let oldvars = allVars state + exp <- ref2exp oldvars typ at + refineWithExpTC der gr exp state + +-- | in this command, we know that the result is well-typed, since computation +-- rules have been type checked and the result is equal +computeSubTree :: CGrammar -> Action +computeSubTree gr state = do + let exp = tree2exp (actTree state) + tree <- treeByExp (compute gr) gr exp state + replaceSubTree tree state + +-- | but here we don't, since the transfer flag isn't type checked, +-- and computing the transfer function is not checked to preserve equality +transferSubTree :: Maybe Fun -> CGrammar -> Action +transferSubTree Nothing _ s = return s +transferSubTree (Just fun) gr state = do + let exp = mkApp (qq fun) [tree2exp $ actTree state] + tree <- treeByExp (compute gr) gr exp state + state' <- replaceSubTree tree state + reCheckState gr state' + +deleteSubTree :: CGrammar -> Action +deleteSubTree gr state = + if isRootState state + then do + let cat = actCat state + newCat gr cat state + else do + let metas = allMetas state + binds = actBinds state + exp = refreshMetas metas mExp0 + tree <- annotateInState gr exp state + state' <- replaceSubTree (addBinds binds tree) state + reCheckState gr state' --- must be unfortunately done. 20/11/2001 + +wrapWithFun :: CGrammar -> (Fun,Int) -> Action +wrapWithFun gr (f@(m,c),i) state = do + typ <- lookupFunType gr m c + let olds = allPrevVars state + oldmetas = allMetas state + exp0 <- fun2wrap olds ((f,i),typ) (tree2exp (actTreeBody state)) + let exp = refreshMetas oldmetas exp0 + tree0 <- annotateInState gr exp state + let tree = addBinds (actBinds state) $ tree0 + state' <- replaceSubTree tree state + reCheckState gr state' --- must be unfortunately done. 20/11/2001 + +alphaConvert :: CGrammar -> (Var,Var) -> Action +alphaConvert gr (x,x') state = do + let oldvars = allPrevVars state + testErr (notElem x' oldvars) ("clash with previous bindings" +++ show x') + let binds0 = actBinds state + vars0 = map fst binds0 + testErr (notElem x' vars0) ("clash with other bindings" +++ show x') + let binds = [(if z==x then x' else z, t) | (z,t) <- binds0] + vars = map fst binds + exp' <- alphaConv (vars ++ oldvars) (x,x') (tree2exp (actTreeBody state)) + let exp = mkAbs vars exp' + tree <- annotateExpInState gr exp state + replaceSubTree tree state + +changeFunHead :: CGrammar -> Fun -> Action +changeFunHead gr f state = do + let state' = changeNode (changeAtom (const (atomC f))) state + reCheckState gr state' --- must be done because of constraints elsewhere + +peelFunHead :: CGrammar -> (Fun,Int) -> Action +peelFunHead gr (f@(m,c),i) state = do + tree0 <- nthSubtree i $ actTree state + let tree = addBinds (actBinds state) $ tree0 + state' <- replaceSubTree tree state + reCheckState gr state' --- must be unfortunately done. 20/11/2001 + +-- | an expensive operation +reCheckState :: CGrammar -> State -> Err State +reCheckState gr st = annotate gr (tree2exp (loc2tree st)) >>= return . tree2loc + +-- | a variant that returns Bad instead of a tree with unsolvable constraints +reCheckStateReject :: CGrammar -> State -> Err State +reCheckStateReject gr st = do + st' <- reCheckState gr st + rejectUnsolvable st' + +rejectUnsolvable :: State -> Err State +rejectUnsolvable st = case (constrsNode $ nodeTree $ actTree st) of + [] -> return st + cs -> Bad $ "Unsolvable constraints:" +++ prConstraints cs + +-- | extract metasubstitutions from constraints and solve them +solveAll :: CGrammar -> State -> Err State +solveAll gr st = solve st >>= solve where + solve st0 = do ---- why need twice? + st <- reCheckState gr st0 + let cs0 = allConstrs st + (cs,ms) = splitConstraints gr cs0 + metaSubstRefinements gr ms $ + mapLoc (reduceConstraintsNode gr . performMetaSubstNode ms) st + +-- * active refinements + +refinementsState :: CGrammar -> State -> [(Term,(Val,Bool))] +refinementsState gr state = + let filt = possibleRefVal gr state in + if actIsMeta state + then refsForType filt gr (allBinds state) (actVal state) + else [] + +wrappingsState :: CGrammar -> State -> [((Fun,Int),Type)] +wrappingsState gr state + | actIsMeta state = [] + | isRootState state = funs + | otherwise = [rule | rule@(_,typ) <- funs, possibleRefVal gr state aval typ] + where + funs = funsOnType (possibleRefVal gr state) gr aval + aval = actVal state + +peelingsState :: CGrammar -> State -> [(Fun,Int)] +peelingsState gr state + | actIsMeta state = [] + | isRootState state = + err (const []) (\f -> [(f,i) | i <- [0 .. arityTree tree - 1]]) $ actFun state + | otherwise = + err (const []) + (\f -> [fi | (fi@(g,_),typ) <- funs, + possibleRefVal gr state aval typ,g==f]) $ actFun state + where + funs = funsOnType (possibleRefVal gr state) gr aval + aval = actVal state + tree = actTree state + +headChangesState :: CGrammar -> State -> [Fun] +headChangesState gr state = errVal [] $ do + f@(m,c) <- funAtom (actAtom state) + typ0 <- lookupFunType gr m c + return [fun | (fun,typ) <- funRulesOf gr, fun /= f, typ == typ0] + --- alpha-conv ! + +possibleRefVal :: CGrammar -> State -> Val -> Type -> Bool +possibleRefVal gr state val typ = errVal True $ do --- was False + vtyp <- valType typ + let gen = actGen state + cs <- return [(val, vClos vtyp)] --- eqVal gen val (vClos vtyp) --- only poss cs + return $ possibleConstraints gr cs --- a simple heuristic + +possibleTreeVal :: CGrammar -> State -> Tree -> Bool +possibleTreeVal gr state tree = errVal True $ do --- was False + let aval = actVal state + let gval = valTree tree + let gen = actGen state + cs <- return [(aval, gval)] --- eqVal gen val (vClos vtyp) --- only poss cs + return $ possibleConstraints gr cs --- a simple heuristic + diff --git a/src-3.0/GF/UseGrammar/Generate.hs b/src-3.0/GF/UseGrammar/Generate.hs new file mode 100644 index 000000000..5f07e0b85 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Generate.hs @@ -0,0 +1,116 @@ +---------------------------------------------------------------------- +-- | +-- Module : Generate +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/12 12:38:30 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.16 $ +-- +-- Generate all trees of given category and depth. AR 30\/4\/2004 +-- +-- (c) Aarne Ranta 2004 under GNU GPL +-- +-- Purpose: to generate corpora. We use simple types and don't +-- guarantee the correctness of bindings\/dependences. +----------------------------------------------------------------------------- + +module GF.UseGrammar.Generate (generateTrees,generateAll) where + +import GF.Canon.GFC +import GF.Grammar.LookAbs +import GF.Grammar.PrGrammar +import GF.Grammar.Macros +import GF.Grammar.Values +import GF.Grammar.Grammar (Cat) +import GF.Grammar.SGrammar +import GF.Data.Operations +import GF.Data.Zipper +import GF.Infra.Option +import Data.List + +-- Generate all trees of given category and depth. AR 30/4/2004 +-- (c) Aarne Ranta 2004 under GNU GPL +-- +-- Purpose: to generate corpora. We use simple types and don't +-- guarantee the correctness of bindings/dependences. + + +-- | the main function takes an abstract syntax and returns a list of trees +generateTrees :: + Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp] +generateTrees opts gr cat n mn mt = map str2tr $ generate gr' opts cat' n mn mt' + where + gr' = gr2sgr opts emptyProbs gr + cat' = prt $ snd cat + mt' = maybe Nothing (return . tr2str) mt +--- ifm = oElem withMetas opts + ifm = oElem showOld opts + +generateAll :: Options -> (Exp -> IO ()) -> GFCGrammar -> Cat -> IO () +generateAll opts io gr cat = mapM_ (io . str2tr) $ num $ gen cat' + where + num = optIntOrAll opts flagNumber + gr' = gr2sgr opts emptyProbs gr + cat' = prt $ snd cat + gen c = generate gr' opts c 10 Nothing Nothing + + + +------------------------------------------ +-- do the main thing with a simpler data structure +-- the first Int gives tree depth, the second constrains subtrees +-- chosen for each branch. A small number, such as 2, is a good choice +-- if the depth is large (more than 3) +-- If a tree is given as argument, generation concerns its metavariables. + +generate :: SGrammar -> Options -> SCat -> Int -> Maybe Int -> Maybe STree -> [STree] +generate gr opts cat i mn mt = case mt of + Nothing -> gen opts cat + Just t -> genM t + where +--- now use ifm to choose between two algorithms + gen opts cat + | oElem (iOpt "mem") opts = concat $ errVal [] $ lookupTree id cat $ allTrees -- -old + | oElem (iOpt "nonub") opts = concatMap (\i -> gener i cat) [0..i-1] -- some duplicates + | otherwise = nub $ concatMap (\i -> gener i cat) [0..i-1] -- new + + gener 0 c = [SApp (f, []) | (f,([],_)) <- funs c] + gener i c = [ + tr | + (f,(cs,_)) <- funs c, + let alts = map (gener (i-1)) cs, + ts <- combinations alts, + let tr = SApp (f, ts) +-- depth tr >= i -- NO! + ] + + allTrees = genAll i + + -- dynamic generation + genAll :: Int -> BinTree SCat [[STree]] + genAll i = iter i genNext (mapTree (\ (c,_) -> (c,[[]])) gr) + + iter 0 f tr = tr + iter n f tr = iter (n-1) f (f tr) + + genNext tr = mapTree (genNew tr) tr + + genNew tr (cat,ts) = let size = length ts in + (cat, [SApp (f, xs) | + (f,(cs,_)) <- funs cat, + xs <- combinations (map look cs), + let fxs = SApp (f, xs), + depth fxs == size] + : ts) + where + look c = concat $ errVal [] $ lookupTree id c tr + + funs cat = maybe id take mn $ errVal [] $ lookupTree id cat gr + + genM t = case t of + SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)] + SMeta k -> gen opts k + _ -> [t] diff --git a/src-3.0/GF/UseGrammar/GetTree.hs b/src-3.0/GF/UseGrammar/GetTree.hs new file mode 100644 index 000000000..e980a3d95 --- /dev/null +++ b/src-3.0/GF/UseGrammar/GetTree.hs @@ -0,0 +1,74 @@ +---------------------------------------------------------------------- +-- | +-- Module : GetTree +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/15 16:22:02 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.9 $ +-- +-- how to form linearizable trees from strings and from terms of different levels +-- +-- 'String' --> raw 'Term' --> annot, qualif 'Term' --> 'Tree' +----------------------------------------------------------------------------- + +module GF.UseGrammar.GetTree where + +import GF.Canon.GFC +import GF.Grammar.Values +import qualified GF.Grammar.Grammar as G +import GF.Infra.Ident +import GF.Grammar.MMacros +import GF.Grammar.Macros +import GF.Compile.Rename +import GF.Grammar.TypeCheck +import GF.Grammar.AbsCompute (beta) +import GF.Compile.PGrammar +import GF.Compile.ShellState + +import GF.Data.Operations + +import Data.Char + +-- how to form linearizable trees from strings and from terms of different levels +-- +-- String --> raw Term --> annot, qualif Term --> Tree + +string2tree :: StateGrammar -> String -> Tree +string2tree gr = errVal uTree . string2treeErr gr + +string2treeErr :: StateGrammar -> String -> Err Tree +string2treeErr _ "" = Bad "empty string" +string2treeErr gr s = do + t <- pTerm s + let t0 = beta [] t + let t1 = refreshMetas [] t0 + let t2 = qualifTerm abstr t1 + annotate grc t2 + where + abstr = absId gr + grc = grammar gr + +string2Cat, string2Fun :: StateGrammar -> String -> (Ident,Ident) +string2Cat gr c = (absId gr,identC c) +string2Fun = string2Cat + +strings2Cat, strings2Fun :: String -> (Ident,Ident) +strings2Cat s = (identC m, identC (drop 1 c)) where (m,c) = span (/= '.') s +strings2Fun = strings2Cat + +string2ref :: StateGrammar -> String -> Err G.Term +string2ref gr s = case s of + 'x':'_':ds -> return $ freshAsTerm ds --- hack for generated vars + '"':_:_ -> return $ G.K $ init $ tail s + _:_ | all isDigit s -> return $ G.EInt $ read s + _ | elem '.' s -> return $ uncurry G.Q $ strings2Fun s + _ -> return $ G.Vr $ identC s + +string2cat :: StateGrammar -> String -> Err G.Cat +string2cat gr s = + if elem '.' s + then return $ strings2Fun s + else return $ curry id (absId gr) (identC s) diff --git a/src-3.0/GF/UseGrammar/Information.hs b/src-3.0/GF/UseGrammar/Information.hs new file mode 100644 index 000000000..4526980d6 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Information.hs @@ -0,0 +1,162 @@ +---------------------------------------------------------------------- +-- | +-- Module : Information +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/05 20:02:20 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.7 $ +-- +-- information on module, category, function, operation, parameter,... +-- AR 16\/9\/2003. +-- uses source grammar +----------------------------------------------------------------------------- + +module GF.UseGrammar.Information ( + showInformation, + missingLinCanonGrammar + ) where + +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Infra.Option +import GF.CF.CF +import GF.CF.PPrCF +import GF.Compile.ShellState +import GF.Grammar.PrGrammar +import GF.Grammar.Lookup +import GF.Grammar.Macros (zIdent) +import qualified GF.Canon.GFC as GFC +import qualified GF.Canon.AbsGFC as AbsGFC + +import GF.Data.Operations +import GF.Infra.UseIO + +-- information on module, category, function, operation, parameter,... AR 16/9/2003 +-- uses source grammar + +-- | the top level function +showInformation :: Options -> ShellState -> Ident -> IOE () +showInformation opts st c = do + is <- ioeErr $ getInformation opts st c + if null is + then putStrLnE "Identifier not in scope" + else mapM_ (putStrLnE . prInformationM c) is + where + prInformationM c (i,m) = prInformation opts c i ++ "file:" +++ m ++ "\n" + +-- | the data type of different kinds of information +data Information = + IModAbs SourceAbs + | IModRes SourceRes + | IModCnc SourceCnc + | IModule SourceAbs -- ^ to be deprecated + | ICatAbs Ident Context [Ident] + | ICatCnc Ident Type [CFRule] Term + | IFunAbs Ident Type (Maybe Term) + | IFunCnc Ident Type [CFRule] Term + | IOper Ident Type Term + | IParam Ident [Param] [Term] + | IValue Ident Type + +type CatId = AbsGFC.CIdent +type FunId = AbsGFC.CIdent + +prInformation :: Options -> Ident -> Information -> String +prInformation opts c i = unlines $ prt c : case i of + IModule m -> [ + "module of type" +++ show (mtype m), + "extends" +++ show (extends m), + "opens" +++ show (opens m), + "defines" +++ unwords (map prt (ownConstants (jments m))) + ] + ICatAbs m co _ -> [ + "category in abstract module" +++ prt m, + if null co then "not a dependent type" + else "dependent type with context" +++ prContext co + ] + ICatCnc m ty cfs tr -> [ + "category in concrete module" +++ prt m, + "linearization type" +++ prt ty + ] + IFunAbs m ty _ -> [ + "function in abstract module" +++ prt m, + "type" +++ prt ty + ] + IFunCnc m ty cfs tr -> [ + "function in concrete module" +++ prt m, + "linearization" +++ prt tr + --- "linearization type" +++ prt ty + ] + IOper m ty tr -> [ + "operation in resource module" +++ prt m, + "type" +++ prt ty, + "definition" +++ prt tr + ] + IParam m ty ts -> [ + "parameter type in resource module" +++ prt m, + "constructors" +++ unwords (map prParam ty), + "values" +++ unwords (map prt ts) + ] + IValue m ty -> [ + "parameter constructor in resource module" +++ prt m, + "type" +++ show ty + ] + +-- | also finds out if an identifier is defined in many places +getInformation :: Options -> ShellState -> Ident -> Err [(Information,FilePath)] +getInformation opts st c = allChecks $ [ + do + m <- lookupModule src c + case m of + ModMod mo -> returnm c $ IModule mo + _ -> prtBad "not a source module" c + ] ++ map lookInSrc ss ++ map lookInCan cs + where + lookInSrc (i,m) = do + j <- lookupInfo m c + case j of + AbsCat (Yes co) _ -> returnm i $ ICatAbs i co [] --- + AbsFun (Yes ty) _ -> returnm i $ IFunAbs i ty Nothing --- + CncCat (Yes ty) _ _ -> do + ---- let cat = ident2CFCat i c + ---- rs <- concat [rs | (c,rs) <- cf, ] + returnm i $ ICatCnc i ty [] ty --- + CncFun _ (Yes tr) _ -> do + rs <- return [] + returnm i $ IFunCnc i tr rs tr --- + ResOper (Yes ty) (Yes tr) -> returnm i $ IOper i ty tr + ResParam (Yes (ps,_)) -> do + ts <- allParamValues src (QC i c) + returnm i $ IParam i ps ts + ResValue (Yes (ty,_)) -> returnm i $ IValue i ty --- + + _ -> prtBad "nothing available for" i + lookInCan (i,m) = do + Bad "nothing available yet in canonical" + + returnm m i = return (i, pathOfModule st m) + + src = srcModules st + can = canModules st + ss = [(i,m) | (i,ModMod m) <- modules src] + cs = [(i,m) | (i,ModMod m) <- modules can] + cf = concatMap ruleGroupsOfCF $ map snd $ cfs st + +ownConstants :: BinTree Ident Info -> [Ident] +ownConstants = map fst . filter isOwn . tree2list where + isOwn (c,i) = case i of + AnyInd _ _ -> False + _ -> True + +missingLinCanonGrammar :: GFC.CanonGrammar -> String +missingLinCanonGrammar cgr = + unlines $ concat [prt_ c : missing js | (c,js) <- concretes] where + missing js = map ((" " ++) . prt_) $ filter (not . flip isInBinTree js) abstract + abstract = err (const []) (map fst . tree2list . jments) $ lookupModMod cgr absId + absId = maybe (zIdent "") id $ greatestAbstract cgr + concretes = [(cnc,jments mo) | + cnc <- allConcretes cgr absId, Ok mo <- [lookupModMod cgr cnc]] diff --git a/src-3.0/GF/UseGrammar/Linear.hs b/src-3.0/GF/UseGrammar/Linear.hs new file mode 100644 index 000000000..c9b94ccb0 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Linear.hs @@ -0,0 +1,292 @@ +---------------------------------------------------------------------- +-- | +-- Module : Linear +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/14 16:03:41 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.19 $ +-- +-- Linearization for canonical GF. AR 7\/6\/2003 +----------------------------------------------------------------------------- + +module GF.UseGrammar.Linear where + +import GF.Canon.GFC +import GF.Canon.AbsGFC +import qualified GF.Grammar.Abstract as A +import GF.Canon.MkGFC (rtQIdent) ---- +import GF.Infra.Ident +import GF.Grammar.PrGrammar +import GF.Canon.CMacros +import GF.Canon.Look +import GF.Grammar.LookAbs +import GF.Grammar.MMacros +import GF.Grammar.TypeCheck (annotate) ---- +import GF.Data.Str +import GF.Text.Text +----import TypeCheck -- to annotate + +import GF.Data.Operations +import GF.Data.Zipper +import qualified GF.Infra.Modules as M + +import Control.Monad +import Data.List (intersperse) + +-- Linearization for canonical GF. AR 7/6/2003 + +-- | The worker function: linearize a Tree, return +-- a record. Possibly mark subtrees. +-- +-- NB. Constants in trees are annotated by the name of the abstract module. +-- A concrete module name must be given to find (and choose) linearization rules. +-- +-- - If no marking is wanted, 'noMark' :: 'Marker'. +-- +-- - For xml marking, use 'markXML' :: 'Marker' +linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term +linearizeToRecord gr mk m = lin [] where + + lin ts t@(Tr (n,xs)) = errIn ("linearization of" +++ prt t) $ do + + let binds = A.bindsNode n + at = A.atomNode n + fmk = markSubtree mk n ts (A.isFocusNode n) + c <- A.val2cat $ A.valNode n + xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs + + r <- case at of + A.AtC f -> lookf c t f >>= comp xs' + A.AtI i -> return $ recInt i + A.AtL s -> return $ recS $ tK $ prt at + A.AtF i -> return $ recS $ tK $ prt at + A.AtV x -> lookCat c >>= comp [tK (prt_ at)] + A.AtM m -> lookCat c >>= comp [tK (prt_ at)] + + r' <- case r of -- to see stg in case the result is variants {} + FV [] -> lookCat c >>= comp [tK (prt_ t)] + _ -> return r + + return $ fmk $ mkBinds binds r' + + look = lookupLin gr . redirectIdent m . rtQIdent + comp = ccompute gr + mkBinds bs bdy = case bdy of + R fs -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs + FV rs -> FV $ map (mkBinds bs) rs + + recS t = R [Ass (L (identC "s")) t] ---- + + recInt i = R [ + Ass (L (identC "last")) (EInt (rem i 10)), + Ass (L (identC "s")) (tK $ show i), + Ass (L (identC "size")) (EInt (if i > 9 then 1 else 0)) + ] + + lookCat = return . errVal defLindef . look + ---- should always be given in the module + + -- to show missing linearization as term + lookf c t f = case look f of + Ok h -> return h + _ -> lookCat c >>= comp [tK (prt_ t)] + + +-- | thus the special case: +linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term +linearizeNoMark gr = linearizeToRecord gr noMark + +-- | expand tables in linearized term to full, normal-order tables +-- +-- NB expand from inside-out so that values are not looked up in copies of branches + +expandLinTables :: CanonGrammar -> Term -> Err Term +expandLinTables gr t = case t of + R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs] + T ty rs -> do + rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out + let t' = T ty $ map (uncurry Cas) rs' + vs <- alls ty + ps <- mapM term2patt vs + ts' <- mapM (comp . S t') $ vs + return $ T ty [Cas [p] t | (p,t) <- zip ps ts'] + V ty ts0 -> do + ts <- mapM exp ts0 -- expand from inside-out + vs <- alls ty + ps <- mapM term2patt vs + return $ T ty [Cas [p] t | (p,t) <- zip ps ts] + FV ts -> liftM FV $ mapM exp ts + _ -> composOp exp t + where + alls = allParamValues gr + exp = expandLinTables gr + comp = ccompute gr [] + +-- Do this for an entire grammar: + +unoptimizeCanon :: CanonGrammar -> CanonGrammar +unoptimizeCanon g@(M.MGrammar ms) = M.MGrammar $ map (unoptimizeCanonMod g) ms + +unoptimizeCanonMod :: CanonGrammar -> CanonModule -> CanonModule +unoptimizeCanonMod g = convMod where + convMod (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os defs)) = + (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os (mapTree convDef defs))) + convMod mm = mm + convDef (c,CncCat ty df pr) = (c,CncCat ty (convT df) (convT pr)) + convDef (f,CncFun c xs li pr) = (f,CncFun c xs (convT li) (convT pr)) + convDef cd = cd + convT = err error id . exp + -- a version of expandLinTables that does not destroy share optimization + exp t = case t of + R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs] + T ty rs@[Cas [_] _] -> do + rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out + let t' = T ty $ map (uncurry Cas) rs' + vs <- alls ty + ps <- mapM term2patt vs + ts' <- mapM (comp . S t') $ vs + return $ T ty [Cas [p] t | (p,t) <- zip ps ts'] + V ty ts0 -> do + ts <- mapM exp ts0 -- expand from inside-out + vs <- alls ty + ps <- mapM term2patt vs + return $ T ty [Cas [p] t | (p,t) <- zip ps ts] + FV ts -> liftM FV $ mapM exp ts + I _ -> comp t + _ -> composOp exp t + where + alls = allParamValues g + comp = ccompute g [] + + +-- | from records, one can get to records of tables of strings +rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]] +rec2strTables r = do + vs <- allLinValues r + mapM (mapPairsM (mapPairsM strsFromTerm)) vs + +-- | from these tables, one may want to extract the ones for the "s" label +strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]] +strTables2sTables ts = [t | r <- ts, (l,t) <- r, l == linLab0] + +linLab0 :: Label +linLab0 = L (identC "s") + +-- | to get lists of token lists is easy +sTables2strs :: [[([Patt],[Str])]] -> [[Str]] +sTables2strs = map snd . concat + +-- | from this, to get a list of strings +strs2strings :: [[Str]] -> [String] +strs2strings = map unlex + +-- | this is just unwords; use an unlexer from Text to postprocess +unlex :: [Str] -> String +unlex = concat . map sstr . take 1 ---- + +-- | finally, a top-level function to get a string from an expression +linTree2string :: Marker -> CanonGrammar -> Ident -> A.Tree -> String +linTree2string mk gr m e = head $ linTree2strings mk gr m e -- never empty + +-- | you can also get many strings +linTree2strings :: Marker -> CanonGrammar -> Ident -> A.Tree -> [String] +linTree2strings mk gr m e = err return id $ do + t <- linearizeToRecord gr mk m e + r <- expandLinTables gr t + ts <- rec2strTables r + let ss = strs2strings $ sTables2strs $ strTables2sTables ts + ifNull (prtBad "empty linearization of" e) return ss -- thus never empty + +-- | argument is a Tree, value is a list of strs; needed in Parsing +allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str] +allLinsOfTree gr a e = err (singleton . str) id $ do + e' <- return e ---- annotateExp gr e + r <- linearizeNoMark gr a e' + r' <- expandLinTables gr r + ts <- rec2strTables r' + return $ concat $ sTables2strs $ strTables2sTables ts + +-- | the value is a list of structures arranged as records of tables of terms +allLinsAsRec :: CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],Term)])]] +allLinsAsRec gr c t = linearizeNoMark gr c t >>= expandLinTables gr >>= allLinValues + +-- | the value is a list of structures arranged as records of tables of strings +-- only taking into account string fields +-- True: sep. by /, False: sep by \n +allLinTables :: + Bool -> CanonGrammar ->Ident ->A.Tree ->Err [[(Label,[([Patt],[String])])]] +allLinTables slash gr c t = do + r' <- allLinsAsRec gr c t + mapM (mapM getS) r' + where + getS (lab,pss) = liftM (curry id lab) $ mapM gets pss + gets (ps,t) = liftM (curry id ps . cc . map str2strings) $ strsFromTerm t + cc = concat . intersperse [if slash then "/" else "\n"] + +-- | the value is a list of strings gathered from all fields + +allLinBranchFields :: CanonGrammar -> Ident -> A.Tree -> Err [String] +allLinBranchFields gr c trm = do + r <- linearizeNoMark gr c trm >>= expandLinTables gr + return [s | (_,t) <- allLinBranches r, s <- gets t] + where + gets t = concat [cc (map str2strings s) | Ok s <- [strsFromTerm t]] + cc = concat . intersperse ["/"] + +prLinTable :: Bool -> [[(Label,[([Patt],[String])])]] -> [String] +prLinTable pars = concatMap prOne . concat where + prOne (lab,pss) = (if pars then ((prt lab) :) else id) (map pr pss) ---- + pr (ps,ss) = (if pars then ((unwords (map prt_ ps) +++ ":") +++) + else id) (unwords ss) + +{- +-- the value is a list of strs +allLinStrings :: CanonGrammar -> Tree -> [Str] +allLinStrings gr ft = case allLinsAsStrs gr ft of + Ok ts -> map snd $ concat $ map snd $ concat ts + Bad s -> [str s] + +-- the value is a list of strs, not forgetting their arguments +allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]] +allLinsAsStrs gr ft = do + lpts <- allLinearizations gr ft + return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts + + +-- to a list of strings +linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String] +linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk + +-- to a list of token lists +linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]] +linearizeToStrss gr mk e = do + R rs <- linearizeToRecord gr mk e ---- + t <- lookupErr linLab0 [(r,s) | Ass r s <- rs] + return $ map strsFromTerm $ allInTable t +-} + +-- | the value is a list of strings, not forgetting their arguments +allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]] +allLinsOfFun gr f = do + t <- lookupLin gr f + allAllLinValues t --- all fields, not only s. 11/12/2005 + + +-- | returns printname if one exists; otherwise linearizes with metas +printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String +printOrLinearize gr c f@(m, d) = errVal (prt fq) $ + case lookupPrintname gr (CIQ c d) of + Ok t -> do + ss <- strsFromTerm t + let s = strs2strings [ss] + return $ ifNull (prt fq) head s + _ -> do + ty <- lookupFunType gr m d + f' <- ref2exp [] ty (A.QC m d) + tr <- annotate gr f' + return $ linTree2string noMark gr c tr + where + fq = CIQ m d diff --git a/src-3.0/GF/UseGrammar/MatchTerm.hs b/src-3.0/GF/UseGrammar/MatchTerm.hs new file mode 100644 index 000000000..9acffd44c --- /dev/null +++ b/src-3.0/GF/UseGrammar/MatchTerm.hs @@ -0,0 +1,50 @@ +---------------------------------------------------------------------- +-- | +-- Module : MatchTerm +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- +-- functions for matching with terms. AR 16/3/2006 +----------------------------------------------------------------------------- + +module GF.UseGrammar.MatchTerm where + +import GF.Data.Operations +import GF.Data.Zipper + +import GF.Grammar.Grammar +import GF.Grammar.PrGrammar +import GF.Infra.Ident +import GF.Grammar.Values +import GF.Grammar.Macros +import GF.Grammar.MMacros + +import Control.Monad +import Data.List + +-- test if a term has duplicated idents, either any or just atoms + +hasDupIdent, hasDupAtom :: Exp -> Bool +hasDupIdent = (>1) . maximum . map length . group . sort . allConstants True +hasDupAtom = (>1) . maximum . map length . group . sort . allConstants False + +-- test if a certain ident occurs in term + +grepIdent :: Ident -> Exp -> Bool +grepIdent c = elem c . allConstants True + +-- form the list of all constants, optionally ignoring all but atoms + +allConstants :: Bool -> Exp -> [Ident] +allConstants alsoApp = err (const []) snd . flip appSTM [] . collect where + collect e = case e of + Q _ c -> add c e + QC _ c -> add c e + Cn c -> add c e + App f a | not alsoApp -> case f of + App g b -> collect b >> collect a + _ -> collect a + _ -> composOp collect e + add c e = updateSTM (c:) >> return e diff --git a/src-3.0/GF/UseGrammar/Morphology.hs b/src-3.0/GF/UseGrammar/Morphology.hs new file mode 100644 index 000000000..3aeb08dc7 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Morphology.hs @@ -0,0 +1,140 @@ +---------------------------------------------------------------------- +-- | +-- Module : Morphology +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:49 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- Morphological analyser constructed from a GF grammar. +-- +-- we first found the binary search tree sorted by word forms more efficient +-- than a trie, at least for grammars with 7000 word forms +-- (18\/11\/2003) but this may change since we have to use a trie +-- for decompositions and also want to use it in the parser +----------------------------------------------------------------------------- + +module GF.UseGrammar.Morphology where + +import GF.Canon.AbsGFC +import GF.Canon.GFC +import GF.Grammar.PrGrammar +import GF.Canon.CMacros +import GF.Canon.Look +import GF.Grammar.LookAbs +import GF.Infra.Ident +import qualified GF.Grammar.Macros as M +import GF.UseGrammar.Linear + +import GF.Data.Operations +import GF.Data.Glue + +import Data.Char +import Data.List (sortBy, intersperse) +import Control.Monad (liftM) +import GF.Data.Trie2 + +-- construct a morphological analyser from a GF grammar. AR 11/4/2001 + +-- we first found the binary search tree sorted by word forms more efficient +-- than a trie, at least for grammars with 7000 word forms +-- (18\/11\/2003) but this may change since we have to use a trie +-- for decompositions and also want to use it in the parser + +type Morpho = Trie Char String + +emptyMorpho :: Morpho +emptyMorpho = emptyTrie + +appMorpho :: Morpho -> String -> (String,[String]) +appMorpho = appMorphoOnly +---- add lookup for literals + +-- without literals +appMorphoOnly :: Morpho -> String -> (String,[String]) +appMorphoOnly m s = trieLookup m s + +-- recognize word, exluding literals +isKnownWord :: Morpho -> String -> Bool +isKnownWord mo = not . null . snd . appMorphoOnly mo + +mkMorpho :: CanonGrammar -> Ident -> Morpho +mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where + + comp = ccompute gr [] -- to undo 'values' optimization + + mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun + mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun + + -- gather forms of lexical items + allLins fun@(m,f) = errVal [] $ do + ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues + ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts + return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] + prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)]) + + -- gather syncategorematic words + allSyns fun@(m,f) = errVal [] $ do + tss <- allLinsOfFun gr (CIQ a f) + let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] + return $ concat $ map wordsInTerm ss + prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f]) + + -- all words, Left from lexical rules and Right syncategorematic + allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where + allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]] + lexRole t = case M.typeForm t of + Ok ([],_,_) -> Left + _ -> Right + +-- printing full-form lexicon and results + +prMorpho :: Morpho -> String +prMorpho = unlines . map prMorphoAnalysis . collapse + +prMorphoAnalysis :: (String,[String]) -> String +prMorphoAnalysis (w,fs0) = + let fs = filter (not . null) fs0 in + if null fs then w ++++ "*" else unlines (w:fs) + +prMorphoAnalysisShort :: (String,[String]) -> String +prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where + w' = if null fs then w +++ "*" else "" + +tagPrt :: Print a => (a,a) -> String +tagPrt (m,c) = "+" ++ prt c --- module name + +-- | print all words recognized +allMorphoWords :: Morpho -> [String] +allMorphoWords = map fst . collapse + +-- analyse running text and show results either in short form or on separate lines + +-- | analyse running text and show just the word, with "*" if not found +morphoTextStatus :: Morpho -> String -> String +morphoTextStatus mo = unlines . map (prMark . appMorpho mo) . words where + prMark (w,fs) = if null fs then "*" +++ w else w + +-- | analyse running text and show results in short form, one word per line +morphoTextShort :: Morpho -> String -> String +morphoTextShort mo = unlines . map (prMorphoAnalysisShort . appMorpho mo) . words + +-- | analyse running text and show results on separate lines +morphoText :: Morpho -> String -> String +morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words + +-- format used in the Italian Verb Engine +prFullForm :: Morpho -> String +prFullForm = unlines . map prOne . collapse where + prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) + +-- using Huet's unglueing method to find word boundaries +---- it would be much better to use a trie also for morphological analysis, +---- so this is for the sake of experiment +---- Moreover, we should specify the cases in which this happens - not all words + +decomposeWords :: Morpho -> String -> [String] +decomposeWords mo s = errVal (words s) $ decomposeSimple mo s diff --git a/src-3.0/GF/UseGrammar/Paraphrases.hs b/src-3.0/GF/UseGrammar/Paraphrases.hs new file mode 100644 index 000000000..d04f22aa6 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Paraphrases.hs @@ -0,0 +1,70 @@ +---------------------------------------------------------------------- +-- | +-- Module : Paraphrases +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:49 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- paraphrases of GF terms. AR 6\/10\/1998 -- 24\/9\/1999 -- 5\/7\/2000 -- 5\/6\/2002 +-- +-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL) +-- +-- thus inherited from the old GF. Incomplete and inefficient... +----------------------------------------------------------------------------- + +module GF.UseGrammar.Paraphrases (mkParaphrases) where + +import GF.Grammar.Abstract +import GF.Grammar.PrGrammar +import GF.Grammar.LookAbs +import GF.Grammar.AbsCompute + +import GF.Data.Operations + +import Data.List (nub) + +-- paraphrases of GF terms. AR 6/10/1998 -- 24/9/1999 -- 5/7/2000 -- 5/6/2002 +-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL) +-- thus inherited from the old GF. Incomplete and inefficient... + +mkParaphrases :: GFCGrammar -> Term -> [Term] +mkParaphrases st = nub . map (beta []) . paraphrases (allDefs st) + +type Definition = (Fun,Term) + +paraphrases :: [Definition] -> Term -> [Term] +paraphrases th t = + paraImmed th t ++ +--- paraMatch th t ++ + case t of + App c a -> [App d b | d <- paraphrases th c, b <- paraphrases th a] + Abs x b -> [Abs x d | d <- paraphrases th b] + c -> [] + ++ [t] + +paraImmed :: [Definition] -> Term -> [Term] +paraImmed defs t = + [Q m f | ((m,f), u) <- defs, t == u] ++ --- eqTerm + case t of + ---- Cn c -> [u | (f, u) <- defs, eqStrIdent f c] + _ -> [] + +{- --- +paraMatch :: [Definition] -> Trm -> [Trm] +paraMatch th@defs t = + [mkApp (Cn f) xx | (PC f zz, u) <- defs, + let (fs,sn) = fullApp u, fs == h, length sn == length zz] ++ + case findAMatch defs t of + Ok (g,b) -> [substTerm [] g b] + _ -> [] + where + (h,xx) = fullApp t + fullApp c = case c of + App f a -> (f', a' ++ [a]) where (f',a') = fullApp f + c -> (c,[]) + +-} diff --git a/src-3.0/GF/UseGrammar/Parsing.hs b/src-3.0/GF/UseGrammar/Parsing.hs new file mode 100644 index 000000000..2ca057410 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Parsing.hs @@ -0,0 +1,177 @@ +---------------------------------------------------------------------- +-- | +-- Module : Parsing +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/06/02 10:23:52 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.25 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.UseGrammar.Parsing where + +import GF.Infra.CheckM +import qualified GF.Canon.AbsGFC as C +import GF.Canon.GFC +import GF.Canon.MkGFC (trExp) ---- +import GF.Canon.CMacros +import GF.Grammar.MMacros (refreshMetas) +import GF.UseGrammar.Linear +import GF.Data.Str +import GF.CF.CF +import GF.CF.CFIdent +import GF.Infra.Ident +import GF.Grammar.TypeCheck +import GF.Grammar.Values +--import CFMethod +import GF.UseGrammar.Tokenize +import GF.UseGrammar.Morphology (isKnownWord) +import GF.CF.Profile +import GF.Infra.Option +import GF.UseGrammar.Custom +import GF.Compile.ShellState + +import GF.CF.PPrCF (prCFTree) +-- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE +import qualified GF.Parsing.GFC as New + +import GF.Data.Operations + +import Data.List (nub,sortBy) +import Data.Char (toLower) +import Control.Monad (liftM) + +-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002 + +parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree] +parseString os sg cat = liftM fst . parseStringMsg os sg cat + +parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String) +parseStringMsg os sg cat s = do + case checkStart $ parseStringC os sg cat s of + Ok (ts,(_,ss)) -> return (ts, unlines $ reverse ss) + Bad s -> return ([],s) + +parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] +parseStringC opts0 sg cat s + | oElem (iOpt "old") opts0 || + (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do + let opts = unionOptions opts0 $ stateOptions sg + cf = stateCF sg + gr = stateGrammarST sg + cn = cncId sg + toks = customOrDefault opts useTokenizer customTokenizer sg s + parser = customOrDefault opts useParser customParser sg cat + if oElem (iOpt "cut") opts + then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks + else mapM (tokens2trms opts sg cn parser) toks >>= return . concat + +---- | or [oElem p opts0 | +---- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do + + | otherwise = do + let opts = unionOptions opts0 $ stateOptions sg + algorithm | oElem newCParser opts0 = "c" + | oElem newMParser opts0 = "m" + | oElem newFParser opts0 = "f" + | otherwise = "f" -- default algorithm: FCFG + strategy = maybe "bottomup" id $ getOptVal opts useParser + -- -parser=bottomup/topdown + tokenizer = customOrDefault opts useTokenizer customTokenizer sg + toks = case tokenizer s of + t:_ -> t + _ -> [] ---- no support for undet. tok. + unknowns = + [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w] + where + unk w = not $ isKnownWord (morpho sg) w + uncap (c:cs) = toLower c : cs + uncap s = s + + case unknowns of + _:_ | oElem (iOpt "trynextlang") opts -> return [] + _:_ -> fail $ "Unknown words:" +++ unwords unknowns + _ -> do + + ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks + ts' <- checkErr $ + allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts + return $ optIntOrAll opts flagNumber ts' + + +tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] +tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info + where result = parser toks + info = snd result + trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2)) + +trees2trms :: + Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree] +trees2trms opts sg cn as ts0 info = do + let s = unwords $ map prCFTok as + ts <- case () of + _ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return [] + _ | raw -> do + ts1 <- return (map cf2trm0 ts0) ----- should not need annot + checks [ + mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails + ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return [] + ] + _ -> do + let num = optIntOrN opts flagRawtrees 999999 + let (ts01,rest) = splitAt num ts0 + if null rest then return () + else raise ("Warning: only" +++ show num +++ "raw parses out of" +++ + show (length ts0) +++ + "considered; use -rawtrees=<Int> to see more" + ) + (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01 + if null ts1 then raise ss else return () + ts2 <- checkErr $ + allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ---- + if forgive then return ts2 else do + let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2] + ps = [t | (t,ss) <- tsss, + any (compatToks as) (map str2cftoks ss)] + if null ps + then raise $ "Failure in morphology." ++ + if verb + then "\nPossible corrections: " +++++ + unlines (nub (map sstr (concatMap snd tsss))) + else "" + else return ps + if verb + then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info + else return () + + return $ optIntOrAll opts flagNumber $ nub ts + where + gr = stateGrammarST sg + + raw = oElem rawParse opts + verb = oElem beVerbose opts + forgive = oElem forgiveParse opts + +---- Operatins.allChecks :: ErrorMonad m => [m a] -> m [a] + +unknownWords sg ts = case filter noMatch [t | t@(TS _) <- ts] of + [] -> "where all words are known" + us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals + where + terminals = map TS $ stateGrammarWords sg + noMatch t = all (not . compatTok t) terminals + + +--- too much type checking in building term info? return FullTerm to save work? + +-- | raw parsing: so simple it is for a context-free CF grammar +cf2trm0 :: CFTree -> C.Exp +cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees) + where + cffun2trm (CFFun (fun,_)) = fun + mkApp = foldl C.EApp + mkAppAtom a = mkApp (C.EAtom a) diff --git a/src-3.0/GF/UseGrammar/Randomized.hs b/src-3.0/GF/UseGrammar/Randomized.hs new file mode 100644 index 000000000..c1c77edb2 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Randomized.hs @@ -0,0 +1,66 @@ +---------------------------------------------------------------------- +-- | +-- Module : Randomized +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:51 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- random generation and refinement. AR 22\/8\/2001. +-- implemented as sequence of refinement menu selecsions, encoded as integers +----------------------------------------------------------------------------- + +module GF.UseGrammar.Randomized where + +import GF.Grammar.Abstract +import GF.UseGrammar.Editing + +import GF.Data.Operations +import GF.Data.Zipper + +--- import Arch (myStdGen) --- circular for hbc +import System.Random --- (mkStdGen, StdGen, randoms) --- bad import for hbc + +-- random generation and refinement. AR 22/8/2001 +-- implemented as sequence of refinement menu selecsions, encoded as integers + +myStdGen :: Int -> StdGen +myStdGen = mkStdGen --- + +-- | build one random tree; use mx to prevent infinite search +mkRandomTree :: StdGen -> Int -> CGrammar -> Either Cat Fun -> Err Tree +mkRandomTree gen mx gr cat = mkTreeFromInts (take mx (randoms gen)) gr cat + +refineRandom :: StdGen -> Int -> CGrammar -> Action +refineRandom gen mx = mkStateFromInts $ take mx $ map abs (randoms gen) + +-- | build a tree from a list of integers +mkTreeFromInts :: [Int] -> CGrammar -> Either Cat Fun -> Err Tree +mkTreeFromInts ints gr catfun = do + st0 <- either (\cat -> newCat gr cat initState) + (\fun -> newFun gr fun initState) + catfun + state <- mkStateFromInts ints gr st0 + return $ loc2tree state + +mkStateFromInts :: [Int] -> CGrammar -> Action +mkStateFromInts ints gr z = mkRandomState ints z >>= reCheckState gr where + mkRandomState [] state = do + testErr (isCompleteState state) "not completed" + return state + mkRandomState (n:ns) state = do + let refs = refinementsState gr state + refs0 = map (not . snd . snd) refs + testErr (not (null refs0)) $ "no nonrecursive refinements available for" +++ + prt (actVal state) + (ref,_) <- (refs !? (n `mod` (length refs))) + state1 <- refineWithAtom False gr ref state + if isCompleteState state1 + then return state1 + else do + state2 <- goNextMeta state1 + mkRandomState ns state2 + diff --git a/src-3.0/GF/UseGrammar/Session.hs b/src-3.0/GF/UseGrammar/Session.hs new file mode 100644 index 000000000..e54d0e3fb --- /dev/null +++ b/src-3.0/GF/UseGrammar/Session.hs @@ -0,0 +1,181 @@ +---------------------------------------------------------------------- +-- | +-- Module : Session +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/17 15:13:55 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.12 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.UseGrammar.Session where + +import GF.Grammar.Abstract +import GF.Infra.Option +import GF.UseGrammar.Custom +import GF.UseGrammar.Editing +import GF.Compile.ShellState ---- grammar + +import GF.Data.Operations +import GF.Data.Zipper (keepPosition) --- + +-- First version 8/2001. Adapted to GFC with modules 19/6/2003. +-- Nothing had to be changed, which is a sign of good modularity. + +-- keep these abstract + +-- | 'Exp'-list: candidate refinements,clipboard +type SState = [(State,([Exp],[Clip]),SInfo)] + +-- | 'String' is message, 'Int' is the view +type SInfo = ([String],(Int,Options)) + +initSState :: SState +initSState = [(initState, ([],[]), (["Select 'New' category to start"],(0,noOptions)))] + -- instead of empty + +type Clip = Tree ---- (Exp,Type) + +-- | (peb): Something wrong with this definition?? +-- Shouldn't the result type be 'SInfo'? +-- +-- > okInfo :: Int -> SInfo == ([String], (Int, Options)) +okInfo :: n -> ([s], (n, Bool)) +okInfo n = ([],(n,True)) + +stateSState :: SState -> State +candsSState :: SState -> [Exp] +clipSState :: SState -> [Clip] +infoSState :: SState -> SInfo +msgSState :: SState -> [String] +viewSState :: SState -> Int +optsSState :: SState -> Options + +stateSState ((s,_,_):_) = s +candsSState ((_,(ts,_),_):_)= ts +clipSState ((_,(_,ts),_):_)= ts +infoSState ((_,_,i):_) = i +msgSState ((_,_,(m,_)):_) = m +viewSState ((_,_,(_,(v,_))):_) = v +optsSState ((_,_,(_,(_,o))):_) = o + +treeSState :: SState -> Tree +treeSState = actTree . stateSState + + +-- | from state to state +type ECommand = SState -> SState + +-- * elementary commands + +-- ** change state, drop cands, drop message, preserve options + +changeState :: State -> ECommand +changeState s ss = changeMsg [] $ (s,([],clipSState ss),infoSState ss) : ss + +changeCands :: [Exp] -> ECommand +changeCands ts ss@((s,(_,cb),(_,b)):_) = (s,(ts,cb),(candInfo ts,b)) : ss + +addtoClip :: Clip -> ECommand +addtoClip t ss@((s,(ts,cb),(i,b)):_) = (s,(ts,t:cb),(i,b)) : ss + +removeClip :: Int -> ECommand +removeClip n ss@((s,(ts,cb),(i,b)):_) = (s,(ts, drop n cb),(i,b)) : ss + +changeMsg :: [String] -> ECommand +changeMsg m ((s,ts,(_,b)):ss) = (s,ts,(m,b)) : ss -- just change message +changeMsg m _ = (s,ts,(m,b)) : [] where [(s,ts,(_,b))] = initSState + +changeView :: ECommand +changeView ((s,ts,(m,(v,b))):ss) = (s,ts,(m,(v+1,b))) : ss -- toggle view + +withMsg :: [String] -> ECommand -> ECommand +withMsg m c = changeMsg m . c + +changeStOptions :: (Options -> Options) -> ECommand +changeStOptions f ((s,ts,(m,(v,o))):ss) = (s,ts,(m,(v, f o))) : ss + +noNeedForMsg :: ECommand +noNeedForMsg = changeMsg [] -- everything's all right: no message + +candInfo :: [Exp] -> [String] +candInfo ts = case length ts of + 0 -> ["no acceptable alternative"] + 1 -> ["just one acceptable alternative"] + n -> [show n +++ "alternatives to select"] + +-- * keep SState abstract from this on + +-- ** editing commands + +action2command :: Action -> ECommand +action2command act state = case act (stateSState state) of + Ok s -> changeState s state + Bad m -> changeMsg [m] state + +action2commandNext :: Action -> ECommand -- move to next meta after execution +action2commandNext act = action2command (\s -> act s >>= goNextMetaIfCan) + +action2commandKeep :: Action -> ECommand -- keep old position after execution +action2commandKeep act = action2command (\s -> keepPosition act s) + +undoCommand :: Int -> ECommand +undoCommand n ss = + let k = length ss in + if k < n + then changeMsg ["cannot go all the way back"] [last ss] + else changeMsg ["successful undo"] (drop n ss) + +selectCand :: CGrammar -> Int -> ECommand +selectCand gr i state = err (\m -> changeMsg [m] state) id $ do + exp <- candsSState state !? i + let s = stateSState state + tree <- annotateInState gr exp s + return $ case replaceSubTree tree s of + Ok st' -> changeState st' state + Bad s -> changeMsg [s] state + +refineByExps :: Bool -> CGrammar -> [Exp] -> ECommand +refineByExps der gr trees = case trees of + [t] -> action2commandNext (refineWithExpTC der gr t) + _ -> changeCands trees + +refineByTrees :: Bool -> CGrammar -> [Tree] -> ECommand +refineByTrees der gr trees = case trees of + [t] -> action2commandNext (refineOrReplaceWithTree der gr t) + _ -> changeCands $ map tree2exp trees + +replaceByTrees :: CGrammar -> [Exp] -> ECommand +replaceByTrees gr trees = case trees of + [t] -> action2commandNext (\s -> + annotateExpInState gr t s >>= flip replaceSubTree s) + _ -> changeCands trees + +replaceByEditCommand :: StateGrammar -> String -> ECommand +replaceByEditCommand gr co = + action2commandKeep $ + maybe return ($ gr) $ + lookupCustom customEditCommand (strCI co) + +replaceByTermCommand :: Bool -> StateGrammar -> String -> Tree -> ECommand ---- +replaceByTermCommand der gr co exp = + let g = grammar gr in + refineByTrees der g $ maybe [exp] (\f -> f gr exp) $ + lookupCustom customTermCommand (strCI co) + +possClipsSState :: StateGrammar -> SState -> [(Int,Clip)] +possClipsSState gr s = filter poss $ zip [0..] (clipSState s) + where + poss = possibleTreeVal cgr st . snd + st = stateSState s + cgr = grammar gr + +getNumberedClip :: Int -> SState -> Err Clip +getNumberedClip i s = if length cs > i then return (cs !! i) + else Bad "not enough clips" + where + cs = clipSState s diff --git a/src-3.0/GF/UseGrammar/Statistics.hs b/src-3.0/GF/UseGrammar/Statistics.hs new file mode 100644 index 000000000..46e4fcc3b --- /dev/null +++ b/src-3.0/GF/UseGrammar/Statistics.hs @@ -0,0 +1,44 @@ +---------------------------------------------------------------------- +-- | +-- Module : Statistics +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/04 11:45:38 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.1 $ +-- +-- statistics on canonical grammar: amounts of generated code +-- AR 4\/9\/2005. +-- uses canonical grammar +----------------------------------------------------------------------------- + +module GF.UseGrammar.Statistics (prStatistics) where + +import GF.Infra.Modules +import GF.Infra.Option +import GF.Grammar.PrGrammar +import GF.Canon.GFC +import GF.Canon.MkGFC + +import GF.Data.Operations + +import Data.List (sortBy) + +-- | the top level function +prStatistics :: CanonGrammar -> String +prStatistics can = unlines $ [ + show (length mods) ++ "\t\t modules", + show chars ++ "\t\t gfc size", + "", + "Top 40 definitions" + ] ++ + [show d ++ "\t\t " ++ f | (d,f) <- tops] + where + tops = take 40 $ reverse $ sortBy (\ (i,_) (j,_) -> compare i j) defs + defs = [(length (prt (info2def j)), name m j) | (m,j) <- infos] + infos = [(m,j) | (m,ModMod mo) <- mods, j <- tree2list (jments mo)] + name m (f,_) = prt m ++ "." ++ prt f + mods = modules can + chars = length $ prCanon can diff --git a/src-3.0/GF/UseGrammar/Tokenize.hs b/src-3.0/GF/UseGrammar/Tokenize.hs new file mode 100644 index 000000000..9f1ab5449 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Tokenize.hs @@ -0,0 +1,222 @@ +---------------------------------------------------------------------- +-- | +-- Module : Tokenize +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/29 13:20:08 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.14 $ +-- +-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002. +-- an entry for each is included in 'Custom.customTokenizer' +----------------------------------------------------------------------------- + +module GF.UseGrammar.Tokenize ( tokWords, + tokLits, + tokVars, + lexHaskell, + lexHaskellLiteral, + lexHaskellVar, + lexText, + lexTextVar, + lexC2M, lexC2M', + lexTextLiteral, + lexIgnore, + wordsLits + ) where + +import GF.Data.Operations +---- import UseGrammar (isLiteral,identC) +import GF.CF.CFIdent + +import Data.Char + +-- lexers = tokenizers, to prepare input for GF grammars. AR 4/1/2002 +-- an entry for each is included in Custom.customTokenizer + +-- | just words +tokWords :: String -> [CFTok] +tokWords = map tS . words + +tokLits :: String -> [CFTok] +tokLits = map mkCFTok . mergeStr . wordsLits where + mergeStr ss = case ss of + w@(c:cs):rest | elem c "\'\"" && c /= last w -> getStr [w] rest + w :rest -> w : mergeStr rest + [] -> [] + getStr v ss = case ss of + w@(_:_):rest | elem (last w) "\'\"" -> (unwords (reverse (w:v))) : mergeStr rest + w :rest -> getStr (w:v) rest + [] -> reverse v + +tokVars :: String -> [CFTok] +tokVars = map mkCFTokVar . wordsLits + +isFloat s = case s of + c:cs | isDigit c -> isFloat cs + '.':cs@(_:_) -> all isDigit cs + _ -> False + +isString s = case s of + c:cs@(_:_) -> (c == '\'' && d == '\'') || (c == '"' && d == '"') where d = last cs + _ -> False + + +mkCFTok :: String -> CFTok +mkCFTok s = case s of + '"' :cs@(_:_) | last cs == '"' -> tL $ init cs + '\'':cs@(_:_) | last cs == '\'' -> tL $ init cs --- 's Gravenhage + _:_ | isFloat s -> tF s + _:_ | all isDigit s -> tI s + _ -> tS s + +mkCFTokVar :: String -> CFTok +mkCFTokVar s = case s of + '?':_:_ -> tM s --- "?" --- compat with prCF + 'x':'_':_ -> tV s + 'x':[] -> tV s + '$':xs@(_:_) -> if last s == '$' then tV (init xs) else tS s + _ -> tS s + +mkTokVars :: (String -> [CFTok]) -> String -> [CFTok] +mkTokVars tok = map tv . tok where + tv (TS s) = mkCFTokVar s + tv t = t + +mkLit :: String -> CFTok +mkLit s + | isFloat s = tF s + | all isDigit s = tI s + | otherwise = tL s + +-- obsolete +mkTL :: String -> CFTok +mkTL s + | isFloat s = tF s + | all isDigit s = tI s + | otherwise = tL ("'" ++ s ++ "'") + + +-- | Haskell lexer, usable for much code +lexHaskell :: String -> [CFTok] +lexHaskell ss = case lex ss of + [(w@(_:_),ws)] -> tS w : lexHaskell ws + _ -> [] + +-- | somewhat shaky text lexer +lexText :: String -> [CFTok] +lexText = uncap . lx where + + lx s = case s of + '?':'?':cs -> tS "??" : lx cs + p : cs | isMPunct p -> tS [p] : uncap (lx cs) + p : cs | isPunct p -> tS [p] : lx cs + s : cs | isSpace s -> lx cs + _ : _ -> getWord s + _ -> [] + + getWord s = tS w : lx ws where (w,ws) = span isNotSpec s + isMPunct c = elem c ".!?" + isPunct c = elem c ",:;()\"" + isNotSpec c = not (isMPunct c || isPunct c || isSpace c) + uncap (TS (c:cs) : ws) = tC (c:cs) : ws + uncap s = s + +-- | lexer for C--, a mini variant of C +lexC2M :: String -> [CFTok] +lexC2M = lexC2M' False + +lexC2M' :: Bool -> String -> [CFTok] +lexC2M' isHigherOrder s = case s of + '#':cs -> lexC $ dropWhile (/='\n') cs + '/':'*':cs -> lexC $ dropComment cs + c:cs | isSpace c -> lexC cs + c:cs | isAlpha c -> getId s + c:cs | isDigit c -> getLit s + c:d:cs | isSymb [c,d] -> tS [c,d] : lexC cs + c:cs | isSymb [c] -> tS [c] : lexC cs + _ -> [] --- covers end of file and unknown characters + where + lexC = lexC2M' isHigherOrder + getId s = mkT i : lexC cs where (i,cs) = span isIdChar s + getLit s = tI i : lexC cs where (i,cs) = span isDigit s ---- Float! + isIdChar c = isAlpha c || isDigit c || elem c "'_" + isSymb = reservedAnsiCSymbol + dropComment s = case s of + '*':'/':cs -> cs + _:cs -> dropComment cs + _ -> [] + mkT i = if (isRes i) then (tS i) else + if isHigherOrder then (tV i) else (tL ("'" ++ i ++ "'")) + isRes = reservedAnsiC + + +reservedAnsiCSymbol s = case lookupTree show s ansiCtree of + Ok True -> True + _ -> False + +reservedAnsiC s = case lookupTree show s ansiCtree of + Ok False -> True + _ -> False + +-- | for an efficient lexer: precompile this! +ansiCtree = buildTree $ [(s,True) | s <- reservedAnsiCSymbols] ++ + [(s,False) | s <- reservedAnsiCWords] + +reservedAnsiCSymbols = words $ + "<<= >>= << >> ++ -- == <= >= *= += -= %= /= &= ^= |= " ++ + "^ { } = , ; + * - ( ) < > & % ! ~" + +reservedAnsiCWords = words $ + "auto break case char const continue default " ++ + "do double else enum extern float for goto if int " ++ + "long register return short signed sizeof static struct switch typedef " ++ + "union unsigned void volatile while " ++ + "main printin putchar" --- these are not ansi-C + +-- | turn unknown tokens into string literals; not recursively for literals 123, 'foo' +unknown2string :: (String -> Bool) -> [CFTok] -> [CFTok] +unknown2string isKnown = map mkOne where + mkOne t@(TS s) + | isKnown s = t + | isFloat s = tF s + | all isDigit s = tI s + | otherwise = tL s + mkOne t@(TC s) = if isKnown s then t else mkLit s + mkOne t = t + +unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok] +unknown2var isKnown = map mkOne where + mkOne t@(TS "??") = if isKnown "??" then t else tM "??" + mkOne t@(TS s) + | isKnown s = t + | isFloat s = tF s + | isString s = tL (init (tail s)) + | all isDigit s = tI s + | otherwise = tV s + mkOne t@(TC s) = if isKnown s then t else tV s + mkOne t = t + +lexTextLiteral, lexHaskellLiteral, lexHaskellVar :: (String -> Bool) -> String -> [CFTok] + +lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText +lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell + +lexHaskellVar isKnown = unknown2var isKnown . lexHaskell +lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText + + +eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs) +eitherUpper isKnown w = isKnown w + +-- ignore unknown tokens (e.g. keyword spotting) + +lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok] +lexIgnore isKnown = concatMap mkOne where + mkOne t@(TS s) + | isKnown s = [t] + | otherwise = [] + mkOne t = [t] + diff --git a/src-3.0/GF/UseGrammar/Transfer.hs b/src-3.0/GF/UseGrammar/Transfer.hs new file mode 100644 index 000000000..5d62f4385 --- /dev/null +++ b/src-3.0/GF/UseGrammar/Transfer.hs @@ -0,0 +1,79 @@ +---------------------------------------------------------------------- +-- | +-- Module : Transfer +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:53 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.5 $ +-- +-- linearize, parse, etc, by transfer. AR 9\/10\/2003 +----------------------------------------------------------------------------- + +module GF.UseGrammar.Transfer where + +import GF.Grammar.Grammar +import GF.Grammar.Values +import GF.Grammar.AbsCompute +import qualified GF.Canon.GFC as GFC +import GF.Grammar.LookAbs +import GF.Grammar.MMacros +import GF.Grammar.Macros +import GF.Grammar.PrGrammar +import GF.Grammar.TypeCheck + +import GF.Infra.Ident +import GF.Data.Operations + +import qualified Transfer.Core.Abs as T + +import Control.Monad + + +-- transfer is done in T.Exp - we only need these conversions. + +exp2core :: Ident -> Exp -> T.Exp +exp2core f = T.EApp (T.EVar (var f)) . exp2c where + exp2c e = case e of + App f a -> T.EApp (exp2c f) (exp2c a) + Abs x b -> T.EAbs (T.PVVar (var x)) (exp2c b) ---- should be syntactic abstr + Q _ c -> T.EVar (var c) + QC _ c -> T.EVar (var c) + K s -> T.EStr s + EInt i -> T.EInteger $ toInteger i + Meta m -> T.EMeta (T.TMeta (prt m)) ---- which meta symbol? + Vr x -> T.EVar (var x) ---- should be syntactic var + + var x = T.CIdent $ prt x + +core2exp :: T.Exp -> Exp +core2exp e = case e of + T.EApp f a -> App (core2exp f) (core2exp a) + T.EAbs (T.PVVar x) b -> Abs (var x) (core2exp b) ---- only from syntactic abstr + T.EVar c -> Vr (var c) -- GF annotates to Q or QC + T.EStr s -> K s + T.EInteger i -> EInt $ fromInteger i + T.EMeta _ -> uExp -- meta symbol 0, refreshed by GF + where + var :: T.CIdent -> Ident + var (T.CIdent x) = zIdent x + + + +-- The following are now obsolete (30/11/2005) +-- linearize, parse, etc, by transfer. AR 9/10/2003 + +doTransfer :: GFC.CanonGrammar -> Ident -> Tree -> Err Tree +doTransfer gr tra t = do + cat <- liftM snd $ val2cat $ valTree t + f <- lookupTransfer gr tra cat + e <- compute gr $ App f $ tree2exp t + annotate gr e + +useByTransfer :: (Tree -> Err a) -> GFC.CanonGrammar -> Ident -> (Tree -> Err a) +useByTransfer lin gr tra t = doTransfer gr tra t >>= lin + +mkByTransfer :: (a -> Err [Tree]) -> GFC.CanonGrammar -> Ident -> (a -> Err [Tree]) +mkByTransfer parse gr tra s = parse s >>= mapM (doTransfer gr tra) diff --git a/src-3.0/GF/UseGrammar/TreeSelections.hs b/src-3.0/GF/UseGrammar/TreeSelections.hs new file mode 100644 index 000000000..9bf2711be --- /dev/null +++ b/src-3.0/GF/UseGrammar/TreeSelections.hs @@ -0,0 +1,77 @@ +---------------------------------------------------------------------- +-- | +-- Module : TreeSelections +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- choose shallowest trees, and remove an overload resolution prefix +----------------------------------------------------------------------------- + +module GF.UseGrammar.TreeSelections ( + + getOverloadResults, smallestTrs, sizeTr, depthTr + + ) where + +import GF.Grammar.Abstract +import GF.Grammar.Macros + +import GF.Data.Operations +import GF.Data.Zipper +import Data.List + +-- AR 2/7/2007 +-- The top-level function takes a set of trees (typically parses) +-- and returns the list of those trees that have the minimum size. +-- In addition, the overload prefix "ovrld123_", is removed +-- from each constructor in which it appears. This is used for +-- showing the library API constructors in a parsable grammar. +-- TODO: access the generic functions smallestTrs, sizeTr, depthTr from shell + +getOverloadResults :: [Tree] -> [Tree] +getOverloadResults = smallestTrs sizeTr . map (mkOverload "ovrld") + +-- NB: this does not always give the desired result, since +-- some genuine alternatives may be deeper: now we will exclude the +-- latter of +-- +-- mkCl this_NP love_V2 (mkNP that_NP here_Adv) +-- mkCl this_NP (mkVP (mkVP love_V2 that_NP) here_Adv) +-- +-- A perfect method would know the definitional equivalences of constructors. +-- +-- Notice also that size is a better measure than depth, because: +-- 1. Global depth does not exclude the latter of +-- +-- mkCl (mkNP he_Pron) love_V2 that_NP +-- mkCl (mkNP he_Pron) (mkVP love_V2 that_NP) +-- +-- 2. Length is needed to exclude the latter of +-- +-- mkS (mkCl (mkNP he_Pron) love_V2 that_NP) +-- mkS presentTense (mkCl (mkNP he_Pron) love_V2 that_NP) +-- + +smallestTrs :: (Tr a -> Int) -> [Tr a] -> [Tr a] +smallestTrs size ts = map fst $ filter ((==mx) . snd) tds where + tds = [(t, size t) | t <- ts] + mx = minimum $ map snd tds + +depthTr :: Tr a -> Int +depthTr (Tr (_, ts)) = case ts of + [] -> 1 + _ -> 1 + (maximum $ map depthTr ts) + +sizeTr :: Tr a -> Int +sizeTr (Tr (_, ts)) = 1 + sum (map sizeTr ts) + +-- remove from each constant a prefix starting with "pref", up to first "_" +-- example format: ovrld123_mkNP + +mkOverload :: String -> Tree -> Tree +mkOverload pref = mapTr (changeAtom overAtom) where + overAtom a = case a of + AtC (m, IC f) | isPrefixOf pref f -> + AtC (m, IC (tail (dropWhile (/='_') f))) + _ -> a diff --git a/src-3.0/GF/UseGrammar/Treebank.hs b/src-3.0/GF/UseGrammar/Treebank.hs new file mode 100644 index 000000000..841a9c6dc --- /dev/null +++ b/src-3.0/GF/UseGrammar/Treebank.hs @@ -0,0 +1,251 @@ +---------------------------------------------------------------------- +-- | +-- Module : Treebank +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- Generate multilingual treebanks. AR 8\/2\/2006 +-- +-- (c) Aarne Ranta 2006 under GNU GPL +-- +-- Purpose: to generate treebanks. +----------------------------------------------------------------------------- + +module GF.UseGrammar.Treebank ( + mkMultiTreebank, + mkUniTreebank, + multi2uniTreebank, + uni2multiTreebank, + testMultiTreebank, + treesTreebank, + getTreebank, + getUniTreebank, + readUniTreebanks, + readMultiTreebank, + lookupTreebank, + assocsTreebank, + isWordInTreebank, + printAssoc, + mkCompactTreebank + ) where + +import GF.Compile.ShellState +import GF.UseGrammar.Linear -- (linTree2string) +import GF.UseGrammar.Custom +import GF.UseGrammar.GetTree (string2tree) +import GF.Grammar.TypeCheck (annotate) +import GF.Canon.CMacros (noMark) +import GF.Grammar.Grammar (Trm) +import GF.Grammar.MMacros (exp2tree) +import GF.Grammar.Macros (zIdent) +import GF.Grammar.PrGrammar (prt_,prt) +import GF.Grammar.Values (tree2exp) +import GF.Data.Operations +import GF.Infra.Option +import GF.Infra.Ident (Ident) +import GF.Infra.UseIO +import qualified GF.Grammar.Abstract as A +import qualified Data.Map as M +import qualified Data.Set as S +import qualified Data.List as L +import Control.Monad (liftM) +import System.FilePath + +-- Generate a treebank with a multilingual grammar. AR 8/2/2006 +-- (c) Aarne Ranta 2006 under GNU GPL + +-- keys are trees; format: XML file +type MultiTreebank = [(String,[(String,String)])] -- tree,lang,lin + +-- keys are strings; format: string TAB tree TAB ... TAB tree +type UniTreebank = Treebank -- M.Map String [String] -- string,tree + +-- both formats can be read from both kinds of files +readUniTreebanks :: FilePath -> IO [(Ident,UniTreebank)] +readUniTreebanks file = do + s <- readFileIf file + return $ if isMultiTreebank s + then multi2uniTreebank $ getTreebank $ lines s + else + let tb = getUniTreebank $ lines s + in [(zIdent (dropExtension file),tb)] + +readMultiTreebank :: FilePath -> IO MultiTreebank +readMultiTreebank file = do + s <- readFileIf file + return $ if isMultiTreebank s + then getTreebank $ lines s + else uni2multiTreebank (zIdent (dropExtension file)) $ getUniTreebank $ lines s + +isMultiTreebank :: String -> Bool +isMultiTreebank s = take 10 s == "<treebank>" + +multi2uniTreebank :: MultiTreebank -> [(Ident,UniTreebank)] +multi2uniTreebank mt@((_,lls):_) = [(zIdent la, mkTb la) | (la,_) <- lls] where + mkTb la = M.fromListWith (++) [(s,[t]) | (t,lls) <- mt, (l,s) <- lls, l==la] +multi2uniTreebank [] = [] + +uni2multiTreebank :: Ident -> UniTreebank -> MultiTreebank +uni2multiTreebank la tb = + [(t,[(prt_ la, s)]) | (s,ts) <- assocsTreebank tb, t <- ts] + +-- | the main functions + +-- builds a treebank where trees are the keys, and writes a file (opt. XML) +mkMultiTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res +mkMultiTreebank opts sh com trees + | oElem (iOpt "compact") opts = mkCompactTreebank opts sh trees +mkMultiTreebank opts sh com trees = + putInXML opts "treebank" comm (concatMap mkItem tris) where + mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs) +-- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs) + mkTree t = putInXML opts "tree" [] (puts $ showTree t) + mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize opts sh lg t) + + langs = [prt_ l | l <- allLanguages sh] + comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr + abstr = "" --- "Abs" ---- + cat i = " number=" ++ show (show i) --- " cat=" ++ show "S" ---- + lang lg = " lang=" ++ show (prt_ (zIdent lg)) + tris = zip trees [1..] + +-- builds a unilingual treebank where strings are the keys into an internal treebank + +mkUniTreebank :: Options -> ShellState -> Language -> [A.Tree] -> Treebank +mkUniTreebank opts sh lg trees = M.fromListWith (++) [(lin t, [prt_ t]) | t <- trees] + where + lang = prt_ lg + lin t = linearize opts sh lang t + +-- reads a treebank and linearizes its trees again, printing all differences +testMultiTreebank :: Options -> ShellState -> String -> Res +testMultiTreebank opts sh = putInXML opts "testtreebank" [] . + concatMap testOne . + getTreebanks . lines + where + testOne (e,lang,str0) = do + let tr = annot gr e + let str = linearize opts sh lang tr + if str == str0 then ret else putInXML opts "diff" [] $ concat [ + putInXML opts "tree" [] (puts $ showTree tr), + putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0, + putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str + ] + gr = firstStateGrammar sh + +-- writes all the trees of the treebank +treesTreebank :: Options -> String -> [String] +treesTreebank _ = terms . getTreebank . lines where + terms ts = [t | (t,_) <- ts] + +-- string vs. IO +type Res = [String] -- IO () +puts :: String -> Res +puts = return -- putStrLn +ret = [] -- return () +-- + +-- here strings are keys +assocsTreebank :: UniTreebank -> [(String,[String])] +assocsTreebank = M.assocs + +isWordInTreebank :: UniTreebank -> String -> Bool +isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb))) + +printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts] + +getTreebanks :: [String] -> [(String,String,String)] +getTreebanks = concatMap grps . getTreebank where + grps (t,lls) = [(t,x,y) | (x,y) <- lls] + +getTreebank :: [String] -> MultiTreebank +getTreebank ll = case ll of + l:ls@(_:_:_) -> + let (l1,l2) = getItem ls + (tr,lins) = getTree l1 + lglins = getLins lins + in (tr,lglins) : getTreebank l2 + _ -> [] + where + getItem = span ((/="</item") . take 6) + + getTree (_:ss) = + let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2) + + getLins (beg:str:end:ss) = (getLang beg, str):getLins ss + getLins _ = [] + + getLang = takeWhile (/='"') . tail . dropWhile (/='"') + +getUniTreebank :: [String] -> UniTreebank +getUniTreebank ls = M.fromListWith (++) [(s, ts) | s:ts <- map chop ls] where + chop = chunks '\t' + +lookupTreebank :: Treebank -> String -> [String] +lookupTreebank tb s = maybe [] id $ M.lookup s tb + +annot :: StateGrammar -> String -> A.Tree +annot gr s = errVal (error "illegal tree") $ do + let t = tree2exp $ string2tree gr s + annotate (grammar gr) t + +putInXML :: Options -> String -> String -> Res -> Res +putInXML opts tag attrs io = + (ifXML $ puts $ tagXML $ tag ++ attrs) ++ + io ++ + (ifXML $ puts $ tagXML $ '/':tag) + where + ifXML c = if oElem showXML opts then c else [] + + +tagXML :: String -> String +tagXML s = "<" ++ s ++ ">" + +-- print the treebank in a compact format: +-- first a sorted list of all words, referrable by index +-- then the linearization of each tree, as sequences of word indices +-- this format is usable in embedded translation systems. + +mkCompactTreebank :: Options -> ShellState -> [A.Tree] -> [String] +mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh + +printCompactTreebank :: (MultiTreebank,[String]) -> [String] +printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where + ws = L.sort $ L.nub $ concat $ map (concatMap (words . snd) . snd) tb + + linss = map (unwords . pad) linss0 + linss0 = map (map (show . encode) . words) allExs + allExs = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]] + encode w = maybe undefined id $ M.lookup w wmap + wmap = M.fromAscList $ zip ws [1..] + stat = unwords $ map show [length ws, length lgs, length tb, smax] + langs = unwords lgs + smax = maximum $ map length linss0 + pad ws = ws ++ replicate (smax - length ws) "0" + +-- [(String,[(String,String)])] -- tree,lang,lin +mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String]) +mkJustMultiTreebank opts sh ts = + ([(prt_ t, [(la, lin la t) | la <- langs]) | t <- ts],langs) where + langs = map prt_ $ allLanguages sh + lin = linearize opts sh + + +--- these handy functions are borrowed from EmbedAPI + +linearize opts mgr lang = lin where + sgr = stateGrammarOfLangOpt False mgr zlang + cgr = canModules mgr + zlang = zIdent lang + untok = customOrDefault (addOptions opts (stateOptions sgr)) useUntokenizer customUntokenizer sgr + lin + | oElem showRecord opts = err id id . liftM prt . linearizeNoMark cgr zlang + | oElem tableLin opts = + err id id . liftM (unlines . map untok . prLinTable True) . allLinTables True cgr zlang + | oElem showAll opts = + err id id . liftM (unlines . map untok . prLinTable False) . allLinTables False cgr zlang + + | otherwise = untok . linTree2string noMark cgr zlang + +showTree t = prt_ $ tree2exp t |
