summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorkrangelov <kr.angelov@gmail.com>2019-05-28 12:59:07 +0200
committerkrangelov <kr.angelov@gmail.com>2019-05-28 12:59:07 +0200
commit8c18d7162fc197b45a0292e93f29116c918b9d2f (patch)
treea0f46643bd143967e6c070c41084a487844501b8 /src
parentac039ec74fad8983e8826c5357f1a40d73a0970a (diff)
bits of documentation
Diffstat (limited to 'src')
-rw-r--r--src/runtime/haskell-bind/PGF2.hsc28
-rw-r--r--src/runtime/haskell-bind/PGF2/Internal.hsc22
2 files changed, 37 insertions, 13 deletions
diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc
index 5644b6ce8..fd77dbbf7 100644
--- a/src/runtime/haskell-bind/PGF2.hsc
+++ b/src/runtime/haskell-bind/PGF2.hsc
@@ -466,8 +466,23 @@ newGraphvizOptions pool opts = do
-- Functions using Concr
-- Morpho analyses, parsing & linearization
-type MorphoAnalysis = (Fun,Cat,Float)
-
+-- | This triple is returned by all functions that deal with
+-- the grammar's lexicon. Its first element is the name of an abstract
+-- lexical function which can produce a given word or
+-- a multiword expression (i.e. this is the lemma).
+-- After that follows a string which describes
+-- the particular inflection form.
+--
+-- The last element is the logarithm of the
+-- probability of the function. The probability is not
+-- conditionalized on the category of the function. This makes it
+-- possible to compare the likelihood of two functions even if they
+-- have different types.
+type MorphoAnalysis = (Fun,String,Float)
+
+-- | 'lookupMorpho' takes a string which must be a single word or
+-- a multiword expression. It then computes the list of all possible
+-- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
lookupMorpho (Concr concr master) sent =
unsafePerformIO $
@@ -481,6 +496,15 @@ lookupMorpho (Concr concr master) sent =
freeHaskellFunPtr fptr
readIORef ref
+-- | 'lookupCohorts' takes an arbitrary string and produces
+-- a list of all places where lexical items from the grammar have been
+-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
+-- where @start-end@ identifies the span in the text and @ans@ is
+-- the list of possible morphological analyses similar to 'lookupMorpho'.
+--
+-- The list is sorted first by the @start@ position and after that
+-- by the @end@ position. This can be used for instance if you want to
+-- filter only the longest matches.
lookupCohorts :: Concr -> String -> [(Int,[MorphoAnalysis],Int)]
lookupCohorts lang@(Concr concr master) sent =
unsafePerformIO $
diff --git a/src/runtime/haskell-bind/PGF2/Internal.hsc b/src/runtime/haskell-bind/PGF2/Internal.hsc
index 6582a9ada..3cb4199d0 100644
--- a/src/runtime/haskell-bind/PGF2/Internal.hsc
+++ b/src/runtime/haskell-bind/PGF2/Internal.hsc
@@ -528,17 +528,17 @@ newAbstr aflags cats funs = unsafePerformIO $ do
data ConcrInfo = ConcrInfo (Ptr GuSeq) (Ptr GuMap) (Ptr GuMap) (Ptr GuSeq) (Ptr GuSeq) (Ptr GuMap) (Ptr PgfConcr -> Ptr GuPool -> IO ()) CInt
-newConcr :: (?builder :: Builder s) => AbstrInfo ->
- [(String,Literal)] -> -- ^ Concrete syntax flags
- [(String,String)] -> -- ^ Printnames
- [(FId,[FunId])] -> -- ^ Lindefs
- [(FId,[FunId])] -> -- ^ Linrefs
- [(FId,[Production])] -> -- ^ Productions
- [(Fun,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun)
- [[Symbol]] -> -- ^ Sequences (must be sorted)
- [(Cat,FId,FId,[String])] -> -- ^ Concrete categories
- FId -> -- ^ The total count of the categories
- ConcrInfo
+newConcr :: (?builder :: Builder s) => AbstrInfo
+ -> [(String,Literal)] -- ^ Concrete syntax flags
+ -> [(String,String)] -- ^ Printnames
+ -> [(FId,[FunId])] -- ^ Lindefs
+ -> [(FId,[FunId])] -- ^ Linrefs
+ -> [(FId,[Production])] -- ^ Productions
+ -> [(Fun,[SeqId])] -- ^ Concrete functions (must be sorted by Fun)
+ -> [[Symbol]] -- ^ Sequences (must be sorted)
+ -> [(Cat,FId,FId,[String])] -- ^ Concrete categories
+ -> FId -- ^ The total count of the categories
+ -> ConcrInfo
newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
c_cflags <- newFlags cflags pool
c_printname <- newMap (#size GuString) gu_string_hasher newUtf8CString