summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2012-09-12 12:52:03 +0000
committeraarne <aarne@chalmers.se>2012-09-12 12:52:03 +0000
commitacb3fa961e6a9e548addabf34796f7645832c884 (patch)
tree02c7cd92ee65ea7e45fb477692f6813d4c1a35fa
parent38db61e34f53c6245f38915d272362ca34711d91 (diff)
First version of a web server for morphological paradigms.
-rw-r--r--src/www/gfmorpho/GFMorpho.hs108
-rw-r--r--src/www/gfmorpho/README23
-rw-r--r--src/www/gfmorpho/gfmorpho.cgi7
-rw-r--r--src/www/gfmorpho/gfmorpho.html100
4 files changed, 238 insertions, 0 deletions
diff --git a/src/www/gfmorpho/GFMorpho.hs b/src/www/gfmorpho/GFMorpho.hs
new file mode 100644
index 000000000..e01d1da56
--- /dev/null
+++ b/src/www/gfmorpho/GFMorpho.hs
@@ -0,0 +1,108 @@
+import Network.HTTP.Base
+import Codec.Binary.UTF8.String
+import Data.Char
+import Data.List
+import System
+
+main = do
+ xs <- getArgs
+ let xxoo = lexArgs (unwords xs)
+ case pArgs xxoo of
+ Just (oo,xx) -> do
+ morpho oo xx
+ _ -> do
+ putStrLn $ "cannot read " ++ unwords xs ++ "."
+ putStrLn "<p>"
+ putStrLn usage
+
+usage = "usage: gfmorpho LANG POS FORMS OPT*"
+
+noParse xx = length xx < 3 ----
+
+lexArgs = map (decodeString . urlDecode) . words . map unspec . drop 1 . dropWhile (/='=') where
+ unspec c = case c of
+ '=' -> ' '
+ '+' -> ' '
+ _ -> c
+
+pArgs xxoo = do
+ let (oo,xx) = partition isOption xxoo
+ if length xx < 3 then Nothing else return (oo,xx)
+
+morpho :: [String] -> [String] -> IO ()
+morpho oo xx = do
+ writeFile tmpCommand (script xx)
+ system $ command xx
+ s <- readFile tmpFile
+ putStrLn $ mkFile $ response oo s
+
+script ("!":lang:rest) = "cc -table -unqual " ++ unwords rest
+script (lang: pos: forms) = "cc -table -unqual " ++ fun pos ++ quotes forms
+ where
+ fun pos = "mk" ++ pos
+
+command ("!":args) = command args
+command (lang: pos: forms) =
+ "/usr/local/bin/gf -run -retain -path=alltenses alltenses/Paradigms" ++ lang ++ ".gfo"
+ ++ " < " ++ tmpCommand
+ ++ " > " ++ tmpFile
+
+quotes = unwords . map quote where
+ quote s = case s of
+ '_':tag -> tag
+ _ -> "\"" ++ s ++ "\""
+
+-- html response
+response oo =
+ tag "table border=1" . unlines . map (tag "tr" . unwords) . map cleanTable . grep oo . map words . lines
+
+cleanTable ws = [tag "td" (unwords param), tag "td" (tag "i" (unwords form))] where
+ (param,form) = getOne (map cleant ws)
+ cleant w = case w of
+ "s" -> ""
+ "." -> ""
+ _ -> cleanw w
+ cleanw = filter (flip notElem "()")
+ getOne ws = let ww = filter (/= "=>") ws in (init ww, [last ww]) -- excludes multiwords
+
+responsePlain oo =
+ unlines . map unwords . grep oo . map cleanTablePlain . map words . lines
+
+cleanTablePlain = map clean where
+ clean w = case w of
+ "=>" -> "\t"
+ "s" -> ""
+ "." -> ""
+ _ -> cleanw w
+ cleanw = filter (flip notElem "()")
+
+grep oo wss = filter (\ws -> all (flip matchIn ws) (map tail oo)) wss
+
+matchIn p ws = any (match p) ws where
+ match p w = case (p,w) of
+ ('*':ps,_ ) -> any (match ps) [drop i w | i <- [0..length w]] ---
+ (c:ps, d:ws) -> c == d && match ps ws
+ _ -> p == w
+
+tmpFile = "_gfmorpho.tmp"
+tmpCommand = "_gfcommand.tmp"
+
+isOption = (=='-') . head
+
+tag t s = "<" ++ t ++ ">" ++ s ++ "</" ++ t ++ ">"
+
+
+-- html file with UTF8
+
+mkFile s = unlines $ [
+ "<HTML>",
+ "<HEAD>",
+ "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=utf-8\">",
+ "<TITLE>GF Smart Paradigm Output</TITLE>",
+ "</HEAD>",
+ "<BODY>",
+ s,
+ "</BODY>",
+ "</HTML>"
+ ]
+
diff --git a/src/www/gfmorpho/README b/src/www/gfmorpho/README
new file mode 100644
index 000000000..2090bb5d3
--- /dev/null
+++ b/src/www/gfmorpho/README
@@ -0,0 +1,23 @@
+A service for using smart paradigms on the web.
+
+Works with a cgi script running a Haskell program that calls GF to interprete a query string as a "cc" command on a specified Paradigms file. For instance, if the
+user submits the query
+
+ Eng N baby
+
+the program executes the command
+
+ cc -table -unqual ParadigmsEng.mkN "baby"
+
+The resulting output is converted into an HTML table.
+
+The file gfmorpho.html gives some more information. Open issues in addition to those mentioned there are:
+
+- GFMorpho.hs creates the temporary files _gfcommand.tmp and _gfmorpho.tmp which need to be world-writable; they should be created more properly and removed after use
+- gfmorpho.cgi defines the variable GF_LIB_PATH to reside in /Users/aarne, and must be edited for other environments
+- to work for all languages mentioned, one has to compile some incomplete GF grammars not standardly compiled:
+
+ GF/lib/src$ runghc Make alltenses lang langs=Amh,Ara,Lat,Mlt,Tur
+
+(c) Aarne Ranta 2012 under LGPL/BSD.
+
diff --git a/src/www/gfmorpho/gfmorpho.cgi b/src/www/gfmorpho/gfmorpho.cgi
new file mode 100644
index 000000000..c08a9450c
--- /dev/null
+++ b/src/www/gfmorpho/gfmorpho.cgi
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+echo "Content-type: text/html";
+echo ""
+export LANG=en_US.UTF-8
+runghc GFMorpho "$QUERY_STRING"
+
diff --git a/src/www/gfmorpho/gfmorpho.html b/src/www/gfmorpho/gfmorpho.html
new file mode 100644
index 000000000..3a58d4442
--- /dev/null
+++ b/src/www/gfmorpho/gfmorpho.html
@@ -0,0 +1,100 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html> <head>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+<title>Use GF Smart Paradigms</title>
+</head>
+
+<body>
+<h1>Word inflection with smart paradigms</h1>
+
+Give language, part of speech, and one or more word forms, to obtain
+the inflection table.
+<p>
+<form method=get action="gfmorpho.cgi">
+ <input name=args>
+ <INPUT TYPE=SUBMIT VALUE="Submit">
+</form>
+Examples:
+<pre>
+ Eng N baby
+ Fin V odottaa odotti
+ Fre V manger
+ Ger N Soldat Soldaten _masculine
+ Hin N बच्छा
+ Jpn V 答える _Gr2
+ Lat A vetus veteris
+</pre>
+Thus notice that strings are given without quotes, but features
+are prefixed with an underscore <tt>_</tt> (a temporary hack).
+
+
+<h2>Languages and part of speech tags</h2>
+
+The available languages are:
+<pre>
+ Afr Amh Cat Dan Dut Eng Fin Fre Ger Hin Ina Ita Jpn Lat
+ Lav Nep Nor Pes Pnb Ron Rus Snd Spa Swe Tha Tur Urd
+</pre>
+In addition, the library has the languages <tt>Ara Bul Pol</tt>, but they
+are not yet available in this way; you can however use the full form of
+paradigm applications prefixed by "!" as described below.
+
+<p>
+
+The parts of speech are: N (= noun), A (= adjective), V (= verb).
+
+<p>
+
+The way this works is that the program constructs the most probable
+inflection table from the forms given. For a vast majority of words in
+all languages, it is enough to give just one form. But sometimes more
+forms are needed to get the inflection table right.
+
+
+<h2>Filtering with patterns</h2>
+
+You may not want to see the whole table. Then you can filter it with patterns, each of which works like
+"grep", using <tt>*</tt> to match any substring, either in the
+features or in the forms:
+<pre>
+ Eng N baby -Gen
+ Eng V die -dy*
+</pre>
+This is a front end to the Paradigms modules in the GF Resource Grammar.
+See <a href=http://grammaticalframework.org/lib/doc/synopsis.html>RGL
+Synopsis</a> for available languages and paradigms.
+
+
+<h2>Using custom paradigms</h2>
+
+(Another temporary hack, for GF experts:) If you want to use other paradigms than the smart
+<tt>mk</tt> paradigms, you can prefix your input with <tt>!</tt> and
+use the normal expression syntax of GF. For example:
+<pre>
+ ! Ara brkN "طير" "فَعل" "فُعُول" Masc NoHum
+ ! Bul mkN041 "птица"
+ ! Pol mkRegAdj "duży" "większy" "dużo" "więcej"
+</pre>
+This also allows you to use structured terms:
+<pre>
+ ! Ger prefixV "auf" (mkV "fassen")
+</pre>
+
+
+<h2>To do</h2>
+
+<ul>
+ <li> nicer input helped by menus
+ <li> error handling and reporting when some language doesn't follow
+ the format assumed here
+ <li> better documentation of the paradigms
+</ul>
+
+<p>
+
+Powered by <a href=http://grammaticalframework.org>GF</a>. Aarne Ranta 2012.
+
+<hr>
+<address></address>
+<!-- hhmts start --> Last modified: Wed Sep 12 14:24:51 CEST 2012 <!-- hhmts end -->
+</body> </html>