diff options
| author | aarne <aarne@chalmers.se> | 2012-09-12 12:52:03 +0000 |
|---|---|---|
| committer | aarne <aarne@chalmers.se> | 2012-09-12 12:52:03 +0000 |
| commit | acb3fa961e6a9e548addabf34796f7645832c884 (patch) | |
| tree | 02c7cd92ee65ea7e45fb477692f6813d4c1a35fa | |
| parent | 38db61e34f53c6245f38915d272362ca34711d91 (diff) | |
First version of a web server for morphological paradigms.
| -rw-r--r-- | src/www/gfmorpho/GFMorpho.hs | 108 | ||||
| -rw-r--r-- | src/www/gfmorpho/README | 23 | ||||
| -rw-r--r-- | src/www/gfmorpho/gfmorpho.cgi | 7 | ||||
| -rw-r--r-- | src/www/gfmorpho/gfmorpho.html | 100 |
4 files changed, 238 insertions, 0 deletions
diff --git a/src/www/gfmorpho/GFMorpho.hs b/src/www/gfmorpho/GFMorpho.hs new file mode 100644 index 000000000..e01d1da56 --- /dev/null +++ b/src/www/gfmorpho/GFMorpho.hs @@ -0,0 +1,108 @@ +import Network.HTTP.Base +import Codec.Binary.UTF8.String +import Data.Char +import Data.List +import System + +main = do + xs <- getArgs + let xxoo = lexArgs (unwords xs) + case pArgs xxoo of + Just (oo,xx) -> do + morpho oo xx + _ -> do + putStrLn $ "cannot read " ++ unwords xs ++ "." + putStrLn "<p>" + putStrLn usage + +usage = "usage: gfmorpho LANG POS FORMS OPT*" + +noParse xx = length xx < 3 ---- + +lexArgs = map (decodeString . urlDecode) . words . map unspec . drop 1 . dropWhile (/='=') where + unspec c = case c of + '=' -> ' ' + '+' -> ' ' + _ -> c + +pArgs xxoo = do + let (oo,xx) = partition isOption xxoo + if length xx < 3 then Nothing else return (oo,xx) + +morpho :: [String] -> [String] -> IO () +morpho oo xx = do + writeFile tmpCommand (script xx) + system $ command xx + s <- readFile tmpFile + putStrLn $ mkFile $ response oo s + +script ("!":lang:rest) = "cc -table -unqual " ++ unwords rest +script (lang: pos: forms) = "cc -table -unqual " ++ fun pos ++ quotes forms + where + fun pos = "mk" ++ pos + +command ("!":args) = command args +command (lang: pos: forms) = + "/usr/local/bin/gf -run -retain -path=alltenses alltenses/Paradigms" ++ lang ++ ".gfo" + ++ " < " ++ tmpCommand + ++ " > " ++ tmpFile + +quotes = unwords . map quote where + quote s = case s of + '_':tag -> tag + _ -> "\"" ++ s ++ "\"" + +-- html response +response oo = + tag "table border=1" . unlines . map (tag "tr" . unwords) . map cleanTable . grep oo . map words . lines + +cleanTable ws = [tag "td" (unwords param), tag "td" (tag "i" (unwords form))] where + (param,form) = getOne (map cleant ws) + cleant w = case w of + "s" -> "" + "." -> "" + _ -> cleanw w + cleanw = filter (flip notElem "()") + getOne ws = let ww = filter (/= "=>") ws in (init ww, [last ww]) -- excludes multiwords + +responsePlain oo = + unlines . map unwords . grep oo . map cleanTablePlain . map words . lines + +cleanTablePlain = map clean where + clean w = case w of + "=>" -> "\t" + "s" -> "" + "." -> "" + _ -> cleanw w + cleanw = filter (flip notElem "()") + +grep oo wss = filter (\ws -> all (flip matchIn ws) (map tail oo)) wss + +matchIn p ws = any (match p) ws where + match p w = case (p,w) of + ('*':ps,_ ) -> any (match ps) [drop i w | i <- [0..length w]] --- + (c:ps, d:ws) -> c == d && match ps ws + _ -> p == w + +tmpFile = "_gfmorpho.tmp" +tmpCommand = "_gfcommand.tmp" + +isOption = (=='-') . head + +tag t s = "<" ++ t ++ ">" ++ s ++ "</" ++ t ++ ">" + + +-- html file with UTF8 + +mkFile s = unlines $ [ + "<HTML>", + "<HEAD>", + "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=utf-8\">", + "<TITLE>GF Smart Paradigm Output</TITLE>", + "</HEAD>", + "<BODY>", + s, + "</BODY>", + "</HTML>" + ] + diff --git a/src/www/gfmorpho/README b/src/www/gfmorpho/README new file mode 100644 index 000000000..2090bb5d3 --- /dev/null +++ b/src/www/gfmorpho/README @@ -0,0 +1,23 @@ +A service for using smart paradigms on the web. + +Works with a cgi script running a Haskell program that calls GF to interprete a query string as a "cc" command on a specified Paradigms file. For instance, if the +user submits the query + + Eng N baby + +the program executes the command + + cc -table -unqual ParadigmsEng.mkN "baby" + +The resulting output is converted into an HTML table. + +The file gfmorpho.html gives some more information. Open issues in addition to those mentioned there are: + +- GFMorpho.hs creates the temporary files _gfcommand.tmp and _gfmorpho.tmp which need to be world-writable; they should be created more properly and removed after use +- gfmorpho.cgi defines the variable GF_LIB_PATH to reside in /Users/aarne, and must be edited for other environments +- to work for all languages mentioned, one has to compile some incomplete GF grammars not standardly compiled: + + GF/lib/src$ runghc Make alltenses lang langs=Amh,Ara,Lat,Mlt,Tur + +(c) Aarne Ranta 2012 under LGPL/BSD. + diff --git a/src/www/gfmorpho/gfmorpho.cgi b/src/www/gfmorpho/gfmorpho.cgi new file mode 100644 index 000000000..c08a9450c --- /dev/null +++ b/src/www/gfmorpho/gfmorpho.cgi @@ -0,0 +1,7 @@ +#!/bin/bash + +echo "Content-type: text/html"; +echo "" +export LANG=en_US.UTF-8 +runghc GFMorpho "$QUERY_STRING" + diff --git a/src/www/gfmorpho/gfmorpho.html b/src/www/gfmorpho/gfmorpho.html new file mode 100644 index 000000000..3a58d4442 --- /dev/null +++ b/src/www/gfmorpho/gfmorpho.html @@ -0,0 +1,100 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> +<html> <head> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"> +<title>Use GF Smart Paradigms</title> +</head> + +<body> +<h1>Word inflection with smart paradigms</h1> + +Give language, part of speech, and one or more word forms, to obtain +the inflection table. +<p> +<form method=get action="gfmorpho.cgi"> + <input name=args> + <INPUT TYPE=SUBMIT VALUE="Submit"> +</form> +Examples: +<pre> + Eng N baby + Fin V odottaa odotti + Fre V manger + Ger N Soldat Soldaten _masculine + Hin N बच्छा + Jpn V 答える _Gr2 + Lat A vetus veteris +</pre> +Thus notice that strings are given without quotes, but features +are prefixed with an underscore <tt>_</tt> (a temporary hack). + + +<h2>Languages and part of speech tags</h2> + +The available languages are: +<pre> + Afr Amh Cat Dan Dut Eng Fin Fre Ger Hin Ina Ita Jpn Lat + Lav Nep Nor Pes Pnb Ron Rus Snd Spa Swe Tha Tur Urd +</pre> +In addition, the library has the languages <tt>Ara Bul Pol</tt>, but they +are not yet available in this way; you can however use the full form of +paradigm applications prefixed by "!" as described below. + +<p> + +The parts of speech are: N (= noun), A (= adjective), V (= verb). + +<p> + +The way this works is that the program constructs the most probable +inflection table from the forms given. For a vast majority of words in +all languages, it is enough to give just one form. But sometimes more +forms are needed to get the inflection table right. + + +<h2>Filtering with patterns</h2> + +You may not want to see the whole table. Then you can filter it with patterns, each of which works like +"grep", using <tt>*</tt> to match any substring, either in the +features or in the forms: +<pre> + Eng N baby -Gen + Eng V die -dy* +</pre> +This is a front end to the Paradigms modules in the GF Resource Grammar. +See <a href=http://grammaticalframework.org/lib/doc/synopsis.html>RGL +Synopsis</a> for available languages and paradigms. + + +<h2>Using custom paradigms</h2> + +(Another temporary hack, for GF experts:) If you want to use other paradigms than the smart +<tt>mk</tt> paradigms, you can prefix your input with <tt>!</tt> and +use the normal expression syntax of GF. For example: +<pre> + ! Ara brkN "طير" "فَعل" "فُعُول" Masc NoHum + ! Bul mkN041 "птица" + ! Pol mkRegAdj "duży" "większy" "dużo" "więcej" +</pre> +This also allows you to use structured terms: +<pre> + ! Ger prefixV "auf" (mkV "fassen") +</pre> + + +<h2>To do</h2> + +<ul> + <li> nicer input helped by menus + <li> error handling and reporting when some language doesn't follow + the format assumed here + <li> better documentation of the paradigms +</ul> + +<p> + +Powered by <a href=http://grammaticalframework.org>GF</a>. Aarne Ranta 2012. + +<hr> +<address></address> +<!-- hhmts start --> Last modified: Wed Sep 12 14:24:51 CEST 2012 <!-- hhmts end --> +</body> </html> |
