diff options
| author | hallgren <hallgren@chalmers.se> | 2012-05-29 14:47:59 +0000 |
|---|---|---|
| committer | hallgren <hallgren@chalmers.se> | 2012-05-29 14:47:59 +0000 |
| commit | 7196bc86692d2099cfaee71d15a3f3c180cb3b76 (patch) | |
| tree | b6def742698c21f674c291f7b67f61053d23a226 | |
| parent | 9e34a7f7fa462a5ec1333a364affde84b8e2d703 (diff) | |
translator: segment imported text based on punctuation
| -rw-r--r-- | src/www/translator/about.html | 14 | ||||
| -rw-r--r-- | src/www/translator/translator.css | 15 | ||||
| -rw-r--r-- | src/www/translator/translator.js | 31 |
3 files changed, 42 insertions, 18 deletions
diff --git a/src/www/translator/about.html b/src/www/translator/about.html index 61c805d54..47387011e 100644 --- a/src/www/translator/about.html +++ b/src/www/translator/about.html @@ -16,9 +16,13 @@ <p> This is a simple bilingual document editor. Documents consist of a sequence -of segments that are translated independently. The user can add segments -in the source language and obtain automatically translated segments in -the target language. If an unsatisfactory automatic translation is +of segments that are translated independently. The user can import text +in the source language and obtain automatically translated text in +the target language. Imported text can be segmented based on punctuation. +Optionally, one can also use line breaks or blank lines to indicate segmentation +in imported text. + +<p>If an unsatisfactory automatic translation is obtained, the user can click on it and replace it with a manual translation. If multiple translations are obtained, one of them is shown by default and the other ones are available in a popup menu. @@ -35,8 +39,6 @@ closed and reopened later. <ul> <li>Text can be imported/exported by copying and pasting, but other ways could be added. - <li>Segmentation of imported text based on punctuation. (Currently, segments - must be separated by line breaks or blank lines.) <li>GF's lexer/unlexer is used to allow for more natural looking text, but the unlexer does the wrong thing if the first word of a sentence is supposed to be capitalized, e.g. "I am ready." and "Spanish wine is good." @@ -52,7 +54,7 @@ closed and reopened later. <hr> <div class=modtime><small> -<!-- hhmts start --> Last modified: Mon May 28 18:36:10 CEST 2012 <!-- hhmts end --> +<!-- hhmts start --> Last modified: Tue May 29 16:30:58 CEST 2012 <!-- hhmts end --> </small></div> <address> <a href="http://www.cse.chalmers.se/~hallgren/">TH</a> diff --git a/src/www/translator/translator.css b/src/www/translator/translator.css index 4ee6fc3c8..8d3db7388 100644 --- a/src/www/translator/translator.css +++ b/src/www/translator/translator.css @@ -3,8 +3,12 @@ h1 { float: right; margin: 0; font-size: 150%; } h2 { font-size: 120%; } h3 { font-size: 100%; } -div.pagehead { font-family: sans-serif; - background-color: #ccc; +div.pagehead { + font-family: sans-serif; + /*position: fixed; top: 5px; left: 5px; right: 5px; z-index: 2;*/ + background-color: #d0d0d0; + padding: 1px 5px; + border-radius: 5px; } table.menubar td { padding: 5px; } table.menubar dl, td.options > div > dl, dl.popupmenu { @@ -24,6 +28,7 @@ table.menubar td:hover, table.menubar dt:hover, dl.popupmenu > dt:hover { table table dl { left: 6em; } table.menubar dt { white-space: nowrap; } div.document { + /*margin-top: 7ex;*/ clear: both; background: white; border: 2px solid #009; @@ -50,10 +55,12 @@ td.options > div > dl { white-space: nowrap; } -td.source input[name=it], td.target input[name=it], textarea { - width: 100%; font-family: inherit; font-size: inherit; +td.source input[name=it], td.target input[name=it], textarea, input[name=punctchars] { + font-family: inherit; font-size: inherit; } +textarea { width: 100% } + table.paralleltexts td { vertical-align: baseline; line-height: 130%; diff --git a/src/www/translator/translator.js b/src/www/translator/translator.js index 6a9f4d7dc..a9601ca4c 100644 --- a/src/www/translator/translator.js +++ b/src/www/translator/translator.js @@ -274,28 +274,36 @@ Translator.prototype.import=function(el) { function restore() { t.redraw() } - function done2() { + function done() { var text=inp.value var ls=text.split("\n") - var segs= paras.firstChild.checked ? join_paragraphs(ls) : ls + var segs= punct.firstChild.checked + ? split_punct(text,punctchars.value) + : paras.firstChild.checked + ? join_paragraphs(ls) + : ls for(var i in segs) t.document.segments.push(new_segment(segs[i])) restore() return false } var inp=node("textarea",{name:"it",value:"",rows:"10"}) + var punct=radiobutton("separator","punct", + "Punctuation indicates where segments end: ",null,true) var lines=radiobutton("separator","lines", - "Segments are separated by line breaks",null,true) + "Segments are separated by line breaks",null,false) var paras=radiobutton("separator","paras", "Segments are separated by blank lines",null,false) - var e=node("form",{onsubmit:done2}, - [wrap("h3",text("Import text")), + var punctchars=node("input",{name:"punctchars",value:".?!",size:"5"}) + var lang=concname(t.document.options.from) + var e=node("form",{class:"import"}, + [wrap("h3",text("Import text ("+lang+")")), inp, - wrap("dl",map(dt,[lines,paras])), + wrap("dl",[dt([punct,punctchars]),dt(lines),dt(paras)]), submit(), button("Cancel",restore)]) t.view.appendChild(e) - e.onsubmit=done2 + e.onsubmit=done inp.focus(); } setTimeout(imp,100) @@ -556,10 +564,17 @@ function join_paragraphs(lines) { return paras } +function split_punct(text,punct) { + var ss=text.split(new RegExp("(["+punct+"])")) + var segs=[]; + for(var i=0;i<ss.length;i+=2) segs.push((ss[i]+(ss[i+1]||"")).trim()) + if(segs.length>0 && segs[segs.length-1]=="") segs.pop(); + return segs +} + /* --- DOM Support ---------------------------------------------------------- */ function a(url,linked) { return node("a",{href:url},linked); } -function li(xs) { return wrap("li",xs); } function jsurl(js) { return "javascript:"+js; } function replaceNode(node,ref) { ref.parentNode.replaceChild(node,ref) } |
