1 files changed, 0 insertions, 453 deletions
diff --git a/next-lib/src/parse/oald/asc2gf b/next-lib/src/parse/oald/asc2gf
deleted file mode 100644
index d3db827f8..000000000
--- a/next-lib/src/parse/oald/asc2gf
+++ /dev/null
@@ -1,453 +0,0 @@
-#! /usr/bin/perl -w
-#
-# Perl script to process OALD machine-readable ASCII file
-# into a GF lexicon
-#
-# Usage: ./asc2gf < ascii_0710-1.txt
-#
-# Bjorn Bringert 2008,
-# based on asc2lex by
-# Matthew Purver, 11/2001
-
-use strict;
-
-my %irregular_verbs = ();
-my %words = ();
-
-my $irreg_eng = "../../english/IrregEng.gf";
-
-open(IRREG_ENG,"$irreg_eng") or die "Could not open $irreg_eng\n";
-while (<IRREG_ENG>) {
-  if (s/\s*([a-z\d]+)_V\s*=.*/$1/) {
-    chomp; 
-    $irregular_verbs{$_} = 1;
-  }
-}
-close IRREG_ENG;
-
-print "Known irregular verbs from $irreg_eng:\n";
-print join(",", keys %irregular_verbs) . "\n";
-
-
-# skip header section
-while ( <STDIN> ) {
-    last if /<\/TEIHEADER>/;
-}
-
-# read a line from stdin
-while ( my $line = <STDIN> ) {
-
-    # remove SGML tags
-    $line =~ s/<[^<>]+>//g;
-
-    # split line into fields according to spec (line may be empty now)
-    if ( $line =~ /^(.{23}).{23}(.{23}).{1}(.{58})$/ ) {
-
-        my ( $word, $pos, $cat ) = ( $1, $2, $3 );
-
-  	# trim white space
-	for ( ( $word, $pos, $cat ) ) {
-	    s/\s*$//;
-	}
-
-	# make word lower-case
-	$word =~ tr/A-Z/a-z/;   # lower case
-
-	# translate OALD diacritics
-	$word =~ s/~n/ñ/g;
-	$word =~ s/<c/ç/g;
-	$word =~ s/"a/ä/g;
-	$word =~ s/"o/ö/g;
-	$word =~ s/"u/ü/g;
-	$word =~ s/"i/ï/g;
-	$word =~ s/\^a/â/g;
-	$word =~ s/\^e/ê/g;
-	$word =~ s/\^o/ô/g;
-	$word =~ s/`a/à/g;
-	$word =~ s/`e/è/g;
-	$word =~ s/_e/é/g;
-
-	# make legal identifier
-	# Note: in theory this could cause clashes, but I don't think it does
-	# with the OALD.
-	my $name = $word;
-	$name =~ s/ /_/g;   # space -> _
-	$name =~ s/-/_/g;   # - -> _
-	$name =~ s/\./_/g;  # . -> _
-	$name =~ s/^'//;    # drop initial '
-
-
-	# get PoS & subcat info
-	my @pos = split( /,/, $pos );
-	$cat =~ s/,/\',\'/g;
-	( $cat = "\'$cat\'" ) unless ( $cat eq '' );
-
-	foreach ( @pos ) {
-	    my ( $pcode, $infl, $freq )=split(//);
-
-	    # for verbs, get inflected forms
-	    if ( $pcode =~ /^[GHIJ]/ ) {
-		$pos = 'verb';
-		my ($vbz, $vbg, $vbd);
-
-		# if this is a root form, work out the inflected forms
-		if ( $infl =~ /^\d/ ) {
-		    if ( $infl == 0 ) {
-			( $vbz = $word ) =~ s/$/s/;
-			( $vbg = $word ) =~ s/$/ing/;
-			( $vbd = $word ) =~ s/$/ed/;
-		    }
-		    elsif ( $infl == 1 ) {
-			( $vbz = $word ) =~ s/$/es/;
-			( $vbg = $word ) =~ s/$/ing/;
-			( $vbd = $word ) =~ s/$/ed/;
-		    }
-		    elsif ( $infl == 2 ) {
-			( $vbz = $word ) =~ s/e$/es/;
-			( $vbg = $word ) =~ s/e$/ing/;
-			( $vbd = $word ) =~ s/e$/ed/;
-		    }
-		    elsif ( $infl == 3 ) {
-			( $vbz = $word ) =~ s/y$/ies/;
-			( $vbg = $word ) =~ s/y$/ying/;
-			( $vbd = $word ) =~ s/y$/ied/;
-		    }
-		    elsif ( $infl == 4 ) {
-			( $vbz = $word ) =~ s/$/s/;
-			( $vbg = $word ) =~ s/(\w)$/$1$1ing/;
-			( $vbd = $word ) =~ s/(\w)$/$1$1ed/;
-		    }
-		    elsif ( $infl == 5 ) {
-			# for irregulars, just mark as such for now, we'll guess later
-			$vbz = 'IRREG';
-			$vbg = 'IRREG';
-			$vbd = 'IRREG';
-		    }
-
-		    my $lin = "mkV \"$word\" \"$vbz\" \"$vbd\" \"$vbd\" \"$vbg\"";		    
-
-		    # try to use a verb from IrregEng
-		    if ( $infl == 5 ) {
-		      for (my $i = 0; $i < length($word) - 1; $i++) {
-			my $suffix = substr($word, $i);
-			if ($irregular_verbs{$suffix}) {
-			  if ($i == 0) {
-			    $lin = "IrregEng.${name}_V";
-			  } else {
-			    my $prefix = substr($word, 0, $i);
-			    $lin = "mkV \"$prefix\" IrregEng.${suffix}_V";
-			  }
-			  last;
-			}
-		      }
-		    }
-
-		    if ($pcode eq 'G') {
-		      #add_word("${name}_VX", "mkVX ($lin)");
-		      print STDERR "Ignoring anomalous verb: $name\n";
-		    } 
-		    if ($pcode eq 'I' || $pcode eq 'J') {
-		      add_word("${name}_V", "$lin");
-		    }
-		    if ($pcode eq 'H' || $pcode eq 'J') {
-		      add_word("${name}_V2", "mkV2 ($lin)");
-		    }
-		}
-		# if this is an inflected form, save for guessing irregulars later
-		elsif ( $infl =~ /^a/ ) {
-		    #push( @vbz, $word );
-		}
-		elsif ( $infl =~ /^b/ ) {
-		    #push( @vbg, $word );
-		}
-		elsif ( $infl =~ /^c/ ) {
-		    #push( @vbd, $word );
-		}
-		elsif ( $infl =~ /^d/ ) {
-		    #push( @vbn, $word );
-		}
-	    }
-	    # for nouns, get plural form
-	    elsif( $pcode =~ /^[KLMNY]/ ) {
-		$pos = 'noun';
-		$pcode =~ s/^K/count/;
-		$pcode =~ s/^L/mass/;
-		$pcode =~ s/^M/both/;
-		$pcode =~ s/^N/proper/;
-		if ( $pcode =~ /^Y/ ) {
-		    $pcode = 'count' if $infl =~ /^[>\)\]]/;
-		    $pcode = 'mass' if $infl =~ /^\}/;
-		    $pcode = 'proper' if $infl =~ /^[:=~]/;
-		}
-		# if this is a singular form, work out plural form
-		unless ( $infl =~ /^j/ ) {
-		    my $pl = '-';
-		    if ( $infl eq '6' ) {
-			( $pl = $word ) =~ s/$/s/;
-		    }
-		    elsif ( $infl eq '7' ) {
-			( $pl = $word ) =~ s/$/es/;
-		    }
-		    elsif ( $infl eq '8' ) {
-			( $pl = $word ) =~ s/y$/ies/;
-		    }
-		    elsif ( $infl =~ /^[9k\]]/ ) {
-			$pl = $word;
-		    }
-		    elsif ( $infl =~ /^i/ ) {
-			# for irregulars, let's just make a guess and mark with '*'
-			# this could be done better, as for verbs, but I can't be bothered now
-			$pl = $word;
-  			( $pl =~ s/^((wo)?m)an/$1en\*/ ) or
-  			    ( $pl =~ s/man(-|$)/men$1\*/ ) or
-  			      ( $pl =~ s/-in-law/s-in-law\*/ ) or
-  			      ( $pl =~ s/um$/a\*/ ) or
-  			      ( $pl =~ s/us$/i\*/ ) or
-  			      ( $pl =~ s/a$/ae\*/ ) or
-  			      ( $pl =~ s/on$/a\*/ ) or
-  			      ( $pl =~ s/is$/es\*/ ) or
-  			      ( $pl =~ s/o$/i\*/ ) or
-  			      ( $pl =~ s/child$/children\*/ ) or
-  			      ( $pl =~ s/oot$/eet\*/ ) or
-  			      ( $pl =~ s/ooth$/eeth\*/ ) or
-  			      ( $pl =~ s/([lm])ouse$/$1ice\*/ ) or
-  			      ( $pl =~ s/f(e)?$/ves\*/ ) or
-  			      ( $pl =~ s/[ei]x$/ices\*/ ) or
-  			      ( $pl =~ s/eau$/eaux\*/ ) or
-  			      ( $pl = 'IRREG' );
-		    }
-		    # if plural-only, swap root form & plural
-		    elsif ( $infl =~ /^\)/ ) {
-			$pl = $word;
-			$word = '-';
-		    }
-		    ( $infl =~ s/^[:l]/per/ ) or ( $infl =~ s/^[mn]/loc/ ) or ( $infl = '_' );
-
-		    my $comment = "";
-		    if ( $word eq '-' ) {
-		      $comment .= " {- FIXME: no singular form -}";
-		    } 
-		    if ( $pl eq '-' ) {
-		      $comment .= " {- FIXME: no plural form -}";
-		    }
-		    if ( $pl =~ s/\*$// ) {
-		      $comment .= " {- FIXME: guessed plural form -}";
-		    }
-
-		    if ( $pcode eq 'proper' ) {
-		      add_word("${name}_PN", "mkPN \"$word\"");
-		    } else {
-		      add_word("${name}_N", "mkN \"$word\" \"$pl\"$comment");
-		    }
-		}
-	    }
-	    # for adjectives, get comparative & superlative forms
-	    elsif( $pcode =~ /^O/ ) {
-		$pos = 'adj';
-		# if this is root form, work out inflected forms
-		unless ( $infl =~ /^[rs]/ ) {
-		    my ($comp, $sup);
-		    if ( $infl =~ /^[Apqt]/ ) {
-			$comp = $sup = '-';
-		    }
-		    elsif ( $infl =~ /^B/ ) {
-			( $comp = $word ) =~ s/$/r/;
-			( $sup = $word ) =~ s/$/st/;
-		    }
-		    elsif ( $infl =~ /^C/ ) {
-			( $comp = $word ) =~ s/$/er/;
-			( $sup = $word ) =~ s/$/est/;
-		    }
-		    elsif ( $infl =~ /^D/ ) {
-			( $comp = $word ) =~ s/y$/ier/;
-			( $sup = $word ) =~ s/y$/iest/;
-		    }
-		    elsif ( $infl =~ /^E/ ) {
-			# for irregulars, let's just have a guess and mark with '*'
-			# (there aren't very many of these)
-			( $comp = $word ) =~ s/(\w)$/$1$1er\*/;
-			( $sup = $word ) =~ s/(\w)$/$1$1est\*/;
-		    }
-		    $infl =~ s/^[ABCDE]/normal/;
-		    $infl =~ s/^p/pred/;
-		    $infl =~ s/^q/attr/;
-		    $infl =~ s/^t/affix/;
-
-		    if ( $comp eq '-' ) {
-		      add_word("${name}_A", "compoundA (mkA \"$word\")");
-		    } else {
-		      add_word("${name}_A", "mkA \"$word\" \"$comp\"");
-		    }
-		}
-	    }
-	    # adverb
-	    elsif( $pcode =~ /^P/ ) {
-		$pos = 'adv';
-		$infl =~ s/^[u\+]/normal/;
-		$infl =~ s/^w/whrel/;
-		$infl =~ s/^v/whq/;
-		add_word("${name}_Adv", "mkAdv \"$word\"");
-	    }
-	    # pronoun
-	    elsif( $pcode =~ s/^Q/_/ ) {
-		$pos = 'pron';
-		$infl =~ s/^x/normal/;
-		$infl =~ s/^y/whq/;
-		$infl =~ s/^z/whrel/;
-		my $class = '_';
-		# reflexive pronouns
-		if ( ( $word =~ /self$/ ) or 
-		     ( $word =~ /selves$/ ) ) {
-		    $pcode = 'acc';
-		}
-		# accusative personal pronouns
-		if ( ( $word =~ /^him/ ) or
-		     ( $word =~ /^her/ ) or
-		     ( $word =~ /^them/ ) or
-		     ( $word eq 'us' ) or
-		     ( $word eq 'thee' ) or
-		     ( $word eq 'me' ) ) {
-		    $pcode = 'acc';
-		    $class = 'per';
-		}
-		# nominative personal pronouns
-		if ( ( $word eq 'he' ) or
-		     ( $word eq 'she' ) or
-		     ( $word eq 'they' ) or
-		     ( $word eq 'we' ) or
-		     ( $word eq 'thou' ) or
-		     ( $word eq 'i' ) ) {
-		    $pcode = 'nom';
-		    $class = 'per';
-		}
-		# other personal pronouns
-		if ( ( $word =~ /.+one/ ) or
-		     ( $word =~ /one.+/ ) or
-		     ( $word =~ /body/ ) or
-		     ( $word =~ /^you/ ) or
-		     ( $word =~ /^who/ ) ) {
-		    $class = 'per';
-		}
-		# non-personal pronouns
-		if ( $word =~ /thing/ ) {
-		    $class = 'nper';
-		}
-		# otherwise case/person info will be '_' (anon variable)
-		# add full spec to @pron array
-		#push( @pron, "$pos( \'$word\', $pcode, $infl, $class ).\n" );
-	    }
-	    # for determiners, leave anon variable as placeholder for semantics
-	    elsif( $pcode =~ /^[RS]/ ) {
-		$pos = 'det';
-		$pcode =~ s/^R/def/;
-		$pcode =~ s/^S/indef/;
-		#add_word("${name}_Det","mkDeterminer \"$word\"");
-	    }
-	    # for prepositions - nothing to say
-	    elsif( $pcode =~ s/^T/prep/ ) {
-		$pos = 'prep';
-		add_word("${name}_Prep","mkPrep \"$word\"");
-	    }
-	    # for conjunctions - nothing to say
-	    elsif( $pcode =~ s/^V/conj/ ) {
-		$pos = 'conj';
-		add_word("${name}_Conj","mkConj \"$word\"");
-	    }
-	    # for miscellaneous, leave '-' as placeholder for illocutionary info
-	    elsif( $pcode =~ /^[UWXZ]/ ) {
-		$pos = 'misc';
-		#push( @prefix, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^U/prefix/ );
-		#push( @interj, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^W/interj/ );
-		#push( @partcl, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^X/partcl/ );
-		#push( @unknown, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^Z/unknown/ );
-	    }
-	}
-    }
-}
-
-my $absfile = "Oald.gf";
-my $cncfile = "OaldEng.gf";
-my $abs_structfile = "OaldStructural.gf";
-my $cnc_structfile = "OaldStructuralEng.gf";
-
-open (ABS, '>', $absfile);
-open (CNC, '>', $cncfile);
-
-open (ABS_STRUCTURAL, '>', $abs_structfile);
-open (CNC_STRUCTURAL, '>', $cnc_structfile);
-
-
-
-# print a nice comment at the top
-my $header = "-- English lexicon for GF, produced from:\n"
-           . "-- Oxford advanced learner's dictionary of current English:\n"
-           . "--    expanded 'computer usable' version compiled by Roger Mitton\n"
-           . "-- The computer usable version is transcribed from:\n"
-           . "--   Oxford advanced learner's dictionary of current English\n"
-           . "--   A.S. Hornby ; with the assistance of A.P. Cowie [and] J. Windsor Lewis.\n"
-           . "--   3rd. ed., London : Oxford University Press, 1974.\n"
-           . "-- Distributed as 'dict0710' by:\n"
-           . "--   Oxford Text Archive\n"
-           . "--   Oxford University Computing Services\n"
-           . "--   13 Banbury Road\n"
-           . "--   Oxford\n"
-           . "--   OX2 6NN\n"
-           . "-- Under these conditions:\n"
-           . "--   Freely available for non-commercial use provided that this header is\n"
-           . "--   included in its entirety with any copy distributed.\n"
-           . "--\n"
-           . "-- GF version generated by asc2gf, Bjorn Bringert Nov 2008\n"
-           . "--    based on asc2lex, Matthew Purver Nov 2001\n"
-           . "--    http://www.stanford.edu/~mpurver/software.html\n"
-           . "\n";
-print ABS $header;
-print ABS "abstract Oald = Cat ** {\n";
-
-print CNC $header;
-print CNC "--# -path=.:alltenses\n";
-print CNC "concrete OaldEng of Oald = CatEng ** open ParadigmsEng, IrregEng in {\n";
-
-print ABS_STRUCTURAL $header;
-print ABS_STRUCTURAL "abstract OaldStructural = Cat ** {\n";
-
-print CNC_STRUCTURAL $header;
-print CNC_STRUCTURAL "--# -path=.:alltenses\n";
-print CNC_STRUCTURAL "concrete OaldStructuralEng of OaldStructural = CatEng ** open ParadigmsEng in {\n";
-
-foreach my $name (sort (keys %words)) {
-  (my $cat = $name) =~ s/.*_([A-Z][A-Za-z\d]*)$/$1/;
-  my $lin = $words{$name};
-  if ( $cat =~ /^(A)|(N)|(V)|(V2)$/ ) {
-    print ABS "fun $name : $cat;\n";
-    print CNC "lin $name = $lin;\n";
-  } else {
-    print ABS_STRUCTURAL "fun $name : $cat;\n";
-    print CNC_STRUCTURAL "lin $name = $lin;\n";
-  }
-}
-
-print ABS "}";
-print CNC "}";
-
-print ABS_STRUCTURAL "}";
-print CNC_STRUCTURAL "}";
-
-close(ABS_STRUCTURAL);
-close(CNC_STRUCTURAL);
-
-close(ABS);
-close(CNC);
-
-print "\nWrote open lexicon to $absfile and $cncfile\n";
-print "Wrote closed lexicon to $abs_structfile and $cnc_structfile\n";
-
-
-
-sub add_word {
-  my ($name,$lin) = @_;
-  if (exists $words{$name}) {
-    print STDERR "Duplicate word: $name\n";
-  } else {
-    $words{$name} = $lin;
-  }
-}