#!/usr/bin/perl -w
#Serge Sharoff, University of Leeds
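#Makes a CWB corpus from LCMC: reads LCMC XML on STDIN and prints, for each
#sentence, one token per line as form<TAB>POS, wrapped in <text> and <s> tags.
#The first command-line argument is a bibliography file with one
#"id<TAB>description" record per line.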
use strict;
use warnings;

# Usage: <script> BIBFILE < lcmc.xml > corpus.vrt
#
# BIBFILE maps text ids to descriptions, one "id<TAB>description" record per
# line. The XML on STDIN is scanned line by line:
#   <file id="X00" ...>  ->  <text id="X00" title="description">
#   </file>              ->  </text>
#   <s n=...> ... </s>   ->  <s>, then one "form<TAB>POS" line per token, </s>
# All other input lines are dropped.

my $bibdb = shift;
die "Usage: $0 BIBFILE < LCMC.xml > corpus.vrt\n" unless defined $bibdb;

# Load the id -> description table. Lines without a tab are silently skipped.
my %title_of;
# Three-argument open so that a file name starting with '>' or ending in '|'
# cannot be interpreted as an open mode or a command.
open(my $bib_fh, '<', $bibdb) or die "Cannot open $bibdb: $!";
while (my $record = <$bib_fh>) {
    if (my ($id, $description) = $record =~ /(.+?)\t(.+)/) {
        $title_of{$id} = $description;
    }
}
close($bib_fh);

while (my $line = <STDIN>) {
    if (my ($text_id) = $line =~ /^<file id="([A-R]\d\d)"/i) {
        my $title = $title_of{$text_id};
        if (defined $title) {
            # A raw double quote would terminate the attribute value early.
            $title =~ s/"/&quot;/g;
        }
        else {
            # Keep going with an empty title, but say which text is missing
            # instead of emitting a generic "uninitialized value" warning.
            warn "No bibliographic entry for text $text_id\n";
            $title = '';
        }
        print qq{<text id="$text_id" title="$title">\n};
    }
    elsif ($line =~ /^<\/file>/) {
        print "</text>\n";
    }
    elsif ($line =~ /^<s n/) {
        print "<s>\n";
        # Splitting on closing tags leaves each piece ending in a token's
        # text, preceded by the opening tag that carries its POS attribute.
        for my $chunk (split /<\/\w+>/, $line) {
            if (my ($pos, $form) = $chunk =~ /POS="(.+?)">(.+)/i) {
                print "$form\t$pos\n";
            }
        }
        print "</s>\n";
    }
}


