#!/usr/bin/perl -w

# Forward declaration: lets the call inside parse() be checked against the
# prototype instead of warning "called too early to check prototype".
sub process($);

############################################
# Sub-routine parse($)                     #
#                                          #
# Extracts grammar rules and their         #
# frequencies from each data file          #
#                                          #
# Argument : path of the data file.        #
# Output   : prints a progress line, then  #
#            hands every non-empty line to #
#            process().                    #
# Dies     : "Error : <reason>" when the   #
#            file cannot be opened.        #
# Returns  : the result of close().        #
############################################
sub parse($) {
    my $file = shift;

    print "processing file : $file\n";

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or a pipe, and the handle cannot leak.
    open(my $fh, '<', $file) or die "Error : $!";

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        chomp $line;

        # Same truthiness test as before: skips empty lines (and a bare "0").
        process($line) if $line;
    }

    return close($fh);
}

###################################
# Sub-Routine process($)          #
#                                 #
# gets the grammar rules from one #
# sentence and increments freq.   #
###################################
#
# Arguments : the bracketed sentence string, optionally followed by an
#             initial stack of partially built rules ("CAT-> child child ").
# Effect    : calls add_to("rules", $rule) (defined elsewhere in the
#             project) once for every category closed by a bare ")" .
#             Categories closed by "word )" are dropped without being
#             recorded.
# Dies      : after printing the unparsed remainder when the input does not
#             match any known token shape.
#
# The sentence is consumed token by token in a loop.  The loop replaces a
# per-token recursion, so long sentences no longer trigger "Deep recursion"
# warnings or repeated copying of the category stack.
sub process($) {
    my ($string, @categories) = @_;

    TOKEN:
    while (1) {

        # Null element: a leading "0" or "*" is dropped and parsing resumes
        # with the remainder only.  Going straight to the next token is the
        # fix for the old fall-through, which parsed the same text twice.
        if ($string =~ s/^[0\*]//) {
            next TOKEN;
        }

        # "( CAT " : new category.  Append it to the rule being built for
        # the enclosing category, then start a fresh rule for it.
        if ($string =~ s/^\( ([A-Z\$\-\,\.\#\`\(\)\"\'\:\|]+) //) {
            my $category = $1;
            if (@categories) {
                $categories[-1] .= "$category ";
            }
            push @categories, "$category-> ";
            next TOKEN;
        }

        # "word )" : a lexical item closes its category; the rule on top of
        # the stack is discarded without being recorded.
        if ($string =~ s/^([A-Za-z0-9\$\£\%\#\'\,\.\-\`\"\?\:\!\@\?\&\/\;\\]+) \)//) {
            pop @categories;
            next TOKEN;
        }

        # ")" : end of a category; record its completed rule.
        if ($string =~ s/^\s*\)//) {
            if (@categories) {
                my $rule = pop @categories;
                add_to("rules", $rule);
            }
            next TOKEN;
        }

        # Nothing left: end of sentence.
        last TOKEN if $string eq "";

        # Anything else is input this parser does not understand.
        print "$string\n";
        print "oops.. something i haven't thought of\n";
        die;
    }

    return;
}

# True value so that require/do of this file succeeds; unlike a top-level
# "return", it is also legal when the file is executed directly.
1;