#!/usr/bin/perl -w 

############################################
# Sub-routine parse($)                     #
#                                          #
# Extracts grammar rules and their         #
# frequencies from each data file          #
############################################

sub parse($){
	# Reads the data file named by the single argument and passes every
	# non-empty line to process().
	#
	# Parameter : path of the file to read.
	# Output    : prints "processing file : <path>" to STDOUT.
	# Dies      : with "Error : <OS error>" when the file cannot be opened.

	# Lexical, so the package-level $file of the caller is left untouched.
	my $file = shift;
	print "processing file : $file\n";

	# Three-argument open: the name is always treated as a plain path opened
	# for reading, whatever characters it contains.
	open(my $fh, '<', $file) or die "Error : $!";

	# A named loop variable keeps the caller's $_ intact.
	while (my $line = <$fh>){
		chomp $line;

		# Truthiness test on purpose: skips empty lines (and a line that is
		# just "0", which is false in Perl).
		next unless $line;

		process($line);
	}

	close($fh);
}

###################################
#  Sub-Routine process($) 	      #
#                                 #
# gets the grammar rules from one #
# sentence and increments freq.   #
###################################

sub process($){
	# Scans one bracketed sentence from left to right and builds one rule
	# string per category, of the form "CAT-> CHILD1 CHILD2 ".
	#
	# Parameters : the sentence text, optionally followed by rule strings
	#              that are still open (the initial stack, innermost last).
	#              NOTE(review): the ($) prototype only advertises the first
	#              argument; it is kept unchanged for existing callers.
	# Effects    : every category closed by a bare ")" has its rule passed to
	#              add_to("rules", $rule) (add_to is defined elsewhere;
	#              presumably it keeps the frequency count).
	# Dies       : after printing the unparsed remainder when the text
	#              matches none of the expected token shapes.
	#
	# Each loop pass consumes exactly one token from the front of $string.
	# The token tests form a single if/elsif chain so that a token is only
	# ever handled by one branch; iterating instead of recursing once per
	# token keeps long sentences from hitting deep-recursion warnings.

	my ($string, @categories) = @_;

	while (1) {
		if ($string =~ s/^[0\*]//){
			# this is a form of null element, ignore it:
			# drop the single leading "0" or "*" and look again
			next;
		}
		elsif ($string =~ s/^\( ([A-Z\$\-\,\.\#\`\(\)\"\'\:\|]+) //){

			# "( CAT " : a new category opens.  Append it to the rule of
			# the enclosing category (if any) and start a rule of its own.

			my $category = $1;
			if (@categories){
				$categories[-1] .= "$category ";
			}
			push @categories, "$category-> ";
		}
		elsif ($string =~ s/^([A-Za-z0-9\$\Ł\%\#\'\,\.\-\`\"\?\:\!\@\?\&\/\;\\]+) \)//){

			# "word )" : a word closes the innermost category.  Its rule
			# is dropped without being recorded.

			pop @categories;
		}
		elsif ($string =~ s/^\s*\)//){

			# ")" : the innermost open category is complete, record it.

			if (@categories){
				add_to("rules", pop(@categories));
			}
		}
		elsif ($string eq ""){
			# end of sentence
			last;
		}
		else {
			print "$string\n";
			print "oops.. something i haven't thought of\n";
			die;
		}
	}

	return;
}

# True value handed back to require/do when this file is loaded.  A bare
# "1;" is used instead of "return 1;" because return at file scope is only
# valid while the file is being loaded through require/do/eval and fails
# with "Can't return outside a subroutine" when the file is run directly.
1;