#!/usr/bin/perl
######################################################################
#                                                                    #
# Code2HTML                                                          #
# ---------                                                          #
#                                                                    #
# written 1999 by Palfrader Peter palfrader@writeme.com              #
#                                                                    #
# Version 0.1                                                        #
#                                                                    #
# Do not distribute!                                                 #
#                                                                    #
# The final version will be distributed under the GPL                #
#                                                                    #
######################################################################

# global variables:
	$langmode = "c++";   # language mode
	$hidechar = "\x00";  # a character that must not exist in the code and which is used as a placeholder


sub get_config_file
{
	#returns config file as string
	
	open FILEHANDLE, "<config";

	local $result;
	my $lm = quotemeta($langmode);
	
	while (<FILEHANDLE>)
	{
		$_ =~ s/\n//g;
		$_ =~ s/^[ \t]*(.*)[ \t]*/$1/; #remove beginning and trailing spaces / tabs
		
		my $search = "([\[,])$lm([\],])";
		if ($_ =~ /$search/) {last} ;
	};


	while (<FILEHANDLE>)
	{
		$_ =~ s/\n//g;
		$_ =~ s/^[ \t]*(.*)[ \t]*/$1/; #remove beginning and trailing spaces / tabs
		if (substr($_,0,1) eq "[") {last};

		if (($_ ne "")&&(substr($_, 0, 1) ne "#"))
		{
			$result .= $_."\n"
		};
	};
	close FILEHANDLE;

	return $result;
}	

sub parse_config_part
{
	#in: string: config data
	#returns @regexps
	local @regexps;

	while ($_[0] =~ /^[^\-].*?$/gms)
	{
		local %t;
		local $tmp;
		my $us = $&;
		my $after = $';
		my @child = ();
		
		$us =~ s/[ \t]*"([^"]|\\")*"[ \t]*,//m; $tmp = $&; $tmp =~ m/".*[^\\]"/; $tmp = $&; $tmp =~ s/([^\\]|^)"/$1/g; $tmp =~ s/\\"/"/g; $t{"html1"}   = $tmp;
		$us =~ s/[ \t]*"([^"]|\\")*"[ \t]*,//m; $tmp = $&; $tmp =~ m/".*[^\\]"/; $tmp = $&; $tmp =~ s/([^\\]|^)"/$1/g; $tmp =~ s/\\"/"/g; $t{"html2"}   = $tmp;
	    $us =~ s/[ \t]*"([^"]|\\")*"[ \t]*$//m; $tmp = $&; $tmp =~ m/".*[^\\]"/; $tmp = $&; $tmp =~ s/([^\\]|^)"/$1/g; $tmp =~ s/\\"/"/g; $t{"regex"}   = $tmp;

		if ($after =~ s/^\n\-.*?($|\n[^\-])//s)
		{
			$tmp = $&;
			$tmp =~ s/^\n//;
			$tmp =~ s/\n.$//;
			$tmp =~ s/^-[ \t]*//mg;

			@child = &parse_config_part($tmp."\n");

		};

		$t{"childregex"} = \@child;

		push @regexps, \%t;
	};

	return @regexps;
};

sub get_input_file
{
	#returns input file
	local $code;
	
	while (<STDIN>) { $_ =~ s/\n|\r//g; $code = $code.$_."\n"; };
	
	return $code;
};

sub find_all_matches
{
	# in: regexps
	# in: code
	# returns: matches;
	
	my $regexpsptr = $_[0];
	my @regexps = @$regexpsptr;
#	if ($_[1] ne $code){print "child\n";};
	my $code    = $_[1];
	my @matches = ();
	
	my $index = 0;

	for (@regexps)
	{
		while ($code =~ /$$_{"regex"}/gms)
		{
			local %t;
			
			$t{"start"} = pos($code) - length($&);
			$t{"length"} = length($&);
			$t{"type"} = $index;
			$t{"sortby"} = $t{"start"}*($#regexps+1) + $t{"type"};
			


			my $regexptr = $regexps[$t{"type"}];
			my $childregexptr = $$regexptr{"childregex"}; #equals:	my %regex = %$regexptr;	my $childregexptr = $regex{"child"};
			my @childregex = @$childregexptr;
			my @child = find_all_matches(\@childregex, $&);
			$t{"childmatches"} = \@child;

			

			push @matches, \%t;
		};
		$index++;
	}
	
	#sorting matches by start
	@matches = sort {$$a{"sortby"} <=> $$b{"sortby"}} @matches;
	
	return @matches;
};

sub find_valid_matches
{
	# in: matches
	# in: code
	# returns: matches

	my $matchesptr = $_[0];
	my @matches = @$matchesptr;
	my $alreadymatched = $_[1]; # I just need a string with the same length as code

	for (@matches) # go for all matches
	{
		if (substr($alreadymatched, $$_{"start"}, 1) eq $hidechar) # if it is not valid any more, do not take it
		{
			$$_{"takeit"} = 0;

		}
		else
		{
			$$_{"takeit"} = 1;
			
			local $invalid; 
			local $valid;
			for (1..$$_{"length"}) # create a string of 255 bytes with the correct length
			{
				$valid   .= " "; 
				$invalid .= $hidechar; 
			};
			substr($alreadymatched, $$_{"start"}, $$_{"length"}) = $invalid;
			
			my $childmatchesptr = $$_{"childmatches"};
			my @childmatches = @$childmatchesptr;
			@childmatches = &find_valid_matches(\@childmatches, $valid);
			$$_{"childmatches"} = \@childmatches;
		};
	};
	
	return @matches;
};

sub insert_places_characters
{
	# in: matches
	# in: code
	# returns: code

	my $matchesptr = $_[0];
	my @matches = @$matchesptr;
	my $code = $_[1];
	
	for (reverse @matches) # go for all matches
	{
		if ($$_{"takeit"})
		{
			my $childmatchesptr = $$_{"childmatches"};
			my @childmatches = @$childmatchesptr;
			my $tmp = substr($code, $$_{"start"}, $$_{"length"});
			   $tmp = &insert_places_characters(\@childmatches, $tmp);

			$$_{"hide_in_between"} = $tmp =~ s/$hidechar/$hidechar/g;

			$code = substr($code, 0, $$_{"start"}) .
			        $hidechar .
			        $tmp .
			        $hidechar .
			        substr($code, $$_{"start"}+$$_{"length"});
		};
	}

	return $code;
};

sub convert_code_2_html
{
	# in: code
	# returns: code

	my $code = $_[0];

	$code =~ s/&/&amp;/g;
	$code =~ s/>/&gt;/g;
	$code =~ s/</&lt;/g;
	$code =~ s/"/&quot;/g;

	return $code;
};

sub insert_tags
{
	# in: regexps
	# in: matches
	# in: code
	# returns: code

	my $regexpsptr = $_[0];
	my @regexps = @$regexpsptr;
	my $matchesptr = $_[1];
	my @matches = @$matchesptr;
	my $tmp = $_[2];

	my $newcode = "";
	
	for (@matches) # go for all matches
	{
		if ($$_{"takeit"})
		{
			# This goes must faster than
			# $code =~ s/$hidechar/$a/;
			# $code =~ s/$hidechar/$b/;

			my $t = $regexps[$$_{"type"}];
			my $number = $$_{"hide_in_between"};

			my $childmatchesptr = $$_{"childmatches"};
			my @childmatches = @$childmatchesptr;

			my $childregexptr = $$t{"childregex"}; #equals:	my %regex = %$regexptr;	my $childregexptr = $regex{"child"};
			my @childregex = @$childregexptr;

			$tmp =~ m/$hidechar/;
			$newcode .= $`.$$t{"html1"};
			$tmp = $';

			my $begin = "";
			for (1..$number)
			{
				$tmp =~ m/$hidechar/;
				$begin .= $`.$hidechar;
	  			$tmp = $';
			}
			$tmp =~ m/$hidechar/;
			$begin .= $`;
  			$tmp = $';
			
			$childcode = &insert_tags(\@childregex, \@childmatches, $begin);
			$newcode .= $childcode.$$t{"html2"};
		};
	};
	
	return $newcode.$tmp;
};

sub put_output
{
	# in: code
	print "<html><body bgcolor=\"#ffffff\"><pre>\n";
	print $_[0];
	print "</pre></body></html>\n";
};



                                                         print STDERR "parsing config file...\n";
@regexps = &parse_config_part(get_config_file);          print STDERR "loading input file...\n";
$code    = &get_input_file;                              print STDERR "finding all matches...\n";
@matches = &find_all_matches(\@regexps, $code);          print STDERR "verifying matches...\n";
@matches = &find_valid_matches(\@matches, $code);        print STDERR "inserting placeholders...\n";
$code    = &insert_places_characters(\@matches, $code);  print STDERR "converting source code to HTML...\n";
$code    = &convert_code_2_html($code);                  print STDERR "replacing placeholders with appropriate HTML tags...\n";
$code    = &insert_tags(\@regexps, \@matches, $code);    print STDERR "outputting file...\n";
&put_output($code);
