#!/usr/bin/perl -w # $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $ # # Translate one or more ".tbl" files into ".html" files which can be used to # test the charset support in lynx. Each of the ".html" files will use the # charset that corresponds to the input ".tbl" file. use strict; use Getopt::Std; use File::Basename; use POSIX qw(strtod); sub field($$) { my $value = $_[0]; my $count = $_[1]; while ( $count > 0 ) { $count -= 1; $value =~ s/^\S*\s*//; } $value =~ s/\s.*//; return $value; } sub notes($) { my $value = $_[0]; $value =~ s/^[^#]*//; $value =~ s/^#//; $value =~ s/^\s+//; return $value; } sub make_header($$$) { my $source = $_[0]; my $charset = $_[1]; my $official = $_[2]; printf FP "\n"; printf FP "\n"; printf FP "
\n"; printf FP "\n"; printf FP "\n";
printf FP "Code Char Entity Render Description\n";
}
sub make_mark() {
printf FP "---- ---- ------ ------ -----------------------------------\n";
}
sub escaped($) {
my $result = $_[0];
$result =~ s/&/&/g;
$result =~ s/</g;
$result =~ s/>/>/g;
return $result;
}
sub make_row($$$) {
my $old_code = $_[0];
my $new_code = $_[1];
my $comments = $_[2];
# printf "# make_row %d %d %s\n", $old_code, $new_code, $comments;
my $visible = sprintf("&#%d; ", $new_code);
if ($old_code < 256) {
printf FP "%4x %c %.13s %d; %s\n",
$old_code, $old_code,
$visible, $new_code,
&escaped($comments);
} else {
printf FP "%4x . %.13s %d; %s\n",
$old_code,
$visible, $new_code,
&escaped($comments);
}
}
sub null_row($$) {
my $old_code = $_[0];
my $comments = $_[1];
if ($old_code < 256) {
printf FP "%4x %c %s\n",
$old_code, $old_code,
&escaped($comments);
} else {
printf FP "%4x . %s\n",
$old_code,
&escaped($comments);
}
}
sub make_footer() {
printf FP "\n";
printf FP "\n";
printf FP "\n";
}
# return true if the string describes a range
sub is_range($) {
return ($_[0] =~ /.*-.*/);
}
# convert the U+'s to 0x's so strtod() can convert them.
sub zeroxes($) {
my $result = $_[0];
$result =~ s/^U\+/0x/;
$result =~ s/-U\+/-0x/;
return $result;
}
# convert a string to a number (-1's are outside the range of Unicode).
sub value_of($) {
my ($result, $oops) = strtod($_[0]);
$result = -1 if ($oops ne 0);
return $result;
}
# return the first number in a range
sub first_of($) {
my $range = &zeroxes($_[0]);
$range =~ s/-.*//;
return &value_of($range);
}
# return the last number in a range
sub last_of($) {
my $range = &zeroxes($_[0]);
$range =~ s/^.*-//;
return &value_of($range);
}
sub one_many($$$) {
my $oldcode = $_[0];
my $newcode = &zeroxes($_[1]);
my $comment = $_[2];
my $old_code = &value_of($oldcode);
if ( $old_code lt 0 ) {
printf "? Problem with number \"%s\"\n", $oldcode;
} else {
&make_mark if (( $old_code % 8 ) == 0 );
if ( $newcode =~ /^#.*/ ) {
&null_row($old_code, $comment);
} elsif ( &is_range($newcode) ) {
my $first_item = &first_of($newcode);
my $last_item = &last_of($newcode);
my $item;
if ( $first_item lt 0 or $last_item lt 0 ) {
printf "? Problem with one:many numbers \"%s\"\n", $newcode;
} else {
if ( $comment =~ /^$/ ) {
$comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item);
} else {
$comment = $comment . " (range)";
}
for $item ( $first_item..$last_item) {
&make_row($old_code, $item, $comment);
}
}
} else {
my $new_code = &value_of($newcode);
if ( $new_code lt 0 ) {
printf "? Problem with number \"%s\"\n", $newcode;
} else {
if ( $comment =~ /^$/ ) {
$comment = sprintf("mapped: %#x to %#x", $old_code, $new_code);
}
&make_row($old_code, $new_code, $comment);
}
}
}
}
sub many_many($$$) {
my $oldcode = $_[0];
my $newcode = $_[1];
my $comment = $_[2];
my $first_old = &first_of($oldcode);
my $last_old = &last_of($oldcode);
my $item;
if (&is_range($newcode)) {
my $first_new = &first_of($newcode);
my $last_new = &last_of($newcode);
for $item ( $first_old..$last_old) {
&one_many($item, $first_new, $comment);
$first_new += 1;
}
} else {
for $item ( $first_old..$last_old) {
&one_many($item, $newcode, $comment);
}
}
}
sub approximate($$$) {
my $values = $_[0];
my $expect = sprintf("%-8s", $_[1]);
my $comment = $_[2];
my $escaped = &escaped($expect);
my $left;
my $this;
my $next;
$escaped =~ s/\\134/\\/g;
$escaped =~ s/\\015/\
\;/g;
$escaped =~ s/\\012/\
\;/g;
while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) {
$left = $escaped;
$left =~ s/\\[0-7]{3}.*//;
$this = substr $escaped,length($left)+1,3;
$next = substr $escaped,length($left)+4;
$escaped = sprintf("%s%d;%s", $left, oct $this, $next);
}
my $visible = sprintf("&#%d; ", $values);
if ($values < 256) {
printf FP "%4x %c %.13s %d; approx: %s\n",
$values, $values,
$visible,
$values,
$escaped;
} else {
printf FP "%4x . %.13s %d; approx: %s\n",
$values,
$visible,
$values,
$escaped;
}
}
sub doit($) {
my $source = $_[0];
printf "** %s\n", $source;
my $target = basename($source, ".tbl");
# Read the file into an array in memory.
open(FP,$source) || do {
print STDERR "Can't open input $source: $!\n";
return;
};
my (@input) =