#!/usr/bin/perl
#
# Reads the Unicode 2.0 "unidata2.txt" file and selects encodings
# for case pairs, then builds an include file "casemap.h" for the
# case conversion routines.  Every code read is also written, with
# its upper/lower mapping, to the test file "allcodes".

use integer;

# Input data file and the two files this script generates.
$INFILE = "unidata2.txt";
$OUT    = "casemap.h";
$TEST   = "allcodes";

# Open the data file and both output files.  The three-argument form keeps
# the open mode separate from the file name.
open(INFILE, '<', $INFILE) or die "Can't open input file $INFILE: $!\n";
open(OUT,    '>', $OUT)    or die "Can't open output file $OUT: $!\n";
open(TEST,   '>', $TEST)   or die "Can't open output file $TEST: $!\n";

# Initialize the upper and lower hashes (code => mapped code, both as
# hex strings taken from the data file).
%lwrtable = ();
%uprtable = ();

# One slot per high byte (00..FF).  "0" means no table is needed for that
# 256-code block; the reader replaces it with a "lblkXX"/"ublkXX" table
# name once a mapping is seen in the block.
@low = ("0") x 256;
@upr = ("0") x 256;

# Read the data file one record per line.  Each record is a
# semicolon-separated list of fields; only the code and its uppercase and
# lowercase mappings are used.
while (my $line = <INFILE>)
{
    chomp $line;

    # Decode the fields: index 0 is the code, 12 the uppercase mapping and
    # 13 the lowercase mapping (index 14, the titlecase mapping, is unused).
    my ($code, $upper, $lower) = (split /;/, $line)[0, 12, 13];

    # Skip blank or malformed lines.  The tables are indexed by a two-digit
    # high byte and looked up with keys built by "%02X%02X", so only codes
    # of exactly four uppercase hex digits can be represented.
    next unless defined $code && $code =~ /\A[0-9A-F]{4}\z/;
    $upper = "" unless defined $upper;
    $lower = "" unless defined $lower;

    # Get the high byte of the code
    my $high = substr $code, 0, 2;
    if ($lower ne "") {
        # Mark this 256-code block as needing a lowercase table.
        $low[hex $high] = "lblk" . $high;
        $lwrtable{$code} = $lower;
    }
    if ($upper ne "") {
        # Mark this 256-code block as needing an uppercase table.
        $upr[hex $high] = "ublk" . $high;
        $uprtable{$code} = $upper;
    }

    # Write everything to the test file; "0000" stands for "no mapping".
    printf TEST "%s %s %s\n", $code,
        $upper ne "" ? $upper : "0000",
        $lower ne "" ? $lower : "0000";
}
close(INFILE);
# Buffered write errors only surface when the handle is closed.
close(TEST) or die "Can't close test output file: $!\n";

# Generate the header file: start with the "do not edit" banner comment,
# followed by one blank line.
foreach my $text (
    "/*",
    " * Automatically generated file -- do not edit!",
    " * (Use tools/unimap.pl for generation)",
    " *",
    " * Mapping tables for Unicode case conversion",
    " */",
    "",
) {
    print OUT $text . "\n";
}

# Write out the non-trivial lowercase mappings: one 256-entry table for
# every high byte whose block contains at least one lowercase mapping.
foreach my $hi (0 .. 255) {
    # @low entries are "0" (no table) or a six-character "lblkXX" name,
    # so the length tells whether a table is needed.
    next if length($low[$hi]) < 6;

    my $block = sprintf "%02X", $hi;
    print OUT "/* Lowercase mappings " . $block . "00 - " . $block . "FF */\n";
    print OUT "static const WCHAR lblk" . $block . "[256] = {\n";

    # 32 rows of 8 entries each cover the 256 codes of the block.
    foreach my $row (0 .. 31) {
        my @cells;
        foreach my $col (0 .. 7) {
            my $code   = sprintf "%s%02X", $block, $row * 8 + $col;
            my $mapped = $lwrtable{$code};
            # Codes without a recorded mapping map to themselves.
            $mapped = $code unless defined $mapped && $mapped ne "";
            push @cells, "0x" . $mapped;
        }
        print OUT "\t" . join(", ", @cells) . ",\n";
    }
    print OUT "};\n\n";
}

# Top-level table: one entry per high byte, either the block table name or
# the "0" placeholder, each formatted with %06s so the placeholder takes
# the same width as a block name.
print OUT "static const WCHAR * const lwrtable[256] = {\n";
for (my $first = 0; $first < 256; $first += 8) {
    my @names = map { sprintf "%06s", $_ } @low[$first .. $first + 7];
    print OUT "\t" . join(", ", @names) . ",\n";
}
print OUT "};\n\n";

# Write out the non-trivial uppercase mappings: one 256-entry table for
# every high byte whose block contains at least one uppercase mapping.
foreach my $hi (0 .. 255) {
    # @upr entries are "0" (no table) or a six-character "ublkXX" name,
    # so the length tells whether a table is needed.
    next if length($upr[$hi]) < 6;

    my $block = sprintf "%02X", $hi;
    print OUT "/* Uppercase mappings " . $block . "00 - " . $block . "FF */\n";
    print OUT "static const WCHAR ublk" . $block . "[256] = {\n";

    # 32 rows of 8 entries each cover the 256 codes of the block.
    foreach my $row (0 .. 31) {
        my @cells;
        foreach my $col (0 .. 7) {
            my $code   = sprintf "%s%02X", $block, $row * 8 + $col;
            my $mapped = $uprtable{$code};
            # Codes without a recorded mapping map to themselves.
            $mapped = $code unless defined $mapped && $mapped ne "";
            push @cells, "0x" . $mapped;
        }
        print OUT "\t" . join(", ", @cells) . ",\n";
    }
    print OUT "};\n\n";
}

# Top-level table: one entry per high byte, either the block table name or
# the "0" placeholder, each formatted with %06s so the placeholder takes
# the same width as a block name.
print OUT "static const WCHAR * const uprtable[256] = {\n";
for (my $first = 0; $first < 256; $first += 8) {
    my @names = map { sprintf "%06s", $_ } @upr[$first .. $first + 7];
    print OUT "\t" . join(", ", @names) . ",\n";
}
print OUT "};\n\n";

# Finish the generated header.  Buffered write errors (e.g. a full disk)
# are only reported when the handle is closed, so check the result.
close(OUT) or die "Can't close output file: $!\n";