tools/unimap.pl - wine - Git at Google

 #!/usr/bin/perl
 #
 # Reads the Unicode 2.0 "unidata2.txt" file and selects encodings
 # for case pairs, then builds an include file "casemap.h" for the
 # case conversion routines.

 use integer

 $INFILE	= "unidata2.txt";
 $OUT	= ">casemap.h";
 $TEST	= ">allcodes";

 # Open the data file ...
 open INFILE	or die "Can't open input file $INFILE!\n";
 open OUT	or die "Can't open output file $OUT!\n";
 open TEST	or die "Can't open output file $OUT!\n";

 #Initialize the upper and lower hashes
 %lwrtable = ();
 %uprtable = ();
 @low = ("0") x 256;
 @upr = ("0") x 256;

 while ($line = <INFILE> )
 {
 	# Decode the fields ...
 	($code, $name, $cat, $comb, $bidi,
 	 $decomp, $dec, $dig, $num, $mirror,
 	 $oldname, $comment, $upper, $lower, $title) = split /;/, $line;

 	#Get the high byte of the code
 	$high = substr $code, 0, 2;
 	if ($lower ne "") {
 		$low[hex $high] = "lblk" . $high;
 		$lwrtable{$code} = $lower;
 	}
 	if ($upper ne "") {
 		$upr[hex $high] = "ublk" . $high;
 		$uprtable{$code} = $upper;
 	}
 	#Write everything to the test file
 	printf TEST "%s %s %s\n", $code,
 		$upper ne "" ? $upper : "0000",
 		$lower ne "" ? $lower : "0000";

 }
 close(FILE);
 close TEST;

 #Generate the header file
 print OUT "/*\n";
 print OUT " * Automatically generated file -- do not edit!\n";
 print OUT " * (Use tools/unimap.pl for generation)\n";
 print OUT " *\n";
 print OUT " * Mapping tables for Unicode case conversion\n";
 print OUT " */\n\n";

 #Write out the non-trivial mappings
 for ($high = 0; $high < 256; $high++) {
 	#Check whether the table is needed
 	if (length $low[$high] < 6) {
 		next;
 	}
 	printf OUT "/* Lowercase mappings %02X00 - %02XFF */\n",
 		$high, $high;
 	printf OUT "static const WCHAR lblk%02X[256] = {\n", $high;
 	for ($low = 0; $low < 256; $low += 8) {
 		@patch = ();
 		for ($i = 0; $i < 8; $i++) {
 			$code = sprintf "%02X%02X", $high, $low + $i;
 			$map = $lwrtable{$code};
 			if ($map eq "") {
 				$map = $code;
 			}
 			$patch[$i] = "0x" . $map;
 		}
 		printf OUT "\t%s, %s, %s, %s, %s, %s, %s, %s,\n",
 			@patch;
 	}
 	print OUT "};\n\n";
 }
 print OUT "static const WCHAR * const lwrtable[256] = {\n";
 for ($i = 0; $i < 256; $i += 8) {
 	@patch = @low[$i+0 .. $i+7];
 	printf OUT "\t%06s, %06s, %06s, %06s, %06s, %06s, %06s, %06s,\n",
 		@patch;
 }
 print OUT "};\n\n";

 for ($high = 0; $high < 256; $high++) {
 	#Check whether the table is needed
 	if (length $upr[$high] < 6) {
 		next;
 	}
 	printf OUT "/* Uppercase mappings %02X00 - %02XFF */\n",
 		$high, $high;
 	printf OUT "static const WCHAR ublk%02X[256] = {\n", $high;
 	for ($low = 0; $low < 256; $low += 8) {
 		@patch = ();
 		for ($i = 0; $i < 8; $i++) {
 			$code = sprintf "%02X%02X", $high, $low + $i;
 			$map = $uprtable{$code};
 			if ($map eq "") {
 				$map = $code;
 			}
 			$patch[$i] = "0x" . $map;
 		}
 		printf OUT "\t%s, %s, %s, %s, %s, %s, %s, %s,\n",
 			@patch;
 	}
 	print OUT "};\n\n";
 }
 print OUT "static const WCHAR * const uprtable[256] = {\n";
 for ($i = 0; $i < 256; $i += 8) {
 	@patch = @upr[$i+0 .. $i+7];
 	printf OUT "\t%06s, %06s, %06s, %06s, %06s, %06s, %06s, %06s,\n",
 		@patch;
 }
 print OUT "};\n\n";

 close(OUT);
	#!/usr/bin/perl
	#
	# Reads the Unicode 2.0 "unidata2.txt" file and selects encodings
	# for case pairs, then builds an include file "casemap.h" for the
	# case conversion routines.

	use integer

	$INFILE = "unidata2.txt";
	$OUT = ">casemap.h";
	$TEST = ">allcodes";

	# Open the data file ...
	open INFILE or die "Can't open input file $INFILE!\n";
	open OUT or die "Can't open output file $OUT!\n";
	open TEST or die "Can't open output file $OUT!\n";

	#Initialize the upper and lower hashes
	%lwrtable = ();
	%uprtable = ();
	@low = ("0") x 256;
	@upr = ("0") x 256;

	while ($line = <INFILE> )
	{
	# Decode the fields ...
	($code, $name, $cat, $comb, $bidi,
	$decomp, $dec, $dig, $num, $mirror,
	$oldname, $comment, $upper, $lower, $title) = split /;/, $line;

	#Get the high byte of the code
	$high = substr $code, 0, 2;
	if ($lower ne "") {
	$low[hex $high] = "lblk" . $high;
	$lwrtable{$code} = $lower;
	}
	if ($upper ne "") {
	$upr[hex $high] = "ublk" . $high;
	$uprtable{$code} = $upper;
	}
	#Write everything to the test file
	printf TEST "%s %s %s\n", $code,
	$upper ne "" ? $upper : "0000",
	$lower ne "" ? $lower : "0000";

	}
	close(FILE);
	close TEST;

	#Generate the header file
	print OUT "/*\n";
	print OUT " * Automatically generated file -- do not edit!\n";
	print OUT " * (Use tools/unimap.pl for generation)\n";
	print OUT " *\n";
	print OUT " * Mapping tables for Unicode case conversion\n";
	print OUT " */\n\n";

	#Write out the non-trivial mappings
	for ($high = 0; $high < 256; $high++) {
	#Check whether the table is needed
	if (length $low[$high] < 6) {
	next;
	}
	printf OUT "/* Lowercase mappings %02X00 - %02XFF */\n",
	$high, $high;
	printf OUT "static const WCHAR lblk%02X[256] = {\n", $high;
	for ($low = 0; $low < 256; $low += 8) {
	@patch = ();
	for ($i = 0; $i < 8; $i++) {
	$code = sprintf "%02X%02X", $high, $low + $i;
	$map = $lwrtable{$code};
	if ($map eq "") {
	$map = $code;
	}
	$patch[$i] = "0x" . $map;
	}
	printf OUT "\t%s, %s, %s, %s, %s, %s, %s, %s,\n",
	@patch;
	}
	print OUT "};\n\n";
	}
	print OUT "static const WCHAR * const lwrtable[256] = {\n";
	for ($i = 0; $i < 256; $i += 8) {
	@patch = @low[$i+0 .. $i+7];
	printf OUT "\t%06s, %06s, %06s, %06s, %06s, %06s, %06s, %06s,\n",
	@patch;
	}
	print OUT "};\n\n";

	for ($high = 0; $high < 256; $high++) {
	#Check whether the table is needed
	if (length $upr[$high] < 6) {
	next;
	}
	printf OUT "/* Uppercase mappings %02X00 - %02XFF */\n",
	$high, $high;
	printf OUT "static const WCHAR ublk%02X[256] = {\n", $high;
	for ($low = 0; $low < 256; $low += 8) {
	@patch = ();
	for ($i = 0; $i < 8; $i++) {
	$code = sprintf "%02X%02X", $high, $low + $i;
	$map = $uprtable{$code};
	if ($map eq "") {
	$map = $code;
	}
	$patch[$i] = "0x" . $map;
	}
	printf OUT "\t%s, %s, %s, %s, %s, %s, %s, %s,\n",
	@patch;
	}
	print OUT "};\n\n";
	}
	print OUT "static const WCHAR * const uprtable[256] = {\n";
	for ($i = 0; $i < 256; $i += 8) {
	@patch = @upr[$i+0 .. $i+7];
	printf OUT "\t%06s, %06s, %06s, %06s, %06s, %06s, %06s, %06s,\n",
	@patch;
	}
	print OUT "};\n\n";

	close(OUT);