tools/wmc/mcl.c - wine - Git at Google

 /*
  * Wine Message Compiler lexical scanner
  *
  * Copyright 2000 Bertho A. Stultiens (BS)
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  */

 #include "config.h"

 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <assert.h>
 #include <string.h>

 #include "utils.h"
 #include "wmc.h"
 #include "lang.h"

 #include "mcy.tab.h"

 /*
  * Keywords are case insensitive. All normal input is treated as
  * being in codepage iso-8859-1 for ascii input files (unicode
  * page 0) and as equivalent unicode if unicode input is selected.
  * All normal input, which is not part of a message text, is
  * enforced to be unicode page 0. Otherwise an error will be
  * generated. The normal file data should only be ASCII because
  * that is the basic definition of the grammar.
  *
  * Byteorder of unicode input is determined automatically by
  * reading the first 8 bytes and checking them against unicode
  * page 0 byteorder (hibyte must be 0).
  * -- FIXME --
  * Alternatively, the input is checked against a special byte
  * sequence to identify the file.
  * -- FIXME --
  *
  *
  * Keywords:
  *	Codepages
  *	Facility
  *	FacilityNames
  *	Language
  *	LanguageNames
  *	MessageId
  *	MessageIdTypedef
  *	OutputBase
  *	Severity
  *	SeverityNames
  *	SymbolicName
  *
  * Default added identifiers for classes:
  * SeverityNames:
  *	Success		= 0x0
  *	Informational	= 0x1
  *	Warning		= 0x2
  *	Error		= 0x3
  * FacilityNames:
  *	System		= 0x0FF
  *	Application	= 0xFFF
  *
  * The 'Codepages' keyword is a wmc extension.
  */

 /*
  * Zero-terminated WCHAR spellings of the names that mcy_lex() registers
  * in the token table on its first call: the grammar keywords, the default
  * severity and facility names, and the default language 'English'.
  * ustr_msg00001 is passed as the alias of the 'English' language token.
  */
 static const WCHAR ustr_application[]	= { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
 static const WCHAR ustr_codepages[]	= { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
 static const WCHAR ustr_english[]	= { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
 static const WCHAR ustr_error[]		= { 'E', 'r', 'r', 'o', 'r', 0 };
 static const WCHAR ustr_facility[]	= { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
 static const WCHAR ustr_facilitynames[]	= { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
 static const WCHAR ustr_informational[]	= { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
 static const WCHAR ustr_language[]	= { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
 static const WCHAR ustr_languagenames[]	= { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
 static const WCHAR ustr_messageid[]	= { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
 static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
 static const WCHAR ustr_outputbase[]	= { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
 static const WCHAR ustr_severity[]	= { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
 static const WCHAR ustr_severitynames[]	= { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
 static const WCHAR ustr_success[]	= { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
 static const WCHAR ustr_symbolicname[]	= { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
 static const WCHAR ustr_system[]	= { 'S', 'y', 's', 't', 'e', 'm', 0 };
 static const WCHAR ustr_warning[]	= { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
 static const WCHAR ustr_msg00001[]	= { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
 /*
  * This table is to beat any form of "expression building" to check for
  * correct filename characters. It is also used for ident checks.
  * FIXME: use it more consistently.
  */

 /*
  * Classification bits stored per character in char_table[] below.
  * In this file the scanner tests CH_SHORTNAME and CH_PUNCT while reading
  * a file name, and CH_IDENT and CH_NUMBER while reading an identifier.
  */
 #define CH_SHORTNAME	0x01
 #define CH_LONGNAME	0x02
 #define CH_IDENT	0x04
 #define CH_NUMBER	0x08
 /*#define CH_WILDCARD	0x10*/
 /*#define CH_DOT	0x20*/
 #define CH_PUNCT	0x40
 #define CH_INVALID	0x80

 /*
  * One entry per iso-8859-1 character value (index 0x00 - 0xFF); each entry
  * is an OR of the CH_* bits above. The scanner only indexes this table
  * after isisochar() has confirmed the character fits in one byte.
  * A few entries still carry the 0x10 and 0x20 bits ('*', '?' and '.'),
  * which match the commented-out CH_WILDCARD and CH_DOT definitions.
  */
 static const char char_table[256] = {
 	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
 	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
 	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
 	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
 	0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
 	0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
 	0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
 	0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
 	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
 	0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
 	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
 	0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
 };

 /*
  * Tell whether ch lies in unicode page 0 (iso-8859-1), i.e. has no bits
  * set above the low byte. Returns 1 if so and 0 otherwise; negative
  * values such as EOF yield 0.
  */
 static int isisochar(int ch)
 {
 	const int high_bits = ch & ~0xff;

 	return high_bits == 0;
 }

 /* Currently selected codepage number and its conversion table */
 static int codepage;
 static const union cptable *codepage_def;

 /*
  * Select codepage cp for translating input and look up its table with
  * find_codepage(). Reports an error through xyyerror() if no table
  * exists for cp.
  */
 void set_codepage(int cp)
 {
 	const union cptable *table = find_codepage(cp);

 	codepage = cp;
 	codepage_def = table;
 	if(table == NULL)
 		xyyerror("Codepage %d not found; cannot process", cp);
 }

 /*
  * Input functions
  */
 static int nungetstack = 0;		/* Number of characters on the unget stack */
 static int allocungetstack = 0;	/* Allocated capacity of the unget stack */
 /*
  * The unget stack holds characters handed out by get_unichar(), which
  * are 16-bit values, so its elements must be WCHAR. With plain 'char'
  * elements a pushed-back character above 0x7f gets truncated, and 0xff
  * reads back as -1 (EOF) on platforms where char is signed.
  */
 static WCHAR *ungetstack = NULL;
 static int ninputbuffer = 0;		/* Characters left unread in inputbuffer */
 static WCHAR *inputbuffer = NULL;	/* Current input line as unicode */
 static char *xlatebuffer = NULL;	/* Raw line read from a non-unicode file */

 #define INPUTBUFFER_SIZE	2048	/* Must be larger than 4 and approx. large enough to hold a line */

 /*
  * Fill the input buffer with *one* line of input.
  * The line is '\n' terminated so that scanning
  * messages with translation works as expected
  * (otherwise we cannot pre-translate because the
  * language is first known one line before the
  * actual message).
  *
  * For non-unicode input the line is read with fgets() and converted to
  * unicode with the current codepage. For unicode input the first call
  * reads 8 bytes (4 characters) to determine the byte order; later calls
  * read one 16-bit character at a time up to and including '\n'.
  *
  * Returns 1 after adding the number of characters read to ninputbuffer,
  * or 0 when the end of the input has been reached.
  */
 static int fill_inputbuffer(void)
 {
 	int n;
 	static const char err_fatalread[] = "Fatal: reading input failed";
 	static int endian = -1;	/* -1 until the byte order has been detected */

 	if(!inputbuffer)
 	{
 		/*
 		 * inputbuffer is indexed in WCHAR units up to INPUTBUFFER_SIZE
 		 * (conversion limit and read loop below), so it must be sized
 		 * in characters, not in bytes.
 		 */
 		inputbuffer = xmalloc(INPUTBUFFER_SIZE * sizeof(*inputbuffer));
 		xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
 	}

 try_again:
 	if(!unicodein)
 	{
 		char *cptr;
 		cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
 		if(!cptr && ferror(yyin))
 			xyyerror(err_fatalread);
 		else if(!cptr)
 			return 0;
 		assert(codepage_def != NULL);
 		/* The terminating '\0' is converted too, hence strlen()+1 */
 		n = wine_cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
 		if(n < 0)
 			internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)", n);
 		if(n <= 1)
 			goto try_again;	/* Should not happen */
 		n--;	/* Strip added conversion '\0' from input length */
 		/*
 		 * FIXME:
 		 * Detect UTF-8 in the first time we read some bytes by
 		 * checking the special sequence "FE..." or something like
 		 * that. I need to check www.unicode.org for details.
 		 */
 	}
 	else
 	{
 		if(endian == -1)
 		{
 			/* First read: fetch 4 characters and guess the byte order from them */
 			n = fread(inputbuffer, 1, 8, yyin);
 			if(n != 8)
 			{
 				if(!n && ferror(yyin))
 					xyyerror(err_fatalread);
 				else
 					xyyerror("Fatal: file to short to determine byteorder (should never happen)");
 			}
 			/* Grammar input is page 0 only, so the high byte of each character must be 0 */
 			if(isisochar(inputbuffer[0]) &&
 				isisochar(inputbuffer[1]) &&
 				isisochar(inputbuffer[2]) &&
 				isisochar(inputbuffer[3]))
 			{
 #ifdef WORDS_BIGENDIAN
 				endian = WMC_BO_BIG;
 #else
 				endian = WMC_BO_LITTLE;
 #endif
 			}
 			else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
 				isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
 				isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
 				isisochar(BYTESWAP_WORD(inputbuffer[3])))
 			{
 #ifdef WORDS_BIGENDIAN
 				endian = WMC_BO_LITTLE;
 #else
 				endian = WMC_BO_BIG;
 #endif
 			}
 			else
 				xyyerror("Fatal: cannot determine file's byteorder");
 			/* FIXME:
 			 * Determine the file-endian with the leader-bytes
 			 * "FF FE..."; can't remember the exact sequence.
 			 */
 			n /= 2;	/* Bytes -> characters */
 #ifdef WORDS_BIGENDIAN
 			if(endian == WMC_BO_LITTLE)
 #else
 			if(endian == WMC_BO_BIG)
 #endif
 			{
 				/* File byte order differs from the host: swap what we read */
 				inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
 				inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
 				inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
 				inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
 			}

 		}
 		else
 		{
 			int i;
 			n = 0;
 			for(i = 0; i < INPUTBUFFER_SIZE; i++)
 			{
 				int t;
 				t = fread(&inputbuffer[i], 2, 1, yyin);
 				if(!t && ferror(yyin))
 					xyyerror(err_fatalread);
 				else if(!t)
 				{
 					if(n)
 						break;	/* EOF after a partial line: hand out what we have */
 					/*
 					 * EOF before any character was read. Counting the
 					 * unread slot as input would return stale data and
 					 * never report the end of the file.
 					 */
 					return 0;
 				}
 				n++;
 #ifdef WORDS_BIGENDIAN
 				if(endian == WMC_BO_LITTLE)
 #else
 				if(endian == WMC_BO_BIG)
 #endif
 				{
 					if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
 						break;
 				}
 				else
 				{
 					if(inputbuffer[i] == '\n')
 						break;
 				}
 			}
 		}

 	}

 	if(!n)
 	{
 		mcy_warning("Re-read line (input was or converted to zilch)");
 		goto try_again;	/* Should not happen, but could be due to stdin reading and a signal */
 	}

 	ninputbuffer += n;
 	return 1;
 }

 /*
  * Return the next input character, or EOF at the end of the input.
  * Characters pushed back with unget_unichar() are returned first;
  * otherwise the character comes from the input buffer, which is
  * refilled through fill_inputbuffer() once it is empty.
  * char_number is advanced on every call.
  */
 static int get_unichar(void)
 {
 	static WCHAR *readpos = NULL;	/* Next unread character in inputbuffer */
 	int ch;

 	char_number++;

 	if(nungetstack != 0)
 	{
 		nungetstack--;
 		return ungetstack[nungetstack];
 	}

 	if(ninputbuffer == 0)
 	{
 		if(!fill_inputbuffer())
 			return EOF;
 		readpos = inputbuffer;
 	}

 	ch = (int)(*readpos & 0xffff);
 	readpos++;
 	ninputbuffer--;
 	return ch;
 }

 /*
  * Push ch back so that the next get_unichar() returns it again.
  * EOF is ignored. The unget stack grows in steps of 32 entries and
  * char_number is moved back by one.
  */
 static void unget_unichar(int ch)
 {
 	if(ch != EOF)
 	{
 		char_number--;

 		if(allocungetstack == nungetstack)
 		{
 			allocungetstack += 32;
 			ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
 		}

 		ungetstack[nungetstack] = (WCHAR)ch;
 		nungetstack++;
 	}
 }


 /*
  * Normal character stack.
  * Used for number scanning.
  */
 static int ncharstack = 0;	/* Number of characters on the stack */
 static int alloccharstack = 0;	/* Allocated capacity of the stack */
 static char *charstack = NULL;

 /* Discard all characters on the stack; the storage is kept for reuse. */
 static void empty_char_stack(void)
 {
 	ncharstack = 0;
 }

 /* Append ch (truncated to char), growing the stack by 32 entries when full. */
 static void push_char(int ch)
 {
 	if(alloccharstack == ncharstack)
 	{
 		alloccharstack += 32;
 		charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
 	}
 	charstack[ncharstack] = (char)ch;
 	ncharstack++;
 }

 /* Return the most recently pushed character (0 - 255), or 0 if the stack is empty. */
 static int tos_char_stack(void)
 {
 	if(ncharstack != 0)
 		return (int)(charstack[ncharstack-1] & 0xff);
 	return 0;
 }

 /* Return the start of the stack storage; NULL if nothing was ever pushed. */
 static char *get_char_stack(void)
 {
 	return charstack;
 }

 /*
  * Unicode character stack.
  * Used for general scanner.
  */
 static int nunicharstack = 0;		/* Number of characters on the stack */
 static int allocunicharstack = 0;	/* Allocated capacity of the stack */
 static WCHAR *unicharstack = NULL;

 /* Discard all characters on the stack; the storage is kept for reuse. */
 static void empty_unichar_stack(void)
 {
 	nunicharstack = 0;
 }

 /* Append ch (truncated to WCHAR), growing the stack by 128 entries when full. */
 static void push_unichar(int ch)
 {
 	if(allocunicharstack == nunicharstack)
 	{
 		allocunicharstack += 128;
 		unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
 	}
 	unicharstack[nunicharstack] = (WCHAR)ch;
 	nunicharstack++;
 }

 #if 0
 /* Disabled: return the most recently pushed character, or 0 if the stack is empty. */
 static int tos_unichar_stack(void)
 {
 	if(!nunicharstack)
 		return 0;
 	else
 		return (int)(unicharstack[nunicharstack-1] & 0xffff);
 }
 #endif

 /* Return the start of the stack storage; NULL if nothing was ever pushed. */
 static WCHAR *get_unichar_stack(void)
 {
 	return unicharstack;
 }

 /*
  * Number scanner
  *
  * state |      ch         | next state
  * ------+-----------------+--------------------------
  *   0   | [0]             | 1
  *   0   | [1-9]           | 4
  *   0   | .               | error (should never occur)
  *   1   | [xX]            | 2
  *   1   | [0-7]           | 3
  *   1   | [89a-wyzA-WYZ_] | error invalid digit
  *   1   | .               | return 0
  *   2   | [0-9a-fA-F]     | 2
  *   2   | [g-zG-Z_]       | error invalid hex digit
  *   2   | .               | return (hex-number) if TOS != [xX] else error
  *   3   | [0-7]           | 3
  *   3   | [89a-zA-Z_]     | error invalid octal digit
  *   3   | .               | return (octal-number)
  *   4   | [0-9]           | 4
  *   4   | [a-zA-Z_]       | error invalid decimal digit
  *   4   | .               | return (decimal-number)
  *
  * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
  * and return the value. This is not entirely correct, but close
  * enough (should check punctuators as trailing context, but the
  * char_table is not adapted to that and it is questionable whether
  * it is worth the trouble).
  * All non-iso-8859-1 characters are an error.
  */
 static int scan_number(int ch)
 {
 	/*
 	 * ch is the first character of the number, already read by the
 	 * caller. The digits are collected on the char stack and converted
 	 * with strtoul(). The value is stored in mcy_lval.num and tNUMBER
 	 * is returned; the character that ended the number is pushed back.
 	 */
 	int state = 0;
 	int base = 10;
 	empty_char_stack();

 	while(1)
 	{
 		if(!isisochar(ch))
 			xyyerror("Invalid digit");

 		switch(state)
 		{
 		case 0:	/* First character */
 			if(isdigit(ch))
 			{
 				push_char(ch);
 				if(ch == '0')
 					state = 1;
 				else
 					state = 4;
 			}
 			else
 				internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state");
 			break;
 		case 1:	/* Seen a single leading '0' */
 			if(ch == 'x' || ch == 'X')
 			{
 				push_char(ch);
 				state = 2;
 			}
 			else if(ch >= '0' && ch <= '7')
 			{
 				push_char(ch);
 				state = 3;
 			}
 			/*
 			 * '8' and '9' are invalid after a leading zero, just like
 			 * letters and '_'. Testing isalpha() alone would end the
 			 * number here and scan "08" as 0 followed by 8.
 			 */
 			else if(isalnum(ch) || ch == '_')
 				xyyerror("Invalid number digit");
 			else
 			{
 				/* A lone zero */
 				unget_unichar(ch);
 				mcy_lval.num = 0;
 				return tNUMBER;
 			}
 			break;
 		case 2:	/* Hexadecimal digits after "0x" */
 			if(isxdigit(ch))
 				push_char(ch);
 			/* Top of stack is still [xX] when no hex digit followed the prefix */
 			else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
 				xyyerror("Invalid hex digit");
 			else
 			{
 				base = 16;
 				goto finish;
 			}
 			break;
 		case 3:	/* Octal digits */
 			if(ch >= '0' && ch <= '7')
 				push_char(ch);
 			else if(isalnum(ch) || ch == '_')
 				xyyerror("Invalid octal digit");
 			else
 			{
 				base = 8;
 				goto finish;
 			}
 			break;
 		case 4:	/* Decimal digits */
 			if(isdigit(ch))
 				push_char(ch);
 			else if(isalnum(ch) || ch == '_')
 				xyyerror("Invalid decimal digit");
 			else
 			{
 				base = 10;
 				goto finish;
 			}
 			break;
 		default:
 			internal_error(__FILE__, __LINE__, "Invalid state in number-scanner");
 		}
 		ch = get_unichar();
 	}
 finish:
 	unget_unichar(ch);
 	push_char(0);	/* Terminate the digit string for strtoul() */
 	mcy_lval.num = strtoul(get_char_stack(), NULL, base);
 	return tNUMBER;
 }

 /* Account for a consumed newline: next line, character position back to 1. */
 static void newline(void)
 {
 	char_number = 1;
 	line_number += 1;
 }

 static int unisort(const void *p1, const void *p2)
 {
 	return unistricmp(((const token_t *)p1)->name, ((const token_t *)p2)->name);
 }

 static token_t *tokentable = NULL;
 static int ntokentable = 0;

 token_t *lookup_token(const WCHAR *s)
 {
 	token_t tok;

 	tok.name = s;
 	return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
 }

 /*
  * Append a token to the token table and re-sort the table.
  *
  * type, name, tok, cp, alias and fix are stored in the new entry's
  * type, name, token, codepage, alias and fixed fields. The name and
  * alias pointers are stored as given, not copied.
  */
 void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
 {
 	token_t *entry;

 	tokentable = xrealloc(tokentable, (ntokentable + 1) * sizeof(*tokentable));
 	entry = &tokentable[ntokentable];
 	ntokentable++;

 	entry->type = type;
 	entry->name = name;
 	entry->token = tok;
 	entry->codepage = cp;
 	entry->alias = alias;
 	entry->fixed = fix;

 	/* Keep the table sorted: lookup_token() does a binary search */
 	qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
 }

 /*
  * Hand out the token table: *tab receives the table itself (not a copy)
  * and *len the number of entries. Both pointers must be non-NULL.
  */
 void get_tokentable(token_t **tab, int *len)
 {
 	assert(tab != NULL);
 	assert(len != NULL);

 	*len = ntokentable;
 	*tab = tokentable;
 }

 /*
  * The scanner
  *
  * Returns the next token of the input, or EOF at the end of the input.
  * On the first call the default codepage is selected and the built-in
  * keywords, severities, facilities and the 'English' language are added
  * to the token table.
  *
  * Three flags select special scanning modes:
  *   want_line - read a whole line; returns tMSGEND for a line holding
  *               only '.', otherwise tLINE with the text in mcy_lval.str
  *   want_nl   - report the next newline as tNL instead of skipping it
  *   want_file - read a file name of at most 8 characters as tFILE
  * (NOTE(review): these flags are presumably set by the parser; they are
  * only read and cleared here.)
  *
  * Otherwise the result is a keyword token, tTOKEN (mcy_lval.tok),
  * tIDENT or tCOMMENT (mcy_lval.str), tNUMBER (mcy_lval.num) or one of
  * the characters ':', '=', '+', '(' and ')'.
  */
 int mcy_lex(void)
 {
 	static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
 	static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
 	static int isinit = 0;
 	int ch;

 	if(!isinit)
 	{
 		isinit++;
 		set_codepage(WMC_DEFAULT_CODEPAGE);
 		add_token(tok_keyword,	ustr_codepages,		tCODEPAGE,	0, NULL, 0);
 		add_token(tok_keyword,	ustr_facility,		tFACILITY,	0, NULL, 1);
 		add_token(tok_keyword,	ustr_facilitynames,	tFACNAMES,	0, NULL, 1);
 		add_token(tok_keyword,	ustr_language,		tLANGUAGE,	0, NULL, 1);
 		add_token(tok_keyword,	ustr_languagenames,	tLANNAMES,	0, NULL, 1);
 		add_token(tok_keyword,	ustr_messageid,		tMSGID,		0, NULL, 1);
 		add_token(tok_keyword,	ustr_messageidtypedef,	tTYPEDEF,	0, NULL, 1);
 		add_token(tok_keyword,	ustr_outputbase,	tBASE,		0, NULL, 1);
 		add_token(tok_keyword,	ustr_severity,		tSEVERITY,	0, NULL, 1);
 		add_token(tok_keyword,	ustr_severitynames,	tSEVNAMES,	0, NULL, 1);
 		add_token(tok_keyword,	ustr_symbolicname,	tSYMNAME,	0, NULL, 1);
 		add_token(tok_severity,	ustr_error,		0x03,		0, NULL, 0);
 		add_token(tok_severity,	ustr_warning,		0x02,		0, NULL, 0);
 		add_token(tok_severity,	ustr_informational,	0x01,		0, NULL, 0);
 		add_token(tok_severity,	ustr_success,		0x00,		0, NULL, 0);
 		add_token(tok_facility,	ustr_application,	0xFFF,		0, NULL, 0);
 		add_token(tok_facility,	ustr_system,		0x0FF,		0, NULL, 0);
 		add_token(tok_language,	ustr_english,		0x409,		437, ustr_msg00001, 0);
 	}

 	empty_unichar_stack();

 	while(1)
 	{
 		if(want_line)
 		{
 			/* Message text: collect the line up to and including '\n' */
 			while((ch = get_unichar()) != '\n')
 			{
 				if(ch == EOF)
 					xyyerror("Unexpected EOF");
 				push_unichar(ch);
 			}
 			newline();
 			push_unichar(ch);
 			push_unichar(0);
 			/* A line with a single '.' ends the message (with or without '\r') */
 			if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
 			{
 				want_line = 0;
 				/* Reset the codepage to our default after each message */
 				set_codepage(WMC_DEFAULT_CODEPAGE);
 				return tMSGEND;
 			}
 			mcy_lval.str = xunistrdup(get_unichar_stack());
 			return tLINE;
 		}

 		ch = get_unichar();

 		if(ch == EOF)
 			return EOF;

 		if(ch == '\n')
 		{
 			newline();
 			if(want_nl)
 			{
 				want_nl = 0;
 				return tNL;
 			}
 			continue;
 		}

 		/* char_table[] and the <ctype.h> tests below are only valid for 0 - 255 */
 		if(isisochar(ch))
 		{
 			if(want_file)
 			{
 				/* File name: up to 8 short-name characters, stopping at punctuation */
 				int n = 0;
 				while(n < 8 && isisochar(ch))
 				{
 					int t = char_table[ch];
 					if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
 						break;

 					push_unichar(ch);
 					n++;
 					ch = get_unichar();
 				}
 				unget_unichar(ch);
 				push_unichar(0);
 				want_file = 0;
 				mcy_lval.str = xunistrdup(get_unichar_stack());
 				return tFILE;
 			}

 			if(char_table[ch] & CH_IDENT)
 			{
 				/* Identifier: a known token name or a free identifier */
 				token_t *tok;
 				while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
 				{
 					push_unichar(ch);
 					ch = get_unichar();
 				}
 				unget_unichar(ch);
 				push_unichar(0);
 				if(!(tok = lookup_token(get_unichar_stack())))
 				{
 					mcy_lval.str = xunistrdup(get_unichar_stack());
 					return tIDENT;
 				}
 				switch(tok->type)
 				{
 				case tok_keyword:
 					return tok->token;

 				case tok_language:
 					codepage = tok->codepage;
 					/* Fall through */
 				case tok_severity:
 				case tok_facility:
 					mcy_lval.tok = tok;
 					return tTOKEN;

 				default:
 					internal_error(__FILE__, __LINE__, "Invalid token type encountered");
 				}
 			}

 			if(isspace(ch))	/* Ignore space */
 				continue;

 			if(isdigit(ch))
 				return scan_number(ch);
 		}

 		switch(ch)
 		{
 		case ':':
 		case '=':
 		case '+':
 		case '(':
 		case ')':
 			return ch;
 		case ';':
 			/* Comment: runs to the end of the line (or of the input) */
 			while(ch != '\n' && ch != EOF)
 			{
 				push_unichar(ch);
 				ch = get_unichar();
 			}
 			newline();
 			/*
 			 * Always terminate the comment text with a newline. The
 			 * loop also ends on EOF, which must not be stored as a
 			 * character (it would become 0xFFFF).
 			 */
 			push_unichar('\n');
 			push_unichar(0);
 			mcy_lval.str = xunistrdup(get_unichar_stack());
 			return tCOMMENT;
 		default:
 			xyyerror("Invalid character '%c' (0x%04x)", isisochar(ch) && isprint(ch) ? ch : '.', ch);
 		}
 	}
 }
	/*
	* Wine Message Compiler lexical scanner
	*
	* Copyright 2000 Bertho A. Stultiens (BS)
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with this library; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
	*/

	#include "config.h"

	#include <stdio.h>
	#include <stdlib.h>
	#include <ctype.h>
	#include <assert.h>
	#include <string.h>

	#include "utils.h"
	#include "wmc.h"
	#include "lang.h"

	#include "mcy.tab.h"

	/*
	* Keywords are case insensitive. All normal input is treated as
	* being in codepage iso-8859-1 for ascii input files (unicode
	* page 0) and as equivalent unicode if unicode input is selected.
	* All normal input, which is not part of a message text, is
	* enforced to be unicode page 0. Otherwise an error will be
	* generated. The normal file data should only be ASCII because
	* that is the basic definition of the grammar.
	*
	* Byteorder or unicode input is determined automatically by
	* reading the first 8 bytes and checking them against unicode
	* page 0 byteorder (hibyte must be 0).
	* -- FIXME --
	* Alternatively, the input is checked against a special byte
	* sequence to identify the file.
	* -- FIXME --
	*
	*
	* Keywords:
	* Codepages
	* Facility
	* FacilityNames
	* LanguageNames
	* MessageId
	* MessageIdTypedef
	* Severity
	* SeverityNames
	* SymbolicName
	*
	* Default added identifiers for classes:
	* SeverityNames:
	* Success = 0x0
	* Informational = 0x1
	* Warning = 0x2
	* Error = 0x3
	* FacilityNames:
	* System = 0x0FF
	* Application = 0xFFF
	*
	* The 'Codepages' keyword is a wmc extension.
	*/

	/*
	* Zero-terminated WCHAR spellings of grammar keywords, default
	* severity and facility names, the default language name 'English'
	* and its alias 'msg00001'.
	*/
	static const WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
	static const WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
	static const WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
	static const WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
	static const WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
	static const WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
	static const WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
	static const WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
	static const WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
	static const WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
	static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
	static const WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
	static const WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
	static const WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
	static const WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
	static const WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
	static const WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
	static const WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
	static const WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
	/*
	* This table is to beat any form of "expression building" to check for
	* correct filename characters. It is also used for ident checks.
	* FIXME: use it more consistently.
	*/

	#define CH_SHORTNAME 0x01
	#define CH_LONGNAME 0x02
	#define CH_IDENT 0x04
	#define CH_NUMBER 0x08
	/*#define CH_WILDCARD 0x10*/
	/*#define CH_DOT 0x20*/
	#define CH_PUNCT 0x40
	#define CH_INVALID 0x80

	static const char char_table[256] = {
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
	0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
	0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
	0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
	0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
	0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
	0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
	};

	/*
	* Tell whether ch lies in unicode page 0 (iso-8859-1), i.e. has no bits
	* set above the low byte. Returns 1 if so and 0 otherwise; negative
	* values such as EOF yield 0.
	*/
	static int isisochar(int ch)
	{
		const int high_bits = ch & ~0xff;

		return high_bits == 0;
	}

	/* Currently selected codepage number and its conversion table */
	static int codepage;
	static const union cptable *codepage_def;

	/*
	* Select codepage cp for translating input and look up its table with
	* find_codepage(). Reports an error through xyyerror() if no table
	* exists for cp.
	*/
	void set_codepage(int cp)
	{
		const union cptable *table = find_codepage(cp);

		codepage = cp;
		codepage_def = table;
		if(table == NULL)
			xyyerror("Codepage %d not found; cannot process", cp);
	}

	/*
	* Input functions
	*/
	static int nungetstack = 0;	/* Number of characters on the unget stack */
	static int allocungetstack = 0;	/* Allocated capacity of the unget stack */
	/*
	* The unget stack holds characters handed out by get_unichar(), which
	* are 16-bit values, so its elements must be WCHAR. With plain 'char'
	* elements a pushed-back character above 0x7f gets truncated, and 0xff
	* reads back as -1 (EOF) on platforms where char is signed.
	*/
	static WCHAR *ungetstack = NULL;
	static int ninputbuffer = 0;	/* Characters left unread in inputbuffer */
	static WCHAR *inputbuffer = NULL;	/* Current input line as unicode */
	static char *xlatebuffer = NULL;	/* Raw line read from a non-unicode file */

	#define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */

	/*
	* Fill the input buffer with *one* line of input.
	* The line is '\n' terminated so that scanning
	* messages with translation works as expected
	* (otherwise we cannot pre-translate because the
	* language is first known one line before the
	* actual message).
	*
	* For non-unicode input the line is read with fgets() and converted to
	* unicode with the current codepage. For unicode input the first call
	* reads 8 bytes (4 characters) to determine the byte order; later calls
	* read one 16-bit character at a time up to and including '\n'.
	*
	* Returns 1 after adding the number of characters read to ninputbuffer,
	* or 0 when the end of the input has been reached.
	*/
	static int fill_inputbuffer(void)
	{
		int n;
		static const char err_fatalread[] = "Fatal: reading input failed";
		static int endian = -1;	/* -1 until the byte order has been detected */

		if(!inputbuffer)
		{
			/*
			* inputbuffer is indexed in WCHAR units up to INPUTBUFFER_SIZE
			* (conversion limit and read loop below), so it must be sized
			* in characters, not in bytes.
			*/
			inputbuffer = xmalloc(INPUTBUFFER_SIZE * sizeof(*inputbuffer));
			xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
		}

	try_again:
		if(!unicodein)
		{
			char *cptr;
			cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
			if(!cptr && ferror(yyin))
				xyyerror(err_fatalread);
			else if(!cptr)
				return 0;
			assert(codepage_def != NULL);
			/* The terminating '\0' is converted too, hence strlen()+1 */
			n = wine_cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
			if(n < 0)
				internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)", n);
			if(n <= 1)
				goto try_again; /* Should not happen */
			n--; /* Strip added conversion '\0' from input length */
			/*
			* FIXME:
			* Detect UTF-8 in the first time we read some bytes by
			* checking the special sequence "FE..." or something like
			* that. I need to check www.unicode.org for details.
			*/
		}
		else
		{
			if(endian == -1)
			{
				/* First read: fetch 4 characters and guess the byte order from them */
				n = fread(inputbuffer, 1, 8, yyin);
				if(n != 8)
				{
					if(!n && ferror(yyin))
						xyyerror(err_fatalread);
					else
						xyyerror("Fatal: file to short to determine byteorder (should never happen)");
				}
				/* Grammar input is page 0 only, so the high byte of each character must be 0 */
				if(isisochar(inputbuffer[0]) &&
					isisochar(inputbuffer[1]) &&
					isisochar(inputbuffer[2]) &&
					isisochar(inputbuffer[3]))
				{
	#ifdef WORDS_BIGENDIAN
					endian = WMC_BO_BIG;
	#else
					endian = WMC_BO_LITTLE;
	#endif
				}
				else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
					isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
					isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
					isisochar(BYTESWAP_WORD(inputbuffer[3])))
				{
	#ifdef WORDS_BIGENDIAN
					endian = WMC_BO_LITTLE;
	#else
					endian = WMC_BO_BIG;
	#endif
				}
				else
					xyyerror("Fatal: cannot determine file's byteorder");
				/* FIXME:
				* Determine the file-endian with the leader-bytes
				* "FF FE..."; can't remember the exact sequence.
				*/
				n /= 2;	/* Bytes -> characters */
	#ifdef WORDS_BIGENDIAN
				if(endian == WMC_BO_LITTLE)
	#else
				if(endian == WMC_BO_BIG)
	#endif
				{
					/* File byte order differs from the host: swap what we read */
					inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
					inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
					inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
					inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
				}

			}
			else
			{
				int i;
				n = 0;
				for(i = 0; i < INPUTBUFFER_SIZE; i++)
				{
					int t;
					t = fread(&inputbuffer[i], 2, 1, yyin);
					if(!t && ferror(yyin))
						xyyerror(err_fatalread);
					else if(!t)
					{
						if(n)
							break;	/* EOF after a partial line: hand out what we have */
						/*
						* EOF before any character was read. Counting the
						* unread slot as input would return stale data and
						* never report the end of the file.
						*/
						return 0;
					}
					n++;
	#ifdef WORDS_BIGENDIAN
					if(endian == WMC_BO_LITTLE)
	#else
					if(endian == WMC_BO_BIG)
	#endif
					{
						if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
							break;
					}
					else
					{
						if(inputbuffer[i] == '\n')
							break;
					}
				}
			}

		}

		if(!n)
		{
			mcy_warning("Re-read line (input was or converted to zilch)");
			goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
		}

		ninputbuffer += n;
		return 1;
	}

	/*
	* Return the next input character, or EOF at the end of the input.
	* Characters pushed back with unget_unichar() are returned first;
	* otherwise the character comes from the input buffer, which is
	* refilled through fill_inputbuffer() once it is empty.
	* char_number is advanced on every call.
	*/
	static int get_unichar(void)
	{
		static WCHAR *readpos = NULL;	/* Next unread character in inputbuffer */
		int ch;

		char_number++;

		if(nungetstack != 0)
		{
			nungetstack--;
			return ungetstack[nungetstack];
		}

		if(ninputbuffer == 0)
		{
			if(!fill_inputbuffer())
				return EOF;
			readpos = inputbuffer;
		}

		ch = (int)(*readpos & 0xffff);
		readpos++;
		ninputbuffer--;
		return ch;
	}

	/*
	* Push ch back so that the next get_unichar() returns it again.
	* EOF is ignored. The unget stack grows in steps of 32 entries and
	* char_number is moved back by one.
	*/
	static void unget_unichar(int ch)
	{
		if(ch != EOF)
		{
			char_number--;

			if(allocungetstack == nungetstack)
			{
				allocungetstack += 32;
				ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
			}

			ungetstack[nungetstack] = (WCHAR)ch;
			nungetstack++;
		}
	}


	/*
	* Normal character stack.
	* Used for number scanning.
	*/
	static int ncharstack = 0;
	static int alloccharstack = 0;
	static char *charstack = NULL;

	static void empty_char_stack(void)
	{
	ncharstack = 0;
	}

	static void push_char(int ch)
	{
	if(ncharstack == alloccharstack)
	{
	alloccharstack += 32;
	charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
	}
	charstack[ncharstack++] = (char)ch;
	}

	/*
	 * Peek at the most recently pushed character without removing it.
	 * Returns the character as a value in the range 0..255, or 0 when
	 * the stack is empty.
	 */
	static int tos_char_stack(void)
	{
		return ncharstack ? (int)(charstack[ncharstack-1] & 0xff) : 0;
	}

	/*
	 * Return the start of the character stack storage. The data is
	 * only a valid C string if the caller pushed a terminating 0.
	 */
	static char *get_char_stack(void)
	{
	return charstack;
	}

	/*
	* Unicode character stack.
	* Used for general scanner.
	*/
	static int nunicharstack = 0;	/* Number of characters currently on the stack */
	static int allocunicharstack = 0;	/* Number of characters unicharstack has room for */
	static WCHAR *unicharstack = NULL;	/* Storage, (re)allocated by push_unichar() */

	/*
	 * Discard the contents of the unicode character stack. Only the
	 * fill count is reset; the allocated storage is kept for reuse.
	 */
	static void empty_unichar_stack(void)
	{
	nunicharstack = 0;
	}

	/*
	 * Append ch (truncated to a WCHAR) to the unicode character stack.
	 * The storage is enlarged by 128 characters whenever it is full.
	 */
	static void push_unichar(int ch)
	{
		const int grow_by = 128;

		if(nunicharstack == allocunicharstack)
		{
			allocunicharstack += grow_by;
			unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(unicharstack[0]));
		}

		unicharstack[nunicharstack] = (WCHAR)ch;
		nunicharstack++;
	}

	#if 0
	/*
	 * Peek at the most recently pushed unicode character without
	 * removing it; 0 when the stack is empty.
	 * Compiled out: kept for reference only.
	 */
	static int tos_unichar_stack(void)
	{
		return nunicharstack ? (int)(unicharstack[nunicharstack-1] & 0xffff) : 0;
	}
	#endif

	/*
	 * Return the start of the unicode character stack storage. The
	 * data is only a valid string if the caller pushed a terminating 0.
	 */
	static WCHAR *get_unichar_stack(void)
	{
	return unicharstack;
	}

	/*
	* Number scanner
	*
	* state |      ch         | next state
	* ------+-----------------+--------------------------
	*   0   | [0]             | 1
	*   0   | [1-9]           | 4
	*   0   | .               | error (should never occur)
	*   1   | [xX]            | 2
	*   1   | [0-7]           | 3
	*   1   | [89a-wyzA-WYZ_] | error invalid digit
	*   1   | .               | return 0
	*   2   | [0-9a-fA-F]     | 2
	*   2   | [g-zG-Z_]       | error invalid hex digit
	*   2   | .               | return (hex-number) if TOS != [xX] else error
	*   3   | [0-7]           | 3
	*   3   | [89a-zA-Z_]     | error invalid octal digit
	*   3   | .               | return (octal-number)
	*   4   | [0-9]           | 4
	*   4   | [a-zA-Z_]       | error invalid decimal digit
	*   4   | .               | return (decimal-number)
	*
	* All non-identifier characters [^a-zA-Z_0-9] terminate the scan
	* and return the value. This is not entirely correct, but close
	* enough (should check punctuators as trailing context, but the
	* char_table is not adapted to that and it is questionable whether
	* it is worth the trouble).
	* All non-iso-8859-1 characters are an error.
	*/
	/*
	 * Scan a C-style integer literal (decimal, octal with a leading 0,
	 * or hexadecimal with a leading 0x/0X).
	 *
	 * ch is the first character of the number, already read by the
	 * caller; further characters are pulled with get_unichar(). The
	 * character that terminates the number is pushed back with
	 * unget_unichar().
	 *
	 * Returns tNUMBER with the value stored in mcy_lval.num. Malformed
	 * numbers are reported through xyyerror() (presumably fatal; its
	 * definition is not in this part of the file).
	 */
	static int scan_number(int ch)
	{
		int state = 0;
		int base = 10;
		empty_char_stack();

		while(1)
		{
			/* Every character of a number must be in unicode page 0 */
			if(!isisochar(ch))
				xyyerror("Invalid digit");

			switch(state)
			{
			case 0:	/* First character: selects the prefix or decimal path */
				if(isdigit(ch))
				{
					push_char(ch);
					if(ch == '0')
						state = 1;
					else
						state = 4;
				}
				else
					internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state");
				break;
			case 1:	/* Seen a single leading '0' */
				if(ch == 'x' || ch == 'X')
				{
					push_char(ch);
					state = 2;
				}
				else if(ch >= '0' && ch <= '7')
				{
					push_char(ch);
					state = 3;
				}
				/*
				 * isalnum() rather than isalpha(): the octal digits and
				 * [xX] were handled above, so this also rejects '8' and
				 * '9' as the state table requires, instead of treating
				 * "08" as the number 0 followed by a new number 8.
				 */
				else if(isalnum(ch) || ch == '_')
					xyyerror("Invalid number digit");
				else
				{
					/* A lone "0" */
					unget_unichar(ch);
					mcy_lval.num = 0;
					return tNUMBER;
				}
				break;
			case 2:	/* Hexadecimal digits after "0x" */
				if(isxdigit(ch))
					push_char(ch);
				/* The top-of-stack test rejects a bare "0x" without digits */
				else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
					xyyerror("Invalid hex digit");
				else
				{
					base = 16;
					goto finish;
				}
				break;
			case 3:	/* Octal digits */
				if(ch >= '0' && ch <= '7')
					push_char(ch);
				else if(isalnum(ch) || ch == '_')
					xyyerror("Invalid octal digit");
				else
				{
					base = 8;
					goto finish;
				}
				break;
			case 4:	/* Decimal digits */
				if(isdigit(ch))
					push_char(ch);
				else if(isalnum(ch) || ch == '_')
					xyyerror("Invalid decimal digit");
				else
				{
					base = 10;
					goto finish;
				}
				break;
			default:
				internal_error(__FILE__, __LINE__, "Invalid state in number-scanner");
			}
			ch = get_unichar();
		}
	finish:
		/* Give the terminator back and convert the collected digits */
		unget_unichar(ch);
		push_char(0);
		mcy_lval.num = strtoul(get_char_stack(), NULL, base);
		return tNUMBER;
	}

	/*
	 * Account for a consumed end-of-line: advance line_number and
	 * restart char_number at 1.
	 */
	static void newline(void)
	{
		char_number = 1;
		line_number += 1;
	}

	static int unisort(const void p1, const void p2)
	{
	return unistricmp(((const token_t )p1)->name, ((const token_t )p2)->name);
	}

	static token_t *tokentable = NULL;	/* Token table, re-sorted with unisort() by every add_token() */
	static int ntokentable = 0;	/* Number of entries in tokentable */

	token_t lookup_token(const WCHAR s)
	{
	token_t tok;

	tok.name = s;
	return (token_t )bsearch(&tok, tokentable, ntokentable, sizeof(tokentable), unisort);
	}

	/*
	 * Append a new entry to the token table and re-sort the table so
	 * that lookup_token() can keep using a binary search.
	 *
	 *  type   kind of token
	 *  name   token name; the pointer is stored, the string is not copied
	 *  tok    token value
	 *  cp     codepage associated with the token
	 *  alias  alias string (may be NULL); the pointer is stored as is
	 *  fix    stored in the entry's `fixed' member
	 *
	 * The table is reallocated on every call, so pointers obtained
	 * earlier from lookup_token() or get_tokentable() may become stale.
	 */
	void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
	{
		token_t *entry;

		ntokentable++;
		tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));

		entry = &tokentable[ntokentable-1];
		entry->type = type;
		entry->name = name;
		entry->token = tok;
		entry->codepage = cp;
		entry->alias = alias;
		entry->fixed = fix;

		qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
	}

	/*
	 * Give the caller access to the token table.
	 *
	 *  tab  receives a pointer to the first table entry (must not be NULL)
	 *  len  receives the number of entries (must not be NULL)
	 *
	 * The table itself is not copied.
	 */
	void get_tokentable(token_t **tab, int *len)
	{
		assert(tab != NULL);
		assert(len != NULL);
		*tab = tokentable;
		*len = ntokentable;
	}

	/*
	* The scanner
	*
	*/
	/*
	 * Return the next token of the input.
	 *
	 * On the first call the default codepage is selected and the
	 * built-in keywords, severities, facilities and the English
	 * language entry are registered with add_token().
	 *
	 * Three flags steer the scanner (presumably set by the parser;
	 * that code is not in this part of the file):
	 *  want_line  read one complete line of message text; returns tLINE,
	 *             or tMSGEND when the line is a single '.' (after which
	 *             the default codepage is restored)
	 *  want_nl    report the next newline as tNL instead of skipping it
	 *  want_file  read a file name of at most 8 characters; returns tFILE
	 *
	 * Otherwise the scanner returns tIDENT, a keyword token, tTOKEN,
	 * tNUMBER, tCOMMENT, one of the characters ':', '=', '+', '(', ')',
	 * or EOF at end of input. String values are duplicated with
	 * xunistrdup() and stored in mcy_lval.str.
	 */
	int mcy_lex(void)
	{
		static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
		static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
		static int isinit = 0;
		int ch;

		if(!isinit)
		{
			isinit++;
			set_codepage(WMC_DEFAULT_CODEPAGE);
			add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
			add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
			add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
			add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
			add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
			add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
			add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
			add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
			add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
			add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
			add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
			add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
			add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
			add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
			add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
			add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
			add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
			add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
		}

		empty_unichar_stack();

		while(1)
		{
			if(want_line)
			{
				/* Message text: collect everything up to and including the newline */
				while((ch = get_unichar()) != '\n')
				{
					if(ch == EOF)
						xyyerror("Unexpected EOF");
					push_unichar(ch);
				}
				newline();
				push_unichar(ch);
				push_unichar(0);
				/* A line holding only '.' (LF or CRLF terminated) ends the message */
				if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
				{
					want_line = 0;
					/* Reset the codepage to our default after each message */
					set_codepage(WMC_DEFAULT_CODEPAGE);
					return tMSGEND;
				}
				mcy_lval.str = xunistrdup(get_unichar_stack());
				return tLINE;
			}

			ch = get_unichar();

			if(ch == EOF)
				return EOF;

			if(ch == '\n')
			{
				newline();
				if(want_nl)
				{
					want_nl = 0;
					return tNL;
				}
				continue;
			}

			if(isisochar(ch))
			{
				if(want_file)
				{
					/* File name: at most 8 non-punctuation short-name characters */
					int n = 0;
					while(n < 8 && isisochar(ch))
					{
						int t = char_table[ch];
						if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
							break;

						push_unichar(ch);
						n++;
						ch = get_unichar();
					}
					unget_unichar(ch);
					push_unichar(0);
					want_file = 0;
					mcy_lval.str = xunistrdup(get_unichar_stack());
					return tFILE;
				}

				if(char_table[ch] & CH_IDENT)
				{
					/* Identifier: keyword, registered token or plain name */
					token_t *tok;
					while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
					{
						push_unichar(ch);
						ch = get_unichar();
					}
					unget_unichar(ch);
					push_unichar(0);
					if(!(tok = lookup_token(get_unichar_stack())))
					{
						mcy_lval.str = xunistrdup(get_unichar_stack());
						return tIDENT;
					}
					switch(tok->type)
					{
					case tok_keyword:
						return tok->token;

					case tok_language:
						codepage = tok->codepage;
						/* Fall through */
					case tok_severity:
					case tok_facility:
						mcy_lval.tok = tok;
						return tTOKEN;

					default:
						internal_error(__FILE__, __LINE__, "Invalid token type encountered");
					}
				}

				if(isspace(ch))	/* Ignore space */
					continue;

				if(isdigit(ch))
					return scan_number(ch);
			}

			switch(ch)
			{
			case ':':
			case '=':
			case '+':
			case '(':
			case ')':
				return ch;
			case ';':
				/* Comment: runs to the end of the line */
				while(ch != '\n' && ch != EOF)
				{
					push_unichar(ch);
					ch = get_unichar();
				}
				/*
				 * Only count a line when a newline was actually read. A
				 * comment cut short by EOF still gets a terminating
				 * newline, rather than EOF truncated to a WCHAR.
				 */
				if(ch == '\n')
					newline();
				push_unichar('\n');	/* Include the newline */
				push_unichar(0);
				mcy_lval.str = xunistrdup(get_unichar_stack());
				return tCOMMENT;
			default:
				xyyerror("Invalid character '%c' (0x%04x)", isisochar(ch) && isprint(ch) ? ch : '.', ch);
			}
		}
	}