/*
 * Unicode routines for use inside the server
 *
 * Copyright (C) 1999 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
| 20 | |
Alexandre Julliard | 5769d1d | 2002-04-26 19:05:15 +0000 | [diff] [blame] | 21 | #include "config.h" |
| 22 | #include "wine/port.h" |
| 23 | |
Alexandre Julliard | 7e495e1 | 2000-07-25 21:01:59 +0000 | [diff] [blame] | 24 | #include <ctype.h> |
Alexandre Julliard | d16319c | 1999-11-25 21:30:24 +0000 | [diff] [blame] | 25 | #include <stdio.h> |
| 26 | |
| 27 | #include "unicode.h" |
| 28 | |
/* number of bytes that follow a given first byte in a UTF-8 sequence */
/* the table only covers bytes above 0x7f and is indexed by (byte - 0x80); */
/* a zero entry means the byte does not start a multi-byte sequence */
static const char utf8_length[128] =
{
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  /* 0x80-0x8f */
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  /* 0x90-0x9f */
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  /* 0xa0-0xaf */
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  /* 0xb0-0xbf */
    0,0,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,  /* 0xc0-0xcf (0xc0 and 0xc1 are rejected) */
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,  /* 0xd0-0xdf */
    2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  /* 0xe0-0xef */
    3,3,3,3,3,0,0,0,  0,0,0,0,0,0,0,0   /* 0xf0-0xff (0xf5 and above are rejected) */
};
| 41 | |
/* mask selecting the payload bits of the first byte, */
/* indexed by the number of bytes that follow it in the sequence */
static const unsigned char utf8_mask[4] =
{
    0x7f,  /* no following byte */
    0x1f,  /* 1 following byte */
    0x0f,  /* 2 following bytes */
    0x07   /* 3 following bytes */
};
| 44 | |
/* smallest Unicode value that may be encoded with a given sequence size, */
/* indexed by the number of bytes that follow the first byte; */
/* a decoded value below this limit is an overlong encoding */
static const unsigned int utf8_minval[4] =
{
    0x0,     /* no following byte */
    0x80,    /* 1 following byte */
    0x800,   /* 2 following bytes */
    0x10000  /* 3 following bytes */
};
| 47 | |
/* convert one hexadecimal digit character to its numeric value (0-15) */
/* the result is only meaningful if ch is a valid hex digit */
static inline char to_hex( char ch )
{
    /* the <ctype.h> functions are only defined for values representable as */
    /* unsigned char (or EOF), so never hand them a possibly negative char */
    const unsigned char uch = (unsigned char)ch;

    if (isdigit( uch )) return uch - '0';
    return tolower( uch ) - 'a' + 10;
}
| 53 | |
| 54 | /* parse an escaped string back into Unicode */ |
| 55 | /* return the number of chars read from the input, or -1 on output overflow */ |
| 56 | int parse_strW( WCHAR *buffer, data_size_t *len, const char *src, char endchar ) |
| 57 | { |
| 58 | WCHAR *dest = buffer; |
| 59 | WCHAR *end = buffer + *len / sizeof(WCHAR); |
| 60 | const char *p = src; |
| 61 | unsigned char ch; |
| 62 | |
| 63 | while (*p && *p != endchar && dest < end) |
| 64 | { |
| 65 | if (*p == '\\') |
| 66 | { |
| 67 | p++; |
| 68 | if (!*p) break; |
| 69 | switch(*p) |
| 70 | { |
| 71 | case 'a': *dest++ = '\a'; p++; continue; |
| 72 | case 'b': *dest++ = '\b'; p++; continue; |
| 73 | case 'e': *dest++ = '\e'; p++; continue; |
| 74 | case 'f': *dest++ = '\f'; p++; continue; |
| 75 | case 'n': *dest++ = '\n'; p++; continue; |
| 76 | case 'r': *dest++ = '\r'; p++; continue; |
| 77 | case 't': *dest++ = '\t'; p++; continue; |
| 78 | case 'v': *dest++ = '\v'; p++; continue; |
| 79 | case 'x': /* hex escape */ |
| 80 | p++; |
| 81 | if (!isxdigit(*p)) *dest = 'x'; |
| 82 | else |
| 83 | { |
| 84 | *dest = to_hex(*p++); |
| 85 | if (isxdigit(*p)) *dest = (*dest * 16) + to_hex(*p++); |
| 86 | if (isxdigit(*p)) *dest = (*dest * 16) + to_hex(*p++); |
| 87 | if (isxdigit(*p)) *dest = (*dest * 16) + to_hex(*p++); |
| 88 | } |
| 89 | dest++; |
| 90 | continue; |
| 91 | case '0': |
| 92 | case '1': |
| 93 | case '2': |
| 94 | case '3': |
| 95 | case '4': |
| 96 | case '5': |
| 97 | case '6': |
| 98 | case '7': /* octal escape */ |
| 99 | *dest = *p++ - '0'; |
| 100 | if (*p >= '0' && *p <= '7') *dest = (*dest * 8) + (*p++ - '0'); |
| 101 | if (*p >= '0' && *p <= '7') *dest = (*dest * 8) + (*p++ - '0'); |
| 102 | dest++; |
| 103 | continue; |
| 104 | } |
| 105 | /* unrecognized escape: fall through to normal char handling */ |
| 106 | } |
| 107 | |
| 108 | |
| 109 | ch = *p++; |
| 110 | if (ch < 0x80) *dest++ = ch; |
| 111 | else /* parse utf8 char */ |
| 112 | { |
| 113 | int charlen = utf8_length[ch-0x80]; |
| 114 | unsigned int res = ch & utf8_mask[charlen]; |
| 115 | |
| 116 | switch(charlen) |
| 117 | { |
| 118 | case 3: |
| 119 | if ((ch = *p ^ 0x80) >= 0x40) break; |
| 120 | res = (res << 6) | ch; |
| 121 | p++; |
| 122 | case 2: |
| 123 | if ((ch = *p ^ 0x80) >= 0x40) break; |
| 124 | res = (res << 6) | ch; |
| 125 | p++; |
| 126 | case 1: |
| 127 | if ((ch = *p ^ 0x80) >= 0x40) break; |
| 128 | res = (res << 6) | ch; |
| 129 | p++; |
| 130 | if (res < utf8_minval[charlen]) break; |
| 131 | if (res > 0x10ffff) break; |
| 132 | if (res <= 0xffff) *dest++ = res; |
| 133 | else /* we need surrogates */ |
| 134 | { |
| 135 | res -= 0x10000; |
| 136 | *dest++ = 0xd800 | (res >> 10); |
| 137 | if (dest < end) *dest++ = 0xdc00 | (res & 0x3ff); |
| 138 | } |
| 139 | continue; |
| 140 | } |
| 141 | /* ignore invalid char */ |
| 142 | } |
| 143 | } |
| 144 | if (dest >= end) return -1; /* overflow */ |
| 145 | *dest++ = 0; |
| 146 | if (!*p) return -1; /* delimiter not found */ |
| 147 | *len = (dest - buffer) * sizeof(WCHAR); |
| 148 | return p + 1 - src; |
| 149 | } |
| 150 | |
Alexandre Julliard | d16319c | 1999-11-25 21:30:24 +0000 | [diff] [blame] | 151 | /* dump a Unicode string with proper escaping */ |
Alexandre Julliard | 1271739 | 2008-01-07 20:17:24 +0100 | [diff] [blame] | 152 | int dump_strW( const WCHAR *str, data_size_t len, FILE *f, const char escape[2] ) |
Alexandre Julliard | d16319c | 1999-11-25 21:30:24 +0000 | [diff] [blame] | 153 | { |
| 154 | static const char escapes[32] = ".......abtnvfr.............e...."; |
| 155 | char buffer[256]; |
| 156 | char *pos = buffer; |
| 157 | int count = 0; |
| 158 | |
| 159 | for (; len; str++, len--) |
| 160 | { |
| 161 | if (pos > buffer + sizeof(buffer) - 8) |
| 162 | { |
| 163 | fwrite( buffer, pos - buffer, 1, f ); |
| 164 | count += pos - buffer; |
| 165 | pos = buffer; |
| 166 | } |
| 167 | if (*str > 127) /* hex escape */ |
| 168 | { |
| 169 | if (len > 1 && str[1] < 128 && isxdigit((char)str[1])) |
| 170 | pos += sprintf( pos, "\\x%04x", *str ); |
| 171 | else |
| 172 | pos += sprintf( pos, "\\x%x", *str ); |
| 173 | continue; |
| 174 | } |
| 175 | if (*str < 32) /* octal or C escape */ |
| 176 | { |
| 177 | if (!*str && len == 1) continue; /* do not output terminating NULL */ |
| 178 | if (escapes[*str] != '.') |
| 179 | pos += sprintf( pos, "\\%c", escapes[*str] ); |
| 180 | else if (len > 1 && str[1] >= '0' && str[1] <= '7') |
| 181 | pos += sprintf( pos, "\\%03o", *str ); |
| 182 | else |
| 183 | pos += sprintf( pos, "\\%o", *str ); |
| 184 | continue; |
| 185 | } |
| 186 | if (*str == '\\' || *str == escape[0] || *str == escape[1]) *pos++ = '\\'; |
| 187 | *pos++ = *str; |
| 188 | } |
| 189 | fwrite( buffer, pos - buffer, 1, f ); |
| 190 | count += pos - buffer; |
| 191 | return count; |
| 192 | } |