blob: 726889354feacb710e5a347a7bd9ac5a5d2d38da [file] [log] [blame]
/*
 * WideCharToMultiByte implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
20
21#include <string.h>
22
23#include "winnls.h"
24#include "wine/unicode.h"
25
Alexandre Julliarde709cdb2000-12-29 03:56:06 +000026/* search for a character in the unicode_compose_table; helper for compose() */
27static inline int binary_search( WCHAR ch, int low, int high )
28{
29 extern const WCHAR unicode_compose_table[];
30 while (low <= high)
31 {
32 int pos = (low + high) / 2;
33 if (unicode_compose_table[2*pos] < ch)
34 {
35 low = pos + 1;
36 continue;
37 }
38 if (unicode_compose_table[2*pos] > ch)
39 {
40 high = pos - 1;
41 continue;
42 }
43 return pos;
44 }
45 return -1;
46}
47
48/* return the result of the composition of two Unicode chars, or 0 if none */
49static WCHAR compose( const WCHAR *str )
50{
51 extern const WCHAR unicode_compose_table[];
52 extern const unsigned int unicode_compose_table_size;
53
54 int idx = 1, low = 0, high = unicode_compose_table_size - 1;
55 for (;;)
56 {
57 int pos = binary_search( str[idx], low, high );
58 if (pos == -1) return 0;
59 if (!idx--) return unicode_compose_table[2*pos+1];
60 low = unicode_compose_table[2*pos+1];
61 high = unicode_compose_table[2*pos+3] - 1;
62 }
63}
64
65
/****************************************************************/
/* sbcs support */
68
69/* check if 'ch' is an acceptable sbcs mapping for 'wch' */
70static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
71 WCHAR wch, unsigned char ch )
72{
73 if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
74 if (ch != (unsigned char)table->info.def_char) return 1;
75 return (wch == table->info.def_unicode_char);
76}
77
78/* query necessary dst length for src string */
Alexandre Julliard8a374502002-04-11 17:36:09 +000079static int get_length_sbcs( const struct sbcs_table *table, int flags,
80 const WCHAR *src, unsigned int srclen, int *used )
Alexandre Julliarde709cdb2000-12-29 03:56:06 +000081{
Alexandre Julliard8a374502002-04-11 17:36:09 +000082 const unsigned char * const uni2cp_low = table->uni2cp_low;
83 const unsigned short * const uni2cp_high = table->uni2cp_high;
84 int ret, tmp;
85 WCHAR composed;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +000086
Alexandre Julliard8a374502002-04-11 17:36:09 +000087 if (!used) used = &tmp; /* avoid checking on every char */
88 *used = 0;
89
90 for (ret = 0; srclen; ret++, src++, srclen--)
Alexandre Julliarde709cdb2000-12-29 03:56:06 +000091 {
Alexandre Julliard8a374502002-04-11 17:36:09 +000092 WCHAR wch = *src;
93 unsigned char ch;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +000094
Alexandre Julliard8a374502002-04-11 17:36:09 +000095 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
Alexandre Julliarde709cdb2000-12-29 03:56:06 +000096 {
Alexandre Julliard8a374502002-04-11 17:36:09 +000097 /* now check if we can use the composed char */
98 ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
99 if (is_valid_sbcs_mapping( table, flags, composed, ch ))
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000100 {
Alexandre Julliard8a374502002-04-11 17:36:09 +0000101 /* we have a good mapping, use it */
102 src++;
103 srclen--;
104 continue;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000105 }
Alexandre Julliard8a374502002-04-11 17:36:09 +0000106 /* no mapping for the composed char, check the other flags */
107 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
108 {
109 *used = 1;
110 src++; /* skip the non-spacing char */
111 srclen--;
112 continue;
113 }
114 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
115 {
116 src++;
117 srclen--;
118 }
119 /* WC_SEPCHARS is the default */
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000120 }
Alexandre Julliard8a374502002-04-11 17:36:09 +0000121 if (!*used)
122 {
123 ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
124 *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
125 }
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000126 }
127 return ret;
128}
129
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000130/* wcstombs for single-byte code page */
131static inline int wcstombs_sbcs( const struct sbcs_table *table,
Alexandre Julliard261abcd2000-06-12 01:16:11 +0000132 const WCHAR *src, unsigned int srclen,
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000133 char *dst, unsigned int dstlen )
134{
135 const unsigned char * const uni2cp_low = table->uni2cp_low;
136 const unsigned short * const uni2cp_high = table->uni2cp_high;
137 int ret = srclen;
138
139 if (dstlen < srclen)
140 {
141 /* buffer too small: fill it up to dstlen and return error */
142 srclen = dstlen;
143 ret = -1;
144 }
145
146 for (;;)
147 {
148 switch(srclen)
149 {
150 default:
151 case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
152 case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
153 case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
154 case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
155 case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
156 case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
157 case 10: dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
158 case 9: dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
159 case 8: dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
160 case 7: dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
161 case 6: dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
162 case 5: dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
163 case 4: dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
164 case 3: dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
165 case 2: dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
166 case 1: dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
167 case 0: break;
168 }
169 if (srclen < 16) return ret;
170 dst += 16;
171 src += 16;
172 srclen -= 16;
173 }
174}
175
Alexandre Julliard66593082000-06-10 04:29:16 +0000176/* slow version of wcstombs_sbcs that handles the various flags */
177static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
Alexandre Julliard261abcd2000-06-12 01:16:11 +0000178 const WCHAR *src, unsigned int srclen,
Alexandre Julliard66593082000-06-10 04:29:16 +0000179 char *dst, unsigned int dstlen,
180 const char *defchar, int *used )
181{
Alexandre Julliard66593082000-06-10 04:29:16 +0000182 const unsigned char * const uni2cp_low = table->uni2cp_low;
183 const unsigned short * const uni2cp_high = table->uni2cp_high;
184 const unsigned char table_default = table->info.def_char & 0xff;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000185 unsigned int len;
186 int tmp;
187 WCHAR composed;
Alexandre Julliard66593082000-06-10 04:29:16 +0000188
189 if (!defchar) defchar = &table_default;
190 if (!used) used = &tmp; /* avoid checking on every char */
James Hathewaye21ead402001-04-04 18:31:13 +0000191 *used = 0;
Alexandre Julliard66593082000-06-10 04:29:16 +0000192
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000193 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
Alexandre Julliard66593082000-06-10 04:29:16 +0000194 {
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000195 WCHAR wch = *src;
196
197 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
Alexandre Julliard66593082000-06-10 04:29:16 +0000198 {
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000199 /* now check if we can use the composed char */
200 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
201 if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
202 {
203 /* we have a good mapping, use it */
204 src++;
205 srclen--;
206 continue;
207 }
208 /* no mapping for the composed char, check the other flags */
209 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
210 {
211 *dst = *defchar;
212 *used = 1;
213 src++; /* skip the non-spacing char */
214 srclen--;
215 continue;
216 }
217 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
218 {
219 src++;
220 srclen--;
221 }
222 /* WC_SEPCHARS is the default */
223 }
224
225 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
226 if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
227 {
228 *dst = *defchar;
Alexandre Julliard66593082000-06-10 04:29:16 +0000229 *used = 1;
230 }
Alexandre Julliard66593082000-06-10 04:29:16 +0000231 }
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000232 if (srclen) return -1; /* overflow */
233 return dstlen - len;
234}
235
236
/****************************************************************/
/* dbcs support */
239
240/* check if 'ch' is an acceptable dbcs mapping for 'wch' */
241static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
242 WCHAR wch, unsigned short ch )
243{
244 if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
245 if (flags & WC_NO_BEST_FIT_CHARS)
246 {
247 /* check if char maps back to the same Unicode value */
248 if (ch & 0xff00)
249 {
250 unsigned char off = table->cp2uni_leadbytes[ch >> 8];
251 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
252 }
253 return (table->cp2uni[ch & 0xff] == wch);
254 }
255 return 1;
Alexandre Julliard66593082000-06-10 04:29:16 +0000256}
257
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000258/* query necessary dst length for src string */
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000259static int get_length_dbcs( const struct dbcs_table *table, int flags,
260 const WCHAR *src, unsigned int srclen,
Alexandre Julliard8a374502002-04-11 17:36:09 +0000261 const char *defchar, int *used )
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000262{
263 const unsigned short * const uni2cp_low = table->uni2cp_low;
264 const unsigned short * const uni2cp_high = table->uni2cp_high;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000265 WCHAR defchar_value = table->info.def_char;
266 WCHAR composed;
Alexandre Julliard8a374502002-04-11 17:36:09 +0000267 int len, tmp;
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000268
Alexandre Julliard8a374502002-04-11 17:36:09 +0000269 if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000270 {
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000271 for (len = 0; srclen; srclen--, src++, len++)
272 {
273 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
274 }
275 return len;
276 }
277
278 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
Alexandre Julliard8a374502002-04-11 17:36:09 +0000279 if (!used) used = &tmp; /* avoid checking on every char */
280 *used = 0;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000281 for (len = 0; srclen; len++, srclen--, src++)
282 {
283 unsigned short res;
284 WCHAR wch = *src;
285
286 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
287 {
288 /* now check if we can use the composed char */
289 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
290
291 if (is_valid_dbcs_mapping( table, flags, composed, res ))
292 {
293 /* we have a good mapping for the composed char, use it */
294 if (res & 0xff00) len++;
295 src++;
296 srclen--;
297 continue;
298 }
299 /* no mapping for the composed char, check the other flags */
300 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
301 {
302 if (defchar_value & 0xff00) len++;
Alexandre Julliard8a374502002-04-11 17:36:09 +0000303 *used = 1;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000304 src++; /* skip the non-spacing char */
305 srclen--;
306 continue;
307 }
308 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
309 {
310 src++;
311 srclen--;
312 }
313 /* WC_SEPCHARS is the default */
314 }
315
316 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
Alexandre Julliard8a374502002-04-11 17:36:09 +0000317 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
318 {
319 res = defchar_value;
320 *used = 1;
321 }
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000322 if (res & 0xff00) len++;
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000323 }
324 return len;
325}
326
327/* wcstombs for double-byte code page */
328static inline int wcstombs_dbcs( const struct dbcs_table *table,
Alexandre Julliard261abcd2000-06-12 01:16:11 +0000329 const WCHAR *src, unsigned int srclen,
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000330 char *dst, unsigned int dstlen )
331{
332 const unsigned short * const uni2cp_low = table->uni2cp_low;
333 const unsigned short * const uni2cp_high = table->uni2cp_high;
334 int len;
335
336 for (len = dstlen; srclen && len; len--, srclen--, src++)
337 {
338 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
339 if (res & 0xff00)
340 {
341 if (len == 1) break; /* do not output a partial char */
342 len--;
343 *dst++ = res >> 8;
344 }
345 *dst++ = (char)res;
346 }
347 if (srclen) return -1; /* overflow */
348 return dstlen - len;
349}
350
Alexandre Julliard66593082000-06-10 04:29:16 +0000351/* slow version of wcstombs_dbcs that handles the various flags */
352static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
Alexandre Julliard261abcd2000-06-12 01:16:11 +0000353 const WCHAR *src, unsigned int srclen,
Alexandre Julliard66593082000-06-10 04:29:16 +0000354 char *dst, unsigned int dstlen,
355 const char *defchar, int *used )
356{
357 const unsigned short * const uni2cp_low = table->uni2cp_low;
358 const unsigned short * const uni2cp_high = table->uni2cp_high;
Alexandre Julliard261abcd2000-06-12 01:16:11 +0000359 WCHAR defchar_value = table->info.def_char;
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000360 WCHAR composed;
Alexandre Julliard66593082000-06-10 04:29:16 +0000361 int len, tmp;
362
363 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
364 if (!used) used = &tmp; /* avoid checking on every char */
James Hathewaye21ead402001-04-04 18:31:13 +0000365 *used = 0;
Alexandre Julliard66593082000-06-10 04:29:16 +0000366
367 for (len = dstlen; srclen && len; len--, srclen--, src++)
368 {
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000369 unsigned short res;
370 WCHAR wch = *src;
Alexandre Julliard66593082000-06-10 04:29:16 +0000371
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000372 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
373 {
374 /* now check if we can use the composed char */
375 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
376
377 if (is_valid_dbcs_mapping( table, flags, composed, res ))
378 {
379 /* we have a good mapping for the composed char, use it */
380 src++;
381 srclen--;
382 goto output_char;
383 }
384 /* no mapping for the composed char, check the other flags */
385 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
386 {
387 res = defchar_value;
388 *used = 1;
389 src++; /* skip the non-spacing char */
390 srclen--;
391 goto output_char;
392 }
393 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
394 {
395 src++;
396 srclen--;
397 }
398 /* WC_SEPCHARS is the default */
399 }
400
401 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
402 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
Alexandre Julliard66593082000-06-10 04:29:16 +0000403 {
404 res = defchar_value;
405 *used = 1;
406 }
Alexandre Julliard66593082000-06-10 04:29:16 +0000407
Alexandre Julliarde709cdb2000-12-29 03:56:06 +0000408 output_char:
Alexandre Julliard66593082000-06-10 04:29:16 +0000409 if (res & 0xff00)
410 {
411 if (len == 1) break; /* do not output a partial char */
412 len--;
413 *dst++ = res >> 8;
414 }
415 *dst++ = (char)res;
416 }
417 if (srclen) return -1; /* overflow */
418 return dstlen - len;
419}
420
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000421/* wide char to multi byte string conversion */
422/* return -1 on dst buffer overflow */
423int cp_wcstombs( const union cptable *table, int flags,
Alexandre Julliard261abcd2000-06-12 01:16:11 +0000424 const WCHAR *src, int srclen,
Alexandre Julliard66593082000-06-10 04:29:16 +0000425 char *dst, int dstlen, const char *defchar, int *used )
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000426{
427 if (table->info.char_size == 1)
428 {
Alexandre Julliard66593082000-06-10 04:29:16 +0000429 if (flags || defchar || used)
Alexandre Julliard8a374502002-04-11 17:36:09 +0000430 {
431 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
Alexandre Julliard66593082000-06-10 04:29:16 +0000432 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
433 dst, dstlen, defchar, used );
Alexandre Julliard8a374502002-04-11 17:36:09 +0000434 }
435 if (!dstlen) return srclen;
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000436 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
437 }
438 else /* mbcs */
439 {
Alexandre Julliard8a374502002-04-11 17:36:09 +0000440 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
Alexandre Julliard66593082000-06-10 04:29:16 +0000441 if (flags || defchar || used)
442 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
443 dst, dstlen, defchar, used );
Bertho Stultiens30855912000-06-13 04:34:41 +0000444 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
Alexandre Julliardfb270dd2000-06-09 05:27:21 +0000445 }
446}