| /* |
| * Unicode Bidirectional Algorithm implementation |
| * |
| * Copyright 2003 Shachar Shemesh |
| * Copyright 2007 Maarten Lankhorst |
| * Copyright 2010 CodeWeavers, Aric Stewart |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA |
| * |
| * Code derived from the modified reference implementation |
| * that was found in revision 17 of http://unicode.org/reports/tr9/ |
| * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM" |
| * |
| * -- Copyright (C) 1999-2005, ASMUS, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of the Unicode data files and any associated documentation (the |
| * "Data Files") or Unicode software and any associated documentation (the |
| * "Software") to deal in the Data Files or Software without restriction, |
| * including without limitation the rights to use, copy, modify, merge, |
| * publish, distribute, and/or sell copies of the Data Files or Software, |
| * and to permit persons to whom the Data Files or Software are furnished |
| * to do so, provided that (a) the above copyright notice(s) and this |
| * permission notice appear with all copies of the Data Files or Software, |
| * (b) both the above copyright notice(s) and this permission notice appear |
| * in associated documentation, and (c) there is clear notice in each |
| * modified Data File or in the Software as well as in the documentation |
| * associated with the Data File(s) or Software that the data or software |
| * has been modified. |
| */ |
| |
| #include <stdarg.h> |
| |
| #include "windef.h" |
| #include "winbase.h" |
| #include "wine/debug.h" |
| |
| #include "dwrite_private.h" |
| |
| WINE_DEFAULT_DEBUG_CHANNEL(bidi); |
| |
| extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN; |
| |
| #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0) |
| #define MAX_DEPTH 125 |
| |
| #define odd(x) ((x) & 1) |
| |
| /*------------------------------------------------------------------------ |
| Bidirectional Character Types |
| |
| as defined by the Unicode Bidirectional Algorithm Table 3-7. |
| |
| Note: |
| |
| The list of bidirectional character types here is not grouped the |
| same way as the table 3-7, since the numberic values for the types |
| are chosen to keep the state and action tables compact. |
| ------------------------------------------------------------------------*/ |
| enum directions |
| { |
| /* input types */ |
| /* ON MUST be zero, code relies on ON = NI = 0 */ |
| ON = 0, /* Other Neutral */ |
| L, /* Left Letter */ |
| R, /* Right Letter */ |
| AN, /* Arabic Number */ |
| EN, /* European Number */ |
| AL, /* Arabic Letter (Right-to-left) */ |
| NSM, /* Non-spacing Mark */ |
| CS, /* Common Separator */ |
| ES, /* European Separator */ |
| ET, /* European Terminator (post/prefix e.g. $ and %) */ |
| |
| /* resolved types */ |
| BN, /* Boundary neutral (type of RLE etc after explicit levels) */ |
| |
| /* input types, */ |
| S, /* Segment Separator (TAB) // used only in L1 */ |
| WS, /* White space // used only in L1 */ |
| B, /* Paragraph Separator (aka as PS) */ |
| |
| /* types for explicit controls */ |
| RLO, /* these are used only in X1-X9 */ |
| RLE, |
| LRO, |
| LRE, |
| PDF, |
| |
| LRI, /* Isolate formatting characters new with 6.3 */ |
| RLI, |
| FSI, |
| PDI, |
| |
| /* resolved types, also resolved directions */ |
| NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */ |
| }; |
| |
| static const char debug_type[][4] = |
| { |
| "ON", /* Other Neutral */ |
| "L", /* Left Letter */ |
| "R", /* Right Letter */ |
| "AN", /* Arabic Number */ |
| "EN", /* European Number */ |
| "AL", /* Arabic Letter (Right-to-left) */ |
| "NSM", /* Non-spacing Mark */ |
| "CS", /* Common Separator */ |
| "ES", /* European Separator */ |
| "ET", /* European Terminator (post/prefix e.g. $ and %) */ |
| "BN", /* Boundary neutral (type of RLE etc after explicit levels) */ |
| "S", /* Segment Separator (TAB) used only in L1 */ |
| "WS", /* White space used only in L1 */ |
| "B", /* Paragraph Separator (aka as PS) */ |
| "RLO", /* these are used only in X1-X9 */ |
| "RLE", |
| "LRO", |
| "LRE", |
| "PDF", |
| "LRI", /* Isolate formatting characters new with 6.3 */ |
| "RLI", |
| "FSI", |
| "PDI", |
| }; |
| |
| static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end) |
| { |
| int i, len = 0; |
| TRACE("%s:", header); |
| for (i = start; i < end && len < 200; i++) { |
| TRACE(" %s", debug_type[types[i]]); |
| len += strlen(debug_type[types[i]])+1; |
| } |
| if (i != end) |
| TRACE("..."); |
| TRACE("\n"); |
| } |
| |
| /* Convert the libwine information to the direction enum */ |
| static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count) |
| { |
| static const enum directions dir_map[16] = |
| { |
| L, /* unassigned defaults to L */ |
| L, |
| R, |
| EN, |
| ES, |
| ET, |
| AN, |
| CS, |
| B, |
| S, |
| WS, |
| ON, |
| AL, |
| NSM, |
| BN, |
| PDF /* also LRE, LRO, RLE, RLO */ |
| }; |
| |
| UINT32 i; |
| |
| for (i = 0; i < count; ++i) { |
| chartype[i] = dir_map[get_char_typeW(string[i]) >> 12]; |
| |
| switch (chartype[i]) { |
| case ES: |
| break; |
| case PDF: |
| switch (string[i]) { |
| case 0x202a: chartype[i] = LRE; break; |
| case 0x202b: chartype[i] = RLE; break; |
| case 0x202c: chartype[i] = PDF; break; |
| case 0x202d: chartype[i] = LRO; break; |
| case 0x202e: chartype[i] = RLO; break; |
| case 0x2066: chartype[i] = LRI; break; |
| case 0x2067: chartype[i] = RLI; break; |
| case 0x2068: chartype[i] = FSI; break; |
| case 0x2069: chartype[i] = PDI; break; |
| } |
| break; |
| } |
| } |
| } |
| |
| WCHAR bidi_get_mirrored_char(WCHAR ch) |
| { |
| extern const WCHAR wine_mirror_map[] DECLSPEC_HIDDEN; |
| return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)]; |
| } |
| |
| /* RESOLVE EXPLICIT */ |
| |
| static inline UINT8 get_greater_even_level(UINT8 level) |
| { |
| return odd(level) ? level + 1 : level + 2; |
| } |
| |
| static inline UINT8 get_greater_odd_level(UINT8 level) |
| { |
| return odd(level) ? level + 2 : level + 1; |
| } |
| |
| static inline UINT8 get_embedding_direction(UINT8 level) |
| { |
| return odd(level) ? R : L; |
| } |
| |
| /*------------------------------------------------------------------------ |
| Function: bidi_resolve_explicit |
| |
| Recursively resolves explicit embedding levels and overrides. |
| Implements rules X1-X9, of the Unicode Bidirectional Algorithm. |
| |
| Input: Base embedding level and direction |
| Character count |
| |
| Output: Array of embedding levels |
| |
| In/Out: Array of direction classes |
| |
| |
| Note: The function uses two simple counters to keep track of |
| matching explicit codes and PDF. Use the default argument for |
| the outermost call. The nesting counter counts the recursion |
| depth and not the embedding level. |
| ------------------------------------------------------------------------*/ |
| typedef struct tagStackItem |
| { |
| UINT8 level; |
| UINT8 override; |
| BOOL isolate; |
| } StackItem; |
| |
| #define push_stack(l,o,i) \ |
| do { stack_top--; \ |
| stack[stack_top].level = l; \ |
| stack[stack_top].override = o; \ |
| stack[stack_top].isolate = i;} while(0) |
| |
| #define pop_stack() do { stack_top++; } while(0) |
| |
| #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) |
| |
| static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count) |
| { |
| /* X1 */ |
| int overflow_isolate_count = 0; |
| int overflow_embedding_count = 0; |
| int valid_isolate_count = 0; |
| UINT32 i; |
| |
| StackItem stack[MAX_DEPTH+2]; |
| int stack_top = MAX_DEPTH+1; |
| |
| stack[stack_top].level = baselevel; |
| stack[stack_top].override = NI; |
| stack[stack_top].isolate = FALSE; |
| |
| for (i = 0; i < count; i++) { |
| UINT8 least_odd, least_even; |
| |
| switch (classes[i]) { |
| |
| /* X2 */ |
| case RLE: |
| least_odd = get_greater_odd_level(stack[stack_top].level); |
| levels[i] = valid_level(least_odd) ? least_odd : stack[stack_top].level; |
| if (valid_level(least_odd)) |
| push_stack(least_odd, NI, FALSE); |
| else if (overflow_isolate_count == 0) |
| overflow_embedding_count++; |
| break; |
| |
| /* X3 */ |
| case LRE: |
| least_even = get_greater_even_level(stack[stack_top].level); |
| levels[i] = valid_level(least_even) ? least_even : stack[stack_top].level; |
| if (valid_level(least_even)) |
| push_stack(least_even, NI, FALSE); |
| else if (overflow_isolate_count == 0) |
| overflow_embedding_count++; |
| break; |
| |
| /* X4 */ |
| case RLO: |
| least_odd = get_greater_odd_level(stack[stack_top].level); |
| levels[i] = stack[stack_top].level; |
| if (valid_level(least_odd)) |
| push_stack(least_odd, R, FALSE); |
| else if (overflow_isolate_count == 0) |
| overflow_embedding_count++; |
| break; |
| |
| /* X5 */ |
| case LRO: |
| least_even = get_greater_even_level(stack[stack_top].level); |
| levels[i] = stack[stack_top].level; |
| if (valid_level(least_even)) |
| push_stack(least_even, L, FALSE); |
| else if (overflow_isolate_count == 0) |
| overflow_embedding_count++; |
| break; |
| |
| /* X5a */ |
| case RLI: |
| least_odd = get_greater_odd_level(stack[stack_top].level); |
| levels[i] = stack[stack_top].level; |
| if (valid_level(least_odd)) |
| { |
| valid_isolate_count++; |
| push_stack(least_odd, NI, TRUE); |
| } |
| else |
| overflow_isolate_count++; |
| break; |
| |
| /* X5b */ |
| case LRI: |
| least_even = get_greater_even_level(stack[stack_top].level); |
| levels[i] = stack[stack_top].level; |
| if (valid_level(least_even)) |
| { |
| valid_isolate_count++; |
| push_stack(least_even, NI, TRUE); |
| } |
| else |
| overflow_isolate_count++; |
| break; |
| |
| /* X5c */ |
| case FSI: |
| { |
| UINT8 new_level = 0; |
| int skipping = 0; |
| int j; |
| |
| levels[i] = stack[stack_top].level; |
| for (j = i+1; j < count; j++) |
| { |
| if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI) |
| { |
| skipping++; |
| continue; |
| } |
| else if (classes[j] == PDI) |
| { |
| if (skipping) |
| skipping --; |
| else |
| break; |
| continue; |
| } |
| |
| if (skipping) continue; |
| |
| if (classes[j] == L) |
| { |
| new_level = 0; |
| break; |
| } |
| else if (classes[j] == R || classes[j] == AL) |
| { |
| new_level = 1; |
| break; |
| } |
| } |
| if (odd(new_level)) |
| { |
| least_odd = get_greater_odd_level(stack[stack_top].level); |
| if (valid_level(least_odd)) |
| { |
| valid_isolate_count++; |
| push_stack(least_odd, NI, TRUE); |
| } |
| else |
| overflow_isolate_count++; |
| } |
| else |
| { |
| least_even = get_greater_even_level(stack[stack_top].level); |
| if (valid_level(least_even)) |
| { |
| valid_isolate_count++; |
| push_stack(least_even, NI, TRUE); |
| } |
| else |
| overflow_isolate_count++; |
| } |
| break; |
| } |
| |
| /* X6 */ |
| case ON: |
| case L: |
| case R: |
| case AN: |
| case EN: |
| case AL: |
| case NSM: |
| case CS: |
| case ES: |
| case ET: |
| case S: |
| case WS: |
| levels[i] = stack[stack_top].level; |
| if (stack[stack_top].override != NI) |
| classes[i] = stack[stack_top].override; |
| break; |
| |
| /* X6a */ |
| case PDI: |
| if (overflow_isolate_count) overflow_isolate_count--; |
| else if (!valid_isolate_count) {/* do nothing */} |
| else |
| { |
| overflow_embedding_count = 0; |
| while (!stack[stack_top].isolate) pop_stack(); |
| pop_stack(); |
| valid_isolate_count--; |
| } |
| levels[i] = stack[stack_top].level; |
| break; |
| |
| /* X7 */ |
| case PDF: |
| levels[i] = stack[stack_top].level; |
| if (overflow_isolate_count) {/* do nothing */} |
| else if (overflow_embedding_count) overflow_embedding_count--; |
| else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1)) |
| pop_stack(); |
| break; |
| |
| /* X8 */ |
| default: |
| levels[i] = baselevel; |
| break; |
| } |
| } |
| /* X9: Based on 5.2 Retaining Explicit Formatting Characters */ |
| for (i = 0; i < count ; i++) |
| if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF) |
| classes[i] = BN; |
| } |
| |
| static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence) |
| { |
| if (index == -1 || index == back_fence) return index; |
| index--; |
| while (index > back_fence && classes[index] == BN) index--; |
| return index; |
| } |
| |
| static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence) |
| { |
| if (index == front_fence) return index; |
| index++; |
| while (index < front_fence && classes[index] == BN) index++; |
| return index; |
| } |
| |
| typedef struct tagRun |
| { |
| int start; |
| int end; |
| UINT8 e; |
| } Run; |
| |
| typedef struct tagRunChar |
| { |
| WCHAR ch; |
| UINT8 *class; |
| } RunChar; |
| |
| typedef struct tagIsolatedRun |
| { |
| struct list entry; |
| int length; |
| UINT8 sos; |
| UINT8 eos; |
| UINT8 e; |
| |
| RunChar item[1]; |
| } IsolatedRun; |
| |
| static inline int get_next_valid_char_from_run(IsolatedRun *run, int index) |
| { |
| if (index >= (run->length-1)) return -1; |
| index++; |
| while (index < run->length && *run->item[index].class == BN) index++; |
| if (index == run->length) return -1; |
| return index; |
| } |
| |
| static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index) |
| { |
| if (index <= 0) return -1; |
| index--; |
| while (index > -1 && *run->item[index].class == BN) index--; |
| return index; |
| } |
| |
| static inline void iso_dump_types(const char* header, IsolatedRun *run) |
| { |
| int i, len = 0; |
| TRACE("%s:",header); |
| TRACE("[ "); |
| for (i = 0; i < run->length && len < 200; i++) { |
| TRACE(" %s", debug_type[*run->item[i].class]); |
| len += strlen(debug_type[*run->item[i].class])+1; |
| } |
| if (i != run->length) |
| TRACE("..."); |
| TRACE(" ]\n"); |
| } |
| |
| /*------------------------------------------------------------------------ |
| Function: bidi_resolve_weak |
| |
| Resolves the directionality of numeric and other weak character types |
| |
| Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. |
| |
| Input: Array of embedding levels |
| Character count |
| |
| In/Out: Array of directional classes |
| |
| Note: On input only these directional classes are expected |
| AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS, |
| ------------------------------------------------------------------------*/ |
| static BOOL bidi_is_isolate(UINT8 class) |
| { |
| return class == LRI || class == RLI || class == FSI || class == PDI; |
| } |
| |
| static void bidi_resolve_weak(IsolatedRun *iso_run) |
| { |
| int i; |
| |
| /* W1 */ |
| for (i=0; i < iso_run->length; i++) { |
| if (*iso_run->item[i].class == NSM) { |
| int j = get_prev_valid_char_from_run(iso_run, i); |
| if (j == -1) |
| *iso_run->item[i].class = iso_run->sos; |
| else if (bidi_is_isolate(*iso_run->item[j].class)) |
| *iso_run->item[i].class = ON; |
| else |
| *iso_run->item[i].class = *iso_run->item[j].class; |
| } |
| } |
| |
| /* W2 */ |
| for (i = 0; i < iso_run->length; i++) { |
| if (*iso_run->item[i].class == EN) { |
| int j = get_prev_valid_char_from_run(iso_run, i); |
| while (j > -1) { |
| if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) { |
| if (*iso_run->item[j].class == AL) |
| *iso_run->item[i].class = AN; |
| break; |
| } |
| j = get_prev_valid_char_from_run(iso_run, j); |
| } |
| } |
| } |
| |
| /* W3 */ |
| for (i = 0; i < iso_run->length; i++) { |
| if (*iso_run->item[i].class == AL) |
| *iso_run->item[i].class = R; |
| } |
| |
| /* W4 */ |
| for (i = 0; i < iso_run->length; i++) { |
| if (*iso_run->item[i].class == ES) { |
| int b = get_prev_valid_char_from_run(iso_run, i); |
| int f = get_next_valid_char_from_run(iso_run, i); |
| |
| if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN) |
| *iso_run->item[i].class = EN; |
| } |
| else if (*iso_run->item[i].class == CS) { |
| int b = get_prev_valid_char_from_run(iso_run, i); |
| int f = get_next_valid_char_from_run(iso_run, i); |
| |
| if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN) |
| *iso_run->item[i].class = EN; |
| else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN) |
| *iso_run->item[i].class = AN; |
| } |
| } |
| |
| /* W5 */ |
| for (i = 0; i < iso_run->length; i++) { |
| if (*iso_run->item[i].class == ET) { |
| int j; |
| for (j = i-1 ; j > -1; j--) { |
| if (*iso_run->item[j].class == BN) continue; |
| if (*iso_run->item[j].class == ET) continue; |
| else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN; |
| else break; |
| } |
| if (*iso_run->item[i].class == ET) { |
| for (j = i+1; j < iso_run->length; j++) { |
| if (*iso_run->item[j].class == BN) continue; |
| if (*iso_run->item[j].class == ET) continue; |
| else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN; |
| else break; |
| } |
| } |
| } |
| } |
| |
| /* W6 */ |
| for (i = 0; i < iso_run->length; i++) { |
| if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON) |
| { |
| int b = i-1; |
| int f = i+1; |
| if (b > -1 && *iso_run->item[b].class == BN) |
| *iso_run->item[b].class = ON; |
| if (f < iso_run->length && *iso_run->item[f].class == BN) |
| *iso_run->item[f].class = ON; |
| |
| *iso_run->item[i].class = ON; |
| } |
| } |
| |
| /* W7 */ |
| for (i = 0; i < iso_run->length; i++) { |
| if (*iso_run->item[i].class == EN) { |
| int j; |
| for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j)) |
| if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) { |
| if (*iso_run->item[j].class == L) |
| *iso_run->item[i].class = L; |
| break; |
| } |
| if (iso_run->sos == L && j == -1) |
| *iso_run->item[i].class = L; |
| } |
| } |
| } |
| |
| typedef struct tagBracketPair |
| { |
| int start; |
| int end; |
| } BracketPair; |
| |
| static int bracketpair_compr(const void *a, const void* b) |
| { |
| return ((BracketPair*)a)->start - ((BracketPair*)b)->start; |
| } |
| |
| static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run) |
| { |
| WCHAR *open_stack; |
| int *stack_index; |
| int stack_top = iso_run->length; |
| BracketPair *out = NULL; |
| int pair_count = 0; |
| int i; |
| |
| open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length); |
| stack_index = heap_alloc(sizeof(int) * iso_run->length); |
| |
| for (i = 0; i < iso_run->length; i++) { |
| unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch); |
| if (ubv) { |
| if (!out) { |
| out = heap_alloc(sizeof(BracketPair)); |
| out[0].start = -1; |
| } |
| |
| if ((ubv >> 8) == 0) { |
| stack_top--; |
| open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff); |
| /* deal with canonical equivalent U+2329/232A and U+3008/3009 */ |
| if (open_stack[stack_top] == 0x232A) |
| open_stack[stack_top] = 0x3009; |
| stack_index[stack_top] = i; |
| } |
| else if ((ubv >> 8) == 1) { |
| int j; |
| |
| if (stack_top == iso_run->length) continue; |
| for (j = stack_top; j < iso_run->length; j++) { |
| WCHAR c = iso_run->item[i].ch; |
| if (c == 0x232A) c = 0x3009; |
| if (c == open_stack[j]) { |
| out[pair_count].start = stack_index[j]; |
| out[pair_count].end = i; |
| pair_count++; |
| out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1)); |
| out[pair_count].start = -1; |
| stack_top = j+1; |
| break; |
| } |
| } |
| } |
| } |
| } |
| if (pair_count == 0) { |
| heap_free(out); |
| out = NULL; |
| } |
| else if (pair_count > 1) |
| qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr); |
| |
| heap_free(open_stack); |
| heap_free(stack_index); |
| return out; |
| } |
| |
| static inline UINT8 get_rule_N0_class(UINT8 class) |
| { |
| return (class == AN || class == EN) ? R : class; |
| } |
| |
| /*------------------------------------------------------------------------ |
| Function: bidi_resolve_neutrals |
| |
| Resolves the directionality of neutral character types. |
| |
| Implements rules N1 and N2 of the Unicode Bidi Algorithm. |
| |
| Input: Array of embedding levels |
| Character count |
| Baselevel |
| |
| In/Out: Array of directional classes |
| |
| Note: On input only these directional classes are expected |
| R, L, NI, AN, EN and BN |
| |
| W8 resolves a number of ENs to L |
| ------------------------------------------------------------------------*/ |
| static void bidi_resolve_neutrals(IsolatedRun *run) |
| { |
| BracketPair *pairs; |
| int i; |
| |
| /* Translate isolates into NI */ |
| for (i = 0; i < run->length; i++) { |
| switch (*run->item[i].class) { |
| case B: |
| case S: |
| case WS: |
| case FSI: |
| case LRI: |
| case RLI: |
| case PDI: *run->item[i].class = NI; |
| } |
| |
| /* "Only NI, L, R, AN, EN and BN are allowed" */ |
| ASSERT(*run->item[i].class <= EN || *run->item[i].class == BN); |
| } |
| |
| /* N0: Skipping bracketed pairs for now */ |
| pairs = bidi_compute_bracket_pairs(run); |
| if (pairs) { |
| BracketPair *p = pairs; |
| int i = 0; |
| while (p->start >= 0) { |
| UINT8 e = get_embedding_direction(run->e); |
| UINT8 o = get_embedding_direction(run->e + 1); |
| BOOL flag_o = FALSE; |
| int j; |
| |
| TRACE("Bracket Pair [%i - %i]\n", p->start, p->end); |
| |
| /* N0.b */ |
| for (j = p->start+1; j < p->end; j++) { |
| if (get_rule_N0_class(*run->item[j].class) == e) { |
| *run->item[p->start].class = e; |
| *run->item[p->end].class = e; |
| break; |
| } |
| else if (get_rule_N0_class(*run->item[j].class) == o) |
| flag_o = TRUE; |
| } |
| /* N0.c */ |
| if (j == p->end && flag_o) { |
| for (j = p->start; j >= 0; j--) { |
| if (get_rule_N0_class(*run->item[j].class) == o) { |
| *run->item[p->start].class = o; |
| *run->item[p->end].class = o; |
| break; |
| } |
| else if (get_rule_N0_class(*run->item[j].class) == e) { |
| *run->item[p->start].class = e; |
| *run->item[p->end].class = e; |
| break; |
| } |
| } |
| if (j < 0) { |
| *run->item[p->start].class = run->sos; |
| *run->item[p->end].class = run->sos; |
| } |
| } |
| |
| i++; |
| p = &pairs[i]; |
| } |
| heap_free(pairs); |
| } |
| |
| /* N1 */ |
| for (i = 0; i < run->length; i++) { |
| UINT8 l, r; |
| |
| if (*run->item[i].class == NI) { |
| int b = get_prev_valid_char_from_run(run, i); |
| int j; |
| |
| if (b == -1) { |
| l = run->sos; |
| b = 0; |
| } |
| else { |
| if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN) |
| l = R; |
| else if (*run->item[b].class == L) |
| l = L; |
| else /* No string type */ |
| continue; |
| } |
| j = get_next_valid_char_from_run(run, i); |
| while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j); |
| if (j == -1) { |
| r = run->eos; |
| j = run->length; |
| } |
| else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN) |
| r = R; |
| else if (*run->item[j].class == L) |
| r = L; |
| else /* No string type */ |
| continue; |
| |
| if (r == l) { |
| for (b = i; b < j && b < run->length; b++) |
| *run->item[b].class = r; |
| } |
| } |
| } |
| |
| /* N2 */ |
| for (i = 0; i < run->length; i++) { |
| if (*run->item[i].class == NI) { |
| int b = i-1; |
| int f = i+1; |
| |
| *run->item[i].class = get_embedding_direction(run->e); |
| if (b > -1 && *run->item[b].class == BN) |
| *run->item[b].class = get_embedding_direction(run->e); |
| if (f < run->length && *run->item[f].class == BN) |
| *run->item[f].class = get_embedding_direction(run->e); |
| } |
| } |
| } |
| |
| /*------------------------------------------------------------------------ |
| Function: bidi_resolve_implicit |
| |
| Recursively resolves implicit embedding levels. |
| Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. |
| |
| Input: Array of direction classes |
| Character count |
| Base level |
| |
| In/Out: Array of embedding levels |
| |
| Note: levels may exceed 15 on output. |
| Accepted subset of direction classes |
| R, L, AN, EN |
| ------------------------------------------------------------------------*/ |
| static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos) |
| { |
| int i; |
| |
| /* I1/2 */ |
| for (i = sos; i <= eos; i++) { |
| if (classes[i] == BN) |
| continue; |
| |
| ASSERT(classes[i] != ON); /* "No Neutrals allowed to survive here." */ |
| ASSERT(classes[i] <= EN); /* "Out of range." */ |
| |
| if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN)) |
| levels[i]++; |
| else if (!odd(levels[i]) && classes[i] == R) |
| levels[i]++; |
| else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN)) |
| levels[i] += 2; |
| } |
| } |
| |
| static inline BOOL is_rule_L1_reset_class(UINT8 class) |
| { |
| switch (class) { |
| case WS: |
| case FSI: |
| case LRI: |
| case RLI: |
| case PDI: |
| case LRE: |
| case RLE: |
| case LRO: |
| case RLO: |
| case PDF: |
| case BN: |
| return TRUE; |
| default: |
| return FALSE; |
| } |
| } |
| |
| static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos) |
| { |
| int i; |
| |
| /* L1 */ |
| for (i = sos; i <= eos; i++) { |
| if (classes[i] == B || classes[i] == S) { |
| int j = i - 1; |
| while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j])) |
| levels[j--] = baselevel; |
| levels[i] = baselevel; |
| } |
| else if (classes[i] == LRE || classes[i] == RLE || classes[i] == LRO || classes[i] == RLO || |
| classes[i] == PDF || classes[i] == BN) { |
| levels[i] = i ? levels[i - 1] : baselevel; |
| } |
| if (i == eos && is_rule_L1_reset_class(classes[i])) { |
| int j = i; |
| while (j >= sos && is_rule_L1_reset_class(classes[j])) |
| levels[j--] = baselevel; |
| } |
| } |
| } |
| |
| static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set) |
| { |
| int run_start, run_end, i; |
| int run_count = 0; |
| HRESULT hr = S_OK; |
| Run *runs; |
| |
| runs = heap_alloc(count * sizeof(Run)); |
| if (!runs) |
| return E_OUTOFMEMORY; |
| |
| list_init(set); |
| |
| /* Build Runs */ |
| run_start = 0; |
| while (run_start < count) { |
| run_end = get_next_valid_char_index(classes, run_start, count); |
| while (run_end < count && levels[run_end] == levels[run_start]) |
| run_end = get_next_valid_char_index(classes, run_end, count); |
| run_end--; |
| runs[run_count].start = run_start; |
| runs[run_count].end = run_end; |
| runs[run_count].e = levels[run_start]; |
| run_start = get_next_valid_char_index(classes, run_end, count); |
| run_count++; |
| } |
| |
| /* Build Isolating Runs */ |
| i = 0; |
| while (i < run_count) { |
| int k = i; |
| if (runs[k].start >= 0) { |
| IsolatedRun *current_isolated; |
| int type_fence, real_end; |
| int j; |
| |
| current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count); |
| if (!current_isolated) { |
| hr = E_OUTOFMEMORY; |
| break; |
| } |
| |
| run_start = runs[k].start; |
| current_isolated->e = runs[k].e; |
| current_isolated->length = (runs[k].end - runs[k].start)+1; |
| |
| for (j = 0; j < current_isolated->length; j++) { |
| current_isolated->item[j].class = &classes[runs[k].start+j]; |
| current_isolated->item[j].ch = string[runs[k].start+j]; |
| } |
| |
| run_end = runs[k].end; |
| |
| TRACE("{ [%i -- %i]",run_start, run_end); |
| |
| if (classes[run_end] == BN) |
| run_end = get_prev_valid_char_index(classes, run_end, runs[k].start); |
| |
| while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) { |
| j = k+1; |
| search: |
| while (j < run_count && classes[runs[j].start] != PDI) j++; |
| if (j < run_count && runs[i].e != runs[j].e) { |
| j++; |
| goto search; |
| } |
| |
| if (j != run_count) { |
| int l = current_isolated->length; |
| int m; |
| |
| current_isolated->length += (runs[j].end - runs[j].start)+1; |
| for (m = 0; l < current_isolated->length; l++, m++) { |
| current_isolated->item[l].class = &classes[runs[j].start+m]; |
| current_isolated->item[l].ch = string[runs[j].start+m]; |
| } |
| |
| TRACE("[%i -- %i]", runs[j].start, runs[j].end); |
| |
| run_end = runs[j].end; |
| if (classes[run_end] == BN) |
| run_end = get_prev_valid_char_index(classes, run_end, runs[i].start); |
| runs[j].start = -1; |
| k = j; |
| } |
| else { |
| run_end = count; |
| break; |
| } |
| } |
| |
| type_fence = get_prev_valid_char_index(classes, run_start, -1); |
| |
| if (type_fence == -1) |
| current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start]; |
| else |
| current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start]; |
| |
| current_isolated->sos = get_embedding_direction(current_isolated->sos); |
| |
| if (run_end == count) |
| current_isolated->eos = current_isolated->sos; |
| else { |
| /* eos could be an BN */ |
| if (classes[run_end] == BN) { |
| real_end = get_prev_valid_char_index(classes, run_end, run_start-1); |
| if (real_end < run_start) |
| real_end = run_start; |
| } |
| else |
| real_end = run_end; |
| |
| type_fence = get_next_valid_char_index(classes, run_end, count); |
| if (type_fence == count) |
| current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end]; |
| else |
| current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end]; |
| |
| current_isolated->eos = get_embedding_direction(current_isolated->eos); |
| } |
| |
| list_add_tail(set, ¤t_isolated->entry); |
| TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]); |
| } |
| i++; |
| } |
| |
| heap_free(runs); |
| return hr; |
| } |
| |
| HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels) |
| { |
| IsolatedRun *iso_run, *next; |
| struct list IsolatingRuns; |
| UINT8 *chartype; |
| HRESULT hr; |
| |
| TRACE("%s, %u\n", debugstr_wn(string, count), count); |
| |
| chartype = heap_alloc(count*sizeof(*chartype)); |
| if (!chartype) |
| return E_OUTOFMEMORY; |
| |
| bidi_classify(string, chartype, count); |
| if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count); |
| |
| bidi_resolve_explicit(baselevel, chartype, levels, count); |
| memcpy(explicit, levels, count*sizeof(*explicit)); |
| |
| if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count); |
| |
| /* X10/BD13: Compute Isolating runs */ |
| hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns); |
| if (FAILED(hr)) |
| goto done; |
| |
| LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry) |
| { |
| if (TRACE_ON(bidi)) iso_dump_types("run", iso_run); |
| |
| bidi_resolve_weak(iso_run); |
| if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run); |
| |
| bidi_resolve_neutrals(iso_run); |
| if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run); |
| |
| list_remove(&iso_run->entry); |
| heap_free(iso_run); |
| } |
| |
| if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count); |
| bidi_resolve_implicit(chartype, levels, 0, count-1); |
| |
| bidi_classify(string, chartype, count); |
| bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1); |
| |
| done: |
| heap_free(chartype); |
| return hr; |
| } |