blob: d4ee2ca0f7ccc8d14519a5a93d124c5b6aafef59 [file] [log] [blame]
/*
* Unicode Bidirectional Algorithm implementation
*
* Copyright 2003 Shachar Shemesh
* Copyright 2007 Maarten Lankhorst
* Copyright 2010 CodeWeavers, Aric Stewart
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*
* Code derived from the modified reference implementation
* that was found in revision 17 of http://unicode.org/reports/tr9/
* "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
*
* -- Copyright (C) 1999-2005, ASMUS, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of the Unicode data files and any associated documentation (the
* "Data Files") or Unicode software and any associated documentation (the
* "Software") to deal in the Data Files or Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, and/or sell copies of the Data Files or Software,
* and to permit persons to whom the Data Files or Software are furnished
* to do so, provided that (a) the above copyright notice(s) and this
* permission notice appear with all copies of the Data Files or Software,
* (b) both the above copyright notice(s) and this permission notice appear
* in associated documentation, and (c) there is clear notice in each
* modified Data File or in the Software as well as in the documentation
* associated with the Data File(s) or Software that the data or software
* has been modified.
*/
#include <stdarg.h>
#include "windef.h"
#include "winbase.h"
#include "wine/debug.h"
#include "dwrite_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(bidi);
extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
#define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
#define MAX_DEPTH 125
#define odd(x) ((x) & 1)
/*------------------------------------------------------------------------
Bidirectional Character Types
as defined by the Unicode Bidirectional Algorithm Table 3-7.
Note:
The list of bidirectional character types here is not grouped the
same way as the table 3-7, since the numberic values for the types
are chosen to keep the state and action tables compact.
------------------------------------------------------------------------*/
enum directions
{
/* input types */
/* ON MUST be zero, code relies on ON = NI = 0 */
ON = 0, /* Other Neutral */
L, /* Left Letter */
R, /* Right Letter */
AN, /* Arabic Number */
EN, /* European Number */
AL, /* Arabic Letter (Right-to-left) */
NSM, /* Non-spacing Mark */
CS, /* Common Separator */
ES, /* European Separator */
ET, /* European Terminator (post/prefix e.g. $ and %) */
/* resolved types */
BN, /* Boundary neutral (type of RLE etc after explicit levels) */
/* input types, */
S, /* Segment Separator (TAB) // used only in L1 */
WS, /* White space // used only in L1 */
B, /* Paragraph Separator (aka as PS) */
/* types for explicit controls */
RLO, /* these are used only in X1-X9 */
RLE,
LRO,
LRE,
PDF,
LRI, /* Isolate formatting characters new with 6.3 */
RLI,
FSI,
PDI,
/* resolved types, also resolved directions */
NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
};
static const char debug_type[][4] =
{
"ON", /* Other Neutral */
"L", /* Left Letter */
"R", /* Right Letter */
"AN", /* Arabic Number */
"EN", /* European Number */
"AL", /* Arabic Letter (Right-to-left) */
"NSM", /* Non-spacing Mark */
"CS", /* Common Separator */
"ES", /* European Separator */
"ET", /* European Terminator (post/prefix e.g. $ and %) */
"BN", /* Boundary neutral (type of RLE etc after explicit levels) */
"S", /* Segment Separator (TAB) used only in L1 */
"WS", /* White space used only in L1 */
"B", /* Paragraph Separator (aka as PS) */
"RLO", /* these are used only in X1-X9 */
"RLE",
"LRO",
"LRE",
"PDF",
"LRI", /* Isolate formatting characters new with 6.3 */
"RLI",
"FSI",
"PDI",
};
static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
{
int i, len = 0;
TRACE("%s:", header);
for (i = start; i < end && len < 200; i++) {
TRACE(" %s", debug_type[types[i]]);
len += strlen(debug_type[types[i]])+1;
}
if (i != end)
TRACE("...");
TRACE("\n");
}
/* Convert the libwine information to the direction enum */
static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
{
static const enum directions dir_map[16] =
{
L, /* unassigned defaults to L */
L,
R,
EN,
ES,
ET,
AN,
CS,
B,
S,
WS,
ON,
AL,
NSM,
BN,
PDF /* also LRE, LRO, RLE, RLO */
};
UINT32 i;
for (i = 0; i < count; ++i) {
chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
switch (chartype[i]) {
case ES:
break;
case PDF:
switch (string[i]) {
case 0x202a: chartype[i] = LRE; break;
case 0x202b: chartype[i] = RLE; break;
case 0x202c: chartype[i] = PDF; break;
case 0x202d: chartype[i] = LRO; break;
case 0x202e: chartype[i] = RLO; break;
case 0x2066: chartype[i] = LRI; break;
case 0x2067: chartype[i] = RLI; break;
case 0x2068: chartype[i] = FSI; break;
case 0x2069: chartype[i] = PDI; break;
}
break;
}
}
}
WCHAR bidi_get_mirrored_char(WCHAR ch)
{
extern const WCHAR wine_mirror_map[] DECLSPEC_HIDDEN;
return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
}
/* RESOLVE EXPLICIT */
static inline UINT8 get_greater_even_level(UINT8 level)
{
return odd(level) ? level + 1 : level + 2;
}
static inline UINT8 get_greater_odd_level(UINT8 level)
{
return odd(level) ? level + 2 : level + 1;
}
static inline UINT8 get_embedding_direction(UINT8 level)
{
return odd(level) ? R : L;
}
/*------------------------------------------------------------------------
Function: bidi_resolve_explicit
Recursively resolves explicit embedding levels and overrides.
Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
Input: Base embedding level and direction
Character count
Output: Array of embedding levels
In/Out: Array of direction classes
Note: The function uses two simple counters to keep track of
matching explicit codes and PDF. Use the default argument for
the outermost call. The nesting counter counts the recursion
depth and not the embedding level.
------------------------------------------------------------------------*/
typedef struct tagStackItem
{
UINT8 level;
UINT8 override;
BOOL isolate;
} StackItem;
#define push_stack(l,o,i) \
do { stack_top--; \
stack[stack_top].level = l; \
stack[stack_top].override = o; \
stack[stack_top].isolate = i;} while(0)
#define pop_stack() do { stack_top++; } while(0)
#define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
{
/* X1 */
int overflow_isolate_count = 0;
int overflow_embedding_count = 0;
int valid_isolate_count = 0;
UINT32 i;
StackItem stack[MAX_DEPTH+2];
int stack_top = MAX_DEPTH+1;
stack[stack_top].level = baselevel;
stack[stack_top].override = NI;
stack[stack_top].isolate = FALSE;
for (i = 0; i < count; i++) {
UINT8 least_odd, least_even;
switch (classes[i]) {
/* X2 */
case RLE:
least_odd = get_greater_odd_level(stack[stack_top].level);
levels[i] = valid_level(least_odd) ? least_odd : stack[stack_top].level;
if (valid_level(least_odd))
push_stack(least_odd, NI, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
break;
/* X3 */
case LRE:
least_even = get_greater_even_level(stack[stack_top].level);
levels[i] = valid_level(least_even) ? least_even : stack[stack_top].level;
if (valid_level(least_even))
push_stack(least_even, NI, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
break;
/* X4 */
case RLO:
least_odd = get_greater_odd_level(stack[stack_top].level);
levels[i] = stack[stack_top].level;
if (valid_level(least_odd))
push_stack(least_odd, R, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
break;
/* X5 */
case LRO:
least_even = get_greater_even_level(stack[stack_top].level);
levels[i] = stack[stack_top].level;
if (valid_level(least_even))
push_stack(least_even, L, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
break;
/* X5a */
case RLI:
least_odd = get_greater_odd_level(stack[stack_top].level);
levels[i] = stack[stack_top].level;
if (valid_level(least_odd))
{
valid_isolate_count++;
push_stack(least_odd, NI, TRUE);
}
else
overflow_isolate_count++;
break;
/* X5b */
case LRI:
least_even = get_greater_even_level(stack[stack_top].level);
levels[i] = stack[stack_top].level;
if (valid_level(least_even))
{
valid_isolate_count++;
push_stack(least_even, NI, TRUE);
}
else
overflow_isolate_count++;
break;
/* X5c */
case FSI:
{
UINT8 new_level = 0;
int skipping = 0;
int j;
levels[i] = stack[stack_top].level;
for (j = i+1; j < count; j++)
{
if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
{
skipping++;
continue;
}
else if (classes[j] == PDI)
{
if (skipping)
skipping --;
else
break;
continue;
}
if (skipping) continue;
if (classes[j] == L)
{
new_level = 0;
break;
}
else if (classes[j] == R || classes[j] == AL)
{
new_level = 1;
break;
}
}
if (odd(new_level))
{
least_odd = get_greater_odd_level(stack[stack_top].level);
if (valid_level(least_odd))
{
valid_isolate_count++;
push_stack(least_odd, NI, TRUE);
}
else
overflow_isolate_count++;
}
else
{
least_even = get_greater_even_level(stack[stack_top].level);
if (valid_level(least_even))
{
valid_isolate_count++;
push_stack(least_even, NI, TRUE);
}
else
overflow_isolate_count++;
}
break;
}
/* X6 */
case ON:
case L:
case R:
case AN:
case EN:
case AL:
case NSM:
case CS:
case ES:
case ET:
case S:
case WS:
levels[i] = stack[stack_top].level;
if (stack[stack_top].override != NI)
classes[i] = stack[stack_top].override;
break;
/* X6a */
case PDI:
if (overflow_isolate_count) overflow_isolate_count--;
else if (!valid_isolate_count) {/* do nothing */}
else
{
overflow_embedding_count = 0;
while (!stack[stack_top].isolate) pop_stack();
pop_stack();
valid_isolate_count--;
}
levels[i] = stack[stack_top].level;
break;
/* X7 */
case PDF:
levels[i] = stack[stack_top].level;
if (overflow_isolate_count) {/* do nothing */}
else if (overflow_embedding_count) overflow_embedding_count--;
else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
pop_stack();
break;
/* X8 */
default:
levels[i] = baselevel;
break;
}
}
/* X9: Based on 5.2 Retaining Explicit Formatting Characters */
for (i = 0; i < count ; i++)
if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
classes[i] = BN;
}
static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
{
if (index == -1 || index == back_fence) return index;
index--;
while (index > back_fence && classes[index] == BN) index--;
return index;
}
static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
{
if (index == front_fence) return index;
index++;
while (index < front_fence && classes[index] == BN) index++;
return index;
}
typedef struct tagRun
{
int start;
int end;
UINT8 e;
} Run;
typedef struct tagRunChar
{
WCHAR ch;
UINT8 *class;
} RunChar;
typedef struct tagIsolatedRun
{
struct list entry;
int length;
UINT8 sos;
UINT8 eos;
UINT8 e;
RunChar item[1];
} IsolatedRun;
static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
{
if (index >= (run->length-1)) return -1;
index++;
while (index < run->length && *run->item[index].class == BN) index++;
if (index == run->length) return -1;
return index;
}
static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
{
if (index <= 0) return -1;
index--;
while (index > -1 && *run->item[index].class == BN) index--;
return index;
}
static inline void iso_dump_types(const char* header, IsolatedRun *run)
{
int i, len = 0;
TRACE("%s:",header);
TRACE("[ ");
for (i = 0; i < run->length && len < 200; i++) {
TRACE(" %s", debug_type[*run->item[i].class]);
len += strlen(debug_type[*run->item[i].class])+1;
}
if (i != run->length)
TRACE("...");
TRACE(" ]\n");
}
/*------------------------------------------------------------------------
Function: bidi_resolve_weak
Resolves the directionality of numeric and other weak character types
Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
Input: Array of embedding levels
Character count
In/Out: Array of directional classes
Note: On input only these directional classes are expected
AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
------------------------------------------------------------------------*/
static BOOL bidi_is_isolate(UINT8 class)
{
return class == LRI || class == RLI || class == FSI || class == PDI;
}
static void bidi_resolve_weak(IsolatedRun *iso_run)
{
int i;
/* W1 */
for (i=0; i < iso_run->length; i++) {
if (*iso_run->item[i].class == NSM) {
int j = get_prev_valid_char_from_run(iso_run, i);
if (j == -1)
*iso_run->item[i].class = iso_run->sos;
else if (bidi_is_isolate(*iso_run->item[j].class))
*iso_run->item[i].class = ON;
else
*iso_run->item[i].class = *iso_run->item[j].class;
}
}
/* W2 */
for (i = 0; i < iso_run->length; i++) {
if (*iso_run->item[i].class == EN) {
int j = get_prev_valid_char_from_run(iso_run, i);
while (j > -1) {
if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
if (*iso_run->item[j].class == AL)
*iso_run->item[i].class = AN;
break;
}
j = get_prev_valid_char_from_run(iso_run, j);
}
}
}
/* W3 */
for (i = 0; i < iso_run->length; i++) {
if (*iso_run->item[i].class == AL)
*iso_run->item[i].class = R;
}
/* W4 */
for (i = 0; i < iso_run->length; i++) {
if (*iso_run->item[i].class == ES) {
int b = get_prev_valid_char_from_run(iso_run, i);
int f = get_next_valid_char_from_run(iso_run, i);
if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
*iso_run->item[i].class = EN;
}
else if (*iso_run->item[i].class == CS) {
int b = get_prev_valid_char_from_run(iso_run, i);
int f = get_next_valid_char_from_run(iso_run, i);
if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
*iso_run->item[i].class = EN;
else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
*iso_run->item[i].class = AN;
}
}
/* W5 */
for (i = 0; i < iso_run->length; i++) {
if (*iso_run->item[i].class == ET) {
int j;
for (j = i-1 ; j > -1; j--) {
if (*iso_run->item[j].class == BN) continue;
if (*iso_run->item[j].class == ET) continue;
else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
else break;
}
if (*iso_run->item[i].class == ET) {
for (j = i+1; j < iso_run->length; j++) {
if (*iso_run->item[j].class == BN) continue;
if (*iso_run->item[j].class == ET) continue;
else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
else break;
}
}
}
}
/* W6 */
for (i = 0; i < iso_run->length; i++) {
if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
{
int b = i-1;
int f = i+1;
if (b > -1 && *iso_run->item[b].class == BN)
*iso_run->item[b].class = ON;
if (f < iso_run->length && *iso_run->item[f].class == BN)
*iso_run->item[f].class = ON;
*iso_run->item[i].class = ON;
}
}
/* W7 */
for (i = 0; i < iso_run->length; i++) {
if (*iso_run->item[i].class == EN) {
int j;
for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
if (*iso_run->item[j].class == L)
*iso_run->item[i].class = L;
break;
}
if (iso_run->sos == L && j == -1)
*iso_run->item[i].class = L;
}
}
}
typedef struct tagBracketPair
{
int start;
int end;
} BracketPair;
static int bracketpair_compr(const void *a, const void* b)
{
return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
}
static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
{
WCHAR *open_stack;
int *stack_index;
int stack_top = iso_run->length;
BracketPair *out = NULL;
int pair_count = 0;
int i;
open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length);
stack_index = heap_alloc(sizeof(int) * iso_run->length);
for (i = 0; i < iso_run->length; i++) {
unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
if (ubv) {
if (!out) {
out = heap_alloc(sizeof(BracketPair));
out[0].start = -1;
}
if ((ubv >> 8) == 0) {
stack_top--;
open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
/* deal with canonical equivalent U+2329/232A and U+3008/3009 */
if (open_stack[stack_top] == 0x232A)
open_stack[stack_top] = 0x3009;
stack_index[stack_top] = i;
}
else if ((ubv >> 8) == 1) {
int j;
if (stack_top == iso_run->length) continue;
for (j = stack_top; j < iso_run->length; j++) {
WCHAR c = iso_run->item[i].ch;
if (c == 0x232A) c = 0x3009;
if (c == open_stack[j]) {
out[pair_count].start = stack_index[j];
out[pair_count].end = i;
pair_count++;
out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1));
out[pair_count].start = -1;
stack_top = j+1;
break;
}
}
}
}
}
if (pair_count == 0) {
heap_free(out);
out = NULL;
}
else if (pair_count > 1)
qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
heap_free(open_stack);
heap_free(stack_index);
return out;
}
static inline UINT8 get_rule_N0_class(UINT8 class)
{
return (class == AN || class == EN) ? R : class;
}
/*------------------------------------------------------------------------
Function: bidi_resolve_neutrals
Resolves the directionality of neutral character types.
Implements rules N1 and N2 of the Unicode Bidi Algorithm.
Input: Array of embedding levels
Character count
Baselevel
In/Out: Array of directional classes
Note: On input only these directional classes are expected
R, L, NI, AN, EN and BN
W8 resolves a number of ENs to L
------------------------------------------------------------------------*/
static void bidi_resolve_neutrals(IsolatedRun *run)
{
BracketPair *pairs;
int i;
/* Translate isolates into NI */
for (i = 0; i < run->length; i++) {
switch (*run->item[i].class) {
case B:
case S:
case WS:
case FSI:
case LRI:
case RLI:
case PDI: *run->item[i].class = NI;
}
/* "Only NI, L, R, AN, EN and BN are allowed" */
ASSERT(*run->item[i].class <= EN || *run->item[i].class == BN);
}
/* N0: Skipping bracketed pairs for now */
pairs = bidi_compute_bracket_pairs(run);
if (pairs) {
BracketPair *p = pairs;
int i = 0;
while (p->start >= 0) {
UINT8 e = get_embedding_direction(run->e);
UINT8 o = get_embedding_direction(run->e + 1);
BOOL flag_o = FALSE;
int j;
TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
/* N0.b */
for (j = p->start+1; j < p->end; j++) {
if (get_rule_N0_class(*run->item[j].class) == e) {
*run->item[p->start].class = e;
*run->item[p->end].class = e;
break;
}
else if (get_rule_N0_class(*run->item[j].class) == o)
flag_o = TRUE;
}
/* N0.c */
if (j == p->end && flag_o) {
for (j = p->start; j >= 0; j--) {
if (get_rule_N0_class(*run->item[j].class) == o) {
*run->item[p->start].class = o;
*run->item[p->end].class = o;
break;
}
else if (get_rule_N0_class(*run->item[j].class) == e) {
*run->item[p->start].class = e;
*run->item[p->end].class = e;
break;
}
}
if (j < 0) {
*run->item[p->start].class = run->sos;
*run->item[p->end].class = run->sos;
}
}
i++;
p = &pairs[i];
}
heap_free(pairs);
}
/* N1 */
for (i = 0; i < run->length; i++) {
UINT8 l, r;
if (*run->item[i].class == NI) {
int b = get_prev_valid_char_from_run(run, i);
int j;
if (b == -1) {
l = run->sos;
b = 0;
}
else {
if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
l = R;
else if (*run->item[b].class == L)
l = L;
else /* No string type */
continue;
}
j = get_next_valid_char_from_run(run, i);
while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
if (j == -1) {
r = run->eos;
j = run->length;
}
else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
r = R;
else if (*run->item[j].class == L)
r = L;
else /* No string type */
continue;
if (r == l) {
for (b = i; b < j && b < run->length; b++)
*run->item[b].class = r;
}
}
}
/* N2 */
for (i = 0; i < run->length; i++) {
if (*run->item[i].class == NI) {
int b = i-1;
int f = i+1;
*run->item[i].class = get_embedding_direction(run->e);
if (b > -1 && *run->item[b].class == BN)
*run->item[b].class = get_embedding_direction(run->e);
if (f < run->length && *run->item[f].class == BN)
*run->item[f].class = get_embedding_direction(run->e);
}
}
}
/*------------------------------------------------------------------------
Function: bidi_resolve_implicit
Recursively resolves implicit embedding levels.
Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
Input: Array of direction classes
Character count
Base level
In/Out: Array of embedding levels
Note: levels may exceed 15 on output.
Accepted subset of direction classes
R, L, AN, EN
------------------------------------------------------------------------*/
static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
{
int i;
/* I1/2 */
for (i = sos; i <= eos; i++) {
if (classes[i] == BN)
continue;
ASSERT(classes[i] != ON); /* "No Neutrals allowed to survive here." */
ASSERT(classes[i] <= EN); /* "Out of range." */
if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
levels[i]++;
else if (!odd(levels[i]) && classes[i] == R)
levels[i]++;
else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
levels[i] += 2;
}
}
static inline BOOL is_rule_L1_reset_class(UINT8 class)
{
switch (class) {
case WS:
case FSI:
case LRI:
case RLI:
case PDI:
case LRE:
case RLE:
case LRO:
case RLO:
case PDF:
case BN:
return TRUE;
default:
return FALSE;
}
}
static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
{
int i;
/* L1 */
for (i = sos; i <= eos; i++) {
if (classes[i] == B || classes[i] == S) {
int j = i - 1;
while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
levels[j--] = baselevel;
levels[i] = baselevel;
}
else if (classes[i] == LRE || classes[i] == RLE || classes[i] == LRO || classes[i] == RLO ||
classes[i] == PDF || classes[i] == BN) {
levels[i] = i ? levels[i - 1] : baselevel;
}
if (i == eos && is_rule_L1_reset_class(classes[i])) {
int j = i;
while (j >= sos && is_rule_L1_reset_class(classes[j]))
levels[j--] = baselevel;
}
}
}
static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
{
int run_start, run_end, i;
int run_count = 0;
HRESULT hr = S_OK;
Run *runs;
runs = heap_alloc(count * sizeof(Run));
if (!runs)
return E_OUTOFMEMORY;
list_init(set);
/* Build Runs */
run_start = 0;
while (run_start < count) {
run_end = get_next_valid_char_index(classes, run_start, count);
while (run_end < count && levels[run_end] == levels[run_start])
run_end = get_next_valid_char_index(classes, run_end, count);
run_end--;
runs[run_count].start = run_start;
runs[run_count].end = run_end;
runs[run_count].e = levels[run_start];
run_start = get_next_valid_char_index(classes, run_end, count);
run_count++;
}
/* Build Isolating Runs */
i = 0;
while (i < run_count) {
int k = i;
if (runs[k].start >= 0) {
IsolatedRun *current_isolated;
int type_fence, real_end;
int j;
current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count);
if (!current_isolated) {
hr = E_OUTOFMEMORY;
break;
}
run_start = runs[k].start;
current_isolated->e = runs[k].e;
current_isolated->length = (runs[k].end - runs[k].start)+1;
for (j = 0; j < current_isolated->length; j++) {
current_isolated->item[j].class = &classes[runs[k].start+j];
current_isolated->item[j].ch = string[runs[k].start+j];
}
run_end = runs[k].end;
TRACE("{ [%i -- %i]",run_start, run_end);
if (classes[run_end] == BN)
run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
j = k+1;
search:
while (j < run_count && classes[runs[j].start] != PDI) j++;
if (j < run_count && runs[i].e != runs[j].e) {
j++;
goto search;
}
if (j != run_count) {
int l = current_isolated->length;
int m;
current_isolated->length += (runs[j].end - runs[j].start)+1;
for (m = 0; l < current_isolated->length; l++, m++) {
current_isolated->item[l].class = &classes[runs[j].start+m];
current_isolated->item[l].ch = string[runs[j].start+m];
}
TRACE("[%i -- %i]", runs[j].start, runs[j].end);
run_end = runs[j].end;
if (classes[run_end] == BN)
run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
runs[j].start = -1;
k = j;
}
else {
run_end = count;
break;
}
}
type_fence = get_prev_valid_char_index(classes, run_start, -1);
if (type_fence == -1)
current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
else
current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
current_isolated->sos = get_embedding_direction(current_isolated->sos);
if (run_end == count)
current_isolated->eos = current_isolated->sos;
else {
/* eos could be an BN */
if (classes[run_end] == BN) {
real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
if (real_end < run_start)
real_end = run_start;
}
else
real_end = run_end;
type_fence = get_next_valid_char_index(classes, run_end, count);
if (type_fence == count)
current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
else
current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
current_isolated->eos = get_embedding_direction(current_isolated->eos);
}
list_add_tail(set, &current_isolated->entry);
TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
}
i++;
}
heap_free(runs);
return hr;
}
HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
{
IsolatedRun *iso_run, *next;
struct list IsolatingRuns;
UINT8 *chartype;
HRESULT hr;
TRACE("%s, %u\n", debugstr_wn(string, count), count);
chartype = heap_alloc(count*sizeof(*chartype));
if (!chartype)
return E_OUTOFMEMORY;
bidi_classify(string, chartype, count);
if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
bidi_resolve_explicit(baselevel, chartype, levels, count);
memcpy(explicit, levels, count*sizeof(*explicit));
if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
/* X10/BD13: Compute Isolating runs */
hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
if (FAILED(hr))
goto done;
LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
{
if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
bidi_resolve_weak(iso_run);
if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
bidi_resolve_neutrals(iso_run);
if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
list_remove(&iso_run->entry);
heap_free(iso_run);
}
if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
bidi_resolve_implicit(chartype, levels, 0, count-1);
bidi_classify(string, chartype, count);
bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
done:
heap_free(chartype);
return hr;
}