msxml3: Switch parser encoding manually when it won't be able to detect it (UTF-16 case).
diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c
index a5b7a32..913eeaf 100644
--- a/dlls/msxml3/saxreader.c
+++ b/dlls/msxml3/saxreader.c
@@ -2144,7 +2144,7 @@
HRESULT hr;
hr = SAXLocator_create(This, &locator, vbInterface);
- if(FAILED(hr))
+ if (FAILED(hr))
return hr;
if (size >= 4)
@@ -2163,22 +2163,44 @@
}
}
+ /* if libxml2 detection failed try to guess */
+ if (encoding == XML_CHAR_ENCODING_NONE)
+ {
+ const WCHAR *ptr = (WCHAR*)buffer;
+ /* xml declaration with possibly specified encoding will still be handled by parser */
+ if ((size >= 2) && *ptr == '<' && ptr[1] != '?')
+ {
+ enc_name = (xmlChar*)xmlGetCharEncodingName(XML_CHAR_ENCODING_UTF16LE);
+ encoding = XML_CHAR_ENCODING_UTF16LE;
+ }
+ }
+ else if (encoding == XML_CHAR_ENCODING_UTF8)
+ enc_name = (xmlChar*)xmlGetCharEncodingName(encoding);
+ else
+ enc_name = NULL;
+
locator->pParserCtxt = xmlCreateMemoryParserCtxt(buffer, size);
- if(!locator->pParserCtxt)
+ if (!locator->pParserCtxt)
{
ISAXLocator_Release(&locator->ISAXLocator_iface);
return E_FAIL;
}
- if (encoding == XML_CHAR_ENCODING_UTF8)
+ if (enc_name)
+ {
locator->pParserCtxt->encoding = xmlStrdup(enc_name);
+ if (encoding == XML_CHAR_ENCODING_UTF16LE) {
+ TRACE("switching to %s\n", enc_name);
+ xmlSwitchEncoding(locator->pParserCtxt, encoding);
+ }
+ }
xmlFree(locator->pParserCtxt->sax);
locator->pParserCtxt->sax = &locator->saxreader->sax;
locator->pParserCtxt->userData = locator;
This->isParsing = TRUE;
- if(xmlParseDocument(locator->pParserCtxt)==-1 && locator->ret==S_OK)
+ if(xmlParseDocument(locator->pParserCtxt) == -1 && locator->ret == S_OK)
hr = E_FAIL;
else
hr = locator->ret;
diff --git a/dlls/msxml3/tests/saxreader.c b/dlls/msxml3/tests/saxreader.c
index 00a3553..5db2d11 100644
--- a/dlls/msxml3/tests/saxreader.c
+++ b/dlls/msxml3/tests/saxreader.c
@@ -2317,18 +2317,20 @@
{ 0 }
};
-static void test_encoding(void)
+static void test_saxreader_encoding(void)
{
const struct enc_test_entry_t *entry = encoding_test_data;
static const WCHAR testXmlW[] = {'t','e','s','t','.','x','m','l',0};
static const CHAR testXmlA[] = "test.xml";
- ISAXXMLReader *reader;
- DWORD written;
- HANDLE file;
- HRESULT hr;
while (entry->guid)
{
+ ISAXXMLReader *reader;
+ VARIANT input;
+ DWORD written;
+ HANDLE file;
+ HRESULT hr;
+
hr = CoCreateInstance(entry->guid, NULL, CLSCTX_INPROC_SERVER, &IID_ISAXXMLReader, (void**)&reader);
if (hr != S_OK)
{
@@ -2349,8 +2351,16 @@
ok(hr == entry->hr, "Expected 0x%08x, got 0x%08x. CLSID %s\n", entry->hr, hr, entry->clsid);
DeleteFileA(testXmlA);
+
+ /* try BSTR input with no BOM or '<?xml' instruction */
+ V_VT(&input) = VT_BSTR;
+ V_BSTR(&input) = _bstr_("<element></element>");
+ hr = ISAXXMLReader_parse(reader, input);
+ EXPECT_HR(hr, S_OK);
+
ISAXXMLReader_Release(reader);
+ free_bstrs();
entry++;
}
}
@@ -4474,7 +4484,7 @@
test_saxreader();
test_saxreader_properties();
test_saxreader_features();
- test_encoding();
+ test_saxreader_encoding();
test_dispex();
/* MXXMLWriter tests */