msxml3: Hint parser to use UTF-8 if it's specified by a BOM.
diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c
index a1df5c7..7d8caf4 100644
--- a/dlls/msxml3/saxreader.c
+++ b/dlls/msxml3/saxreader.c
@@ -1770,6 +1770,8 @@
/*** SAXXMLReader internal functions ***/
static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int size, BOOL vbInterface)
{
+ xmlCharEncoding encoding = XML_CHAR_ENCODING_NONE;
+ xmlChar *enc_name = NULL;
saxlocator *locator;
HRESULT hr;
@@ -1777,6 +1779,22 @@
if(FAILED(hr))
return hr;
+ if (size >= 4)
+ {
+ const unsigned char *buff = (unsigned char*)buffer;
+
+ encoding = xmlDetectCharEncoding((xmlChar*)buffer, 4);
+ enc_name = (xmlChar*)xmlGetCharEncodingName(encoding);
+ TRACE("detected encoding: %s\n", enc_name);
+ /* skip BOM, parser won't switch encodings and so won't skip it on its own */
+ if ((encoding == XML_CHAR_ENCODING_UTF8) &&
+ buff[0] == 0xEF && buff[1] == 0xBB && buff[2] == 0xBF)
+ {
+ buffer += 3;
+ size -= 3;
+ }
+ }
+
locator->pParserCtxt = xmlCreateMemoryParserCtxt(buffer, size);
if(!locator->pParserCtxt)
{
@@ -1784,6 +1802,9 @@
return E_FAIL;
}
+ if (encoding == XML_CHAR_ENCODING_UTF8)
+ locator->pParserCtxt->encoding = xmlStrdup(enc_name);
+
xmlFree(locator->pParserCtxt->sax);
locator->pParserCtxt->sax = &locator->saxreader->sax;
locator->pParserCtxt->userData = locator;