diff --git a/.gitignore b/.gitignore
index 16a436b..a007dcc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,9 +44,9 @@
 dlls/advapi32/svcctl.h
 dlls/advapi32/svcctl_c.c
 dlls/atl/atliface.h
-dlls/d3dx9_36/asmshader.tab.c
-dlls/d3dx9_36/asmshader.tab.h
-dlls/d3dx9_36/asmshader.yy.c
+dlls/d3dcompiler_43/asmshader.tab.c
+dlls/d3dcompiler_43/asmshader.tab.h
+dlls/d3dcompiler_43/asmshader.yy.c
 dlls/dispex/disp_ex.h
 dlls/dispex/disp_ex_p.c
 dlls/dxdiagn/fil_data.h
diff --git a/dlls/d3dcompiler_43/Makefile.in b/dlls/d3dcompiler_43/Makefile.in
index 3e6a7d6..c48c407 100644
--- a/dlls/d3dcompiler_43/Makefile.in
+++ b/dlls/d3dcompiler_43/Makefile.in
@@ -5,12 +5,19 @@
 MODULE    = d3dcompiler_43.dll
 IMPORTLIB = d3dcompiler
 IMPORTS   = dxguid uuid
+EXTRALIBS = $(LIBWPP)
 
 C_SRCS = \
+	asmparser.c \
+	asmutils.c \
 	blob.c \
+	bytecodewriter.c \
 	compiler.c \
 	d3dcompiler_43_main.c
 
+LEX_SRCS = asmshader.l
+BISON_SRCS = asmshader.y
+
 RC_SRCS = version.rc
 
 @MAKE_DLL_RULES@
diff --git a/dlls/d3dx9_36/asmparser.c b/dlls/d3dcompiler_43/asmparser.c
similarity index 99%
rename from dlls/d3dx9_36/asmparser.c
rename to dlls/d3dcompiler_43/asmparser.c
index b6f4fde..bcef8d4 100644
--- a/dlls/d3dx9_36/asmparser.c
+++ b/dlls/d3dcompiler_43/asmparser.c
@@ -24,7 +24,7 @@
 #include "wine/port.h"
 #include "wine/debug.h"
 
-#include "d3dx9_36_private.h"
+#include "d3dcompiler_private.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
 WINE_DECLARE_DEBUG_CHANNEL(parsed_shader);
diff --git a/dlls/d3dx9_36/asmshader.l b/dlls/d3dcompiler_43/asmshader.l
similarity index 99%
rename from dlls/d3dx9_36/asmshader.l
rename to dlls/d3dcompiler_43/asmshader.l
index 385449e..0ba7e7a 100644
--- a/dlls/d3dx9_36/asmshader.l
+++ b/dlls/d3dcompiler_43/asmshader.l
@@ -24,7 +24,7 @@
 #include "wine/port.h"
 #include "wine/debug.h"
 
-#include "d3dx9_36_private.h"
+#include "d3dcompiler_private.h"
 #include "asmshader.tab.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
diff --git a/dlls/d3dx9_36/asmshader.y b/dlls/d3dcompiler_43/asmshader.y
similarity index 99%
rename from dlls/d3dx9_36/asmshader.y
rename to dlls/d3dcompiler_43/asmshader.y
index d1a05db..764010e 100644
--- a/dlls/d3dx9_36/asmshader.y
+++ b/dlls/d3dcompiler_43/asmshader.y
@@ -24,7 +24,7 @@
 #include "wine/port.h"
 #include "wine/debug.h"
 
-#include "d3dx9_36_private.h"
+#include "d3dcompiler_private.h"
 
 #include <stdio.h>
 
diff --git a/dlls/d3dx9_36/asmutils.c b/dlls/d3dcompiler_43/asmutils.c
similarity index 99%
rename from dlls/d3dx9_36/asmutils.c
rename to dlls/d3dcompiler_43/asmutils.c
index 968cb84..68b25cf 100644
--- a/dlls/d3dx9_36/asmutils.c
+++ b/dlls/d3dcompiler_43/asmutils.c
@@ -23,7 +23,8 @@
 #include "config.h"
 #include "wine/debug.h"
 
-#include "d3dx9_36_private.h"
+#include "d3d9types.h"
+#include "d3dcompiler_private.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
 
diff --git a/dlls/d3dx9_36/bytecodewriter.c b/dlls/d3dcompiler_43/bytecodewriter.c
similarity index 99%
rename from dlls/d3dx9_36/bytecodewriter.c
rename to dlls/d3dcompiler_43/bytecodewriter.c
index e7bd8a5..01038fb 100644
--- a/dlls/d3dx9_36/bytecodewriter.c
+++ b/dlls/d3dcompiler_43/bytecodewriter.c
@@ -24,7 +24,8 @@
 #include "wine/port.h"
 #include "wine/debug.h"
 
-#include "d3dx9_36_private.h"
+#include "d3d9types.h"
+#include "d3dcompiler_private.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
 
diff --git a/dlls/d3dcompiler_43/compiler.c b/dlls/d3dcompiler_43/compiler.c
index edb2118..1c5c32b 100644
--- a/dlls/d3dcompiler_43/compiler.c
+++ b/dlls/d3dcompiler_43/compiler.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright 2009 Matteo Bruni
  * Copyright 2010 Matteo Bruni for CodeWeavers
  *
  * This library is free software; you can redistribute it and/or
@@ -16,19 +17,477 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  */
 
+#define COBJMACROS
 #include "config.h"
 #include "wine/port.h"
 #include "wine/debug.h"
 #include "wine/unicode.h"
 
 #include "d3dcompiler_private.h"
+#include "wine/wpp.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(d3dcompiler);
 
+#define D3DXERR_INVALIDDATA                      0x88760b59
+
+#define BUFFER_INITIAL_CAPACITY 256
+
+struct mem_file_desc /* In-memory "file" passed through the wpp I/O callbacks */
+{
+    const char *buffer; /* start of the file contents */
+    unsigned int size;  /* number of bytes available at buffer */
+    unsigned int pos;   /* current read offset, advanced by wpp_read_mem */
+};
+
+struct mem_file_desc current_shader;
+LPD3DINCLUDE current_include;
+
+#define INCLUDES_INITIAL_CAPACITY 4
+
+struct loaded_include /* Record of one include opened by wpp_open_mem */
+{
+    const char *name; /* file name pointer as received by wpp_open_mem (not copied) */
+    const char *data; /* buffer returned by ID3DInclude_Open; later used as parent data */
+};
+
+struct loaded_include *includes;
+int includes_capacity, includes_size;
+const char *parent_include;
+
+char *wpp_output;
+int wpp_output_capacity, wpp_output_size;
+
+char *wpp_messages;
+int wpp_messages_capacity, wpp_messages_size;
+
+/* Mutex used to guarantee a single invocation
+   of the D3DAssemble function at a time.
+   This is needed as wpp isn't thread-safe */
+static CRITICAL_SECTION wpp_mutex;
+static CRITICAL_SECTION_DEBUG wpp_mutex_debug =
+{
+    0, 0, &wpp_mutex,
+    { &wpp_mutex_debug.ProcessLocksList,
+      &wpp_mutex_debug.ProcessLocksList },
+      0, 0, { (DWORD_PTR)(__FILE__ ": wpp_mutex") }
+};
+static CRITICAL_SECTION wpp_mutex = { &wpp_mutex_debug, -1, 0, 0, 0, 0 };
+
+/* Preprocessor error reporting functions */
+static void wpp_write_message(const char *fmt, va_list args)
+{
+    char* newbuffer;
+    int rc, newsize;
+    va_list args_copy;
+    /* Append a formatted message to wpp_messages, growing the buffer as needed */
+    if(wpp_messages_capacity == 0)
+    {
+        wpp_messages = HeapAlloc(GetProcessHeap(), 0, MESSAGEBUFFER_INITIAL_SIZE);
+        if(wpp_messages == NULL)
+        {
+            ERR("Error allocating memory for parser messages\n");
+            return;
+        }
+        wpp_messages_capacity = MESSAGEBUFFER_INITIAL_SIZE;
+    }
+
+    while(1)
+    {
+        va_copy(args_copy, args); /* vsnprintf consumes its va_list, so each retry needs a fresh copy */
+        rc = vsnprintf(wpp_messages + wpp_messages_size,
+                       wpp_messages_capacity - wpp_messages_size, fmt, args_copy);
+        va_end(args_copy);
+
+        if (rc >= 0 && rc < wpp_messages_capacity - wpp_messages_size)
+        {
+            wpp_messages_size += rc; /* the terminator is not counted, so the next message overwrites it */
+            return;
+        }
+        /* rc < 0 (C89) or rc >= space left (C99): the text did not fit, resize the buffer */
+        newsize = wpp_messages_capacity * 2;
+        newbuffer = HeapReAlloc(GetProcessHeap(), 0, wpp_messages, newsize);
+        if(newbuffer == NULL)
+        {
+            ERR("Error reallocating memory for parser messages\n");
+            return;
+        }
+        wpp_messages = newbuffer;
+        wpp_messages_capacity = newsize;
+    }
+}
+
+static void PRINTF_ATTR(1,2) wpp_write_message_var(const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt); /* variadic front end: packs the arguments for wpp_write_message */
+    wpp_write_message(fmt, args);
+    va_end(args);
+}
+
+static void wpp_error(const char *file, int line, int col, const char *near,
+                      const char *msg, va_list ap)
+{
+    const char *origin = file ? file : "'main file'"; /* placeholder when no file name is given */
+    wpp_write_message_var("%s:%d:%d: %s: ", origin, line, col, "Error");
+    wpp_write_message(msg, ap); /* the caller-formatted message body follows the location prefix */
+    wpp_write_message_var("\n");
+}
+
+static void wpp_warning(const char *file, int line, int col, const char *near,
+                        const char *msg, va_list ap)
+{
+    const char *origin = file ? file : "'main file'"; /* placeholder when no file name is given */
+    wpp_write_message_var("%s:%d:%d: %s: ", origin, line, col, "Warning");
+    wpp_write_message(msg, ap); /* the caller-formatted message body follows the location prefix */
+    wpp_write_message_var("\n");
+}
+
+static char *wpp_lookup_mem(const char *filename, const char *parent_name,
+                            char **include_path, int include_path_count)
+{
+    /* Only the parent is resolved here; the file itself is opened later by wpp_open_mem */
+    size_t name_size;
+    char *copy;
+    int idx = 0;
+
+    parent_include = NULL;
+    if(*parent_name)
+    {
+        /* Stop at the first loaded include whose name equals the parent's */
+        while(idx < includes_size && strcmp(parent_name, includes[idx].name) != 0)
+            idx++;
+        if(idx < includes_size)
+            parent_include = includes[idx].data;
+        if(!parent_include)
+        {
+            ERR("Parent include file missing\n");
+            return NULL;
+        }
+    }
+
+    /* Hand back a malloc'ed duplicate of the requested name (NULL if malloc fails) */
+    name_size = strlen(filename) + 1;
+    copy = malloc(name_size);
+    if(copy)
+        memcpy(copy, filename, name_size);
+    return copy;
+}
+
+static void *wpp_open_mem(const char *filename, int type)
+{
+    struct mem_file_desc *desc;
+    HRESULT hr;
+
+    if(filename[0] == '\0') /* "" means to load the initial shader */
+    {
+        current_shader.pos = 0;
+        return &current_shader;
+    }
+
+    if(current_include == NULL) return NULL; /* no include handler: includes cannot be opened */
+    desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*desc));
+    if(!desc)
+    {
+        ERR("Error allocating memory\n");
+        return NULL;
+    }
+    hr = ID3DInclude_Open(current_include,
+                          type ? D3D_INCLUDE_SYSTEM : D3D_INCLUDE_LOCAL,
+                          filename, parent_include, (LPCVOID *)&desc->buffer,
+                          &desc->size);
+    if(FAILED(hr))
+    {
+        HeapFree(GetProcessHeap(), 0, desc);
+        return NULL;
+    }
+    /* Record the include so wpp_lookup_mem can find it as a parent; capacities count entries */
+    if(includes_capacity == includes_size)
+    {
+        if(includes_capacity == 0)
+        {
+            includes = HeapAlloc(GetProcessHeap(), 0, INCLUDES_INITIAL_CAPACITY * sizeof(*includes));
+            if(includes == NULL)
+            {
+                ERR("Error allocating memory for the loaded includes structure\n");
+                goto error;
+            }
+            includes_capacity = INCLUDES_INITIAL_CAPACITY;
+        }
+        else
+        {
+            int newcapacity = includes_capacity * 2;
+            struct loaded_include *newincludes =
+                HeapReAlloc(GetProcessHeap(), 0, includes, newcapacity * sizeof(*includes));
+            if(newincludes == NULL)
+            {
+                ERR("Error reallocating memory for the loaded includes structure\n");
+                goto error;
+            }
+            includes = newincludes;
+            includes_capacity = newcapacity;
+        }
+    }
+    includes[includes_size].name = filename;
+    includes[includes_size++].data = desc->buffer;
+
+    desc->pos = 0;
+    return desc;
+
+error:
+    ID3DInclude_Close(current_include, desc->buffer); /* give the opened data back before failing */
+    HeapFree(GetProcessHeap(), 0, desc);
+    return NULL;
+}
+
+static void wpp_close_mem(void *file)
+{
+    struct mem_file_desc *desc = file;
+
+    if(desc != &current_shader) /* the main shader descriptor is a global, nothing to release */
+    {
+        if(current_include)
+            ID3DInclude_Close(current_include, desc->buffer);
+        else /* the buffer carries an explicit size, so print it length-bounded */
+            ERR("current_include == NULL, desc == %p, buffer = %s\n",
+                desc, debugstr_an(desc->buffer, desc->size));
+
+        HeapFree(GetProcessHeap(), 0, desc);
+    }
+}
+
+static int wpp_read_mem(void *file, char *buffer, unsigned int len)
+{
+    struct mem_file_desc *src = file;
+    unsigned int avail = src->size - src->pos; /* bytes not yet consumed */
+    if(!(len < avail)) len = avail; /* clamp the request to what is left */
+    memcpy(buffer, src->buffer + src->pos, len);
+    src->pos += len;
+    return len; /* number of bytes actually copied */
+}
+
+static void wpp_write_mem(const char *buffer, unsigned int len)
+{
+    char *new_wpp_output;
+    int new_capacity;
+    /* Append len bytes to the growable wpp_output buffer */
+    if(wpp_output_capacity == 0)
+    {
+        wpp_output = HeapAlloc(GetProcessHeap(), 0, BUFFER_INITIAL_CAPACITY);
+        if(!wpp_output)
+        {
+            ERR("Error allocating memory\n");
+            return;
+        }
+        wpp_output_capacity = BUFFER_INITIAL_CAPACITY;
+    }
+    if(len > wpp_output_capacity - wpp_output_size)
+    {
+        new_capacity = wpp_output_capacity; /* only committed once the reallocation succeeds */
+        while(len > new_capacity - wpp_output_size)
+            new_capacity *= 2;
+        new_wpp_output = HeapReAlloc(GetProcessHeap(), 0, wpp_output, new_capacity);
+        if(!new_wpp_output)
+        {
+            ERR("Error allocating memory\n");
+            return;
+        }
+        wpp_output = new_wpp_output;
+        wpp_output_capacity = new_capacity;
+    }
+    memcpy(wpp_output + wpp_output_size, buffer, len);
+    wpp_output_size += len;
+}
+
+static int wpp_close_output(void)
+{
+    /* Resize to the written length plus one byte and terminate the text */
+    char *resized = HeapReAlloc(GetProcessHeap(), 0, wpp_output, wpp_output_size + 1);
+    if(resized == NULL) return 0; /* 0 signals failure, 1 success */
+    resized[wpp_output_size] = '\0';
+    wpp_output = resized;
+    return 1;
+}
+
+static HRESULT assemble_shader(const char *preprocShader, const char *preprocMessages,
+                               LPD3DBLOB* ppShader, LPD3DBLOB* ppErrorMsgs)
+{
+    struct bwriter_shader *shader;
+    char *messages = NULL;
+    HRESULT hr;
+    DWORD *res;
+    LPD3DBLOB buffer;
+    int size;
+    char *pos;
+    /* Assemble preprocessed text; returns bytecode in *ppShader and messages in *ppErrorMsgs */
+    shader = SlAssembleShader(preprocShader, &messages);
+
+    if(messages || preprocMessages)
+    {
+        if(preprocMessages)
+        {
+            TRACE("Preprocessor messages:\n");
+            TRACE("%s", preprocMessages);
+        }
+        if(messages)
+        {
+            TRACE("Assembler messages:\n");
+            TRACE("%s", messages);
+        }
+
+        TRACE("Shader source:\n");
+        TRACE("%s\n", debugstr_a(preprocShader));
+
+        if(ppErrorMsgs)
+        {
+            size = (messages ? strlen(messages) : 0) + /* both texts plus a single terminator */
+                (preprocMessages ? strlen(preprocMessages) : 0) + 1;
+            hr = D3DCreateBlob(size, &buffer);
+            if(FAILED(hr))
+            {
+                HeapFree(GetProcessHeap(), 0, messages);
+                if(shader) SlDeleteShader(shader);
+                return hr;
+            }
+            pos = ID3D10Blob_GetBufferPointer(buffer);
+            if(preprocMessages)
+            {
+                CopyMemory(pos, preprocMessages, strlen(preprocMessages) + 1);
+                pos += strlen(preprocMessages); /* stay on the terminator so the next copy overwrites it */
+            }
+            if(messages)
+                CopyMemory(pos, messages, strlen(messages) + 1);
+
+            *ppErrorMsgs = buffer;
+        }
+
+        HeapFree(GetProcessHeap(), 0, messages);
+    }
+
+    if(shader == NULL) /* messages, if any, have already been reported above */
+    {
+        ERR("Asm reading failed\n");
+        return D3DXERR_INVALIDDATA;
+    }
+
+    hr = SlWriteBytecode(shader, 9, &res);
+    SlDeleteShader(shader); /* the intermediate shader is no longer needed, success or not */
+    if(FAILED(hr))
+    {
+        ERR("SlWriteBytecode failed with 0x%08x\n", hr);
+        return D3DXERR_INVALIDDATA;
+    }
+
+    if(ppShader)
+    {
+        size = HeapSize(GetProcessHeap(), 0, res); /* bytecode length is taken from the heap block size */
+        hr = D3DCreateBlob(size, &buffer);
+        if(FAILED(hr))
+        {
+            HeapFree(GetProcessHeap(), 0, res);
+            return hr;
+        }
+        CopyMemory(ID3D10Blob_GetBufferPointer(buffer), res, size);
+        *ppShader = buffer;
+    }
+
+    HeapFree(GetProcessHeap(), 0, res);
+
+    return S_OK;
+}
+
 HRESULT WINAPI D3DAssemble(LPCVOID data, SIZE_T datasize, LPCSTR filename,
                            const D3D_SHADER_MACRO *defines, ID3DInclude *include,
                            UINT flags,
                            ID3DBlob **shader, ID3DBlob **error_messages){
-    FIXME("stub\n");
-    return D3DERR_INVALIDCALL;
+    int ret;
+    HRESULT hr;
+    CONST D3D_SHADER_MACRO* def = defines;
+    /* Preprocess data with wpp, then hand the result to assemble_shader */
+    static const struct wpp_callbacks wpp_callbacks = {
+        wpp_lookup_mem,
+        wpp_open_mem,
+        wpp_close_mem,
+        wpp_read_mem,
+        wpp_write_mem,
+        wpp_error,
+        wpp_warning,
+    };
+
+    EnterCriticalSection(&wpp_mutex); /* wpp and the file-level state are shared between callers */
+
+    /* TODO: flags */
+    if(flags) FIXME("flags: %x\n", flags);
+
+    if(def != NULL)
+    {
+        while(def->Name != NULL) /* the macro array ends at the entry whose Name is NULL */
+        {
+            wpp_add_define(def->Name, def->Definition);
+            def++;
+        }
+    }
+    current_include = include;
+    includes_size = 0;
+
+    if(shader) *shader = NULL;
+    if(error_messages) *error_messages = NULL;
+    wpp_output_size = wpp_output_capacity = 0;
+    wpp_output = NULL;
+
+    /* Preprocess shader */
+    wpp_set_callbacks(&wpp_callbacks);
+    wpp_messages_size = wpp_messages_capacity = 0;
+    wpp_messages = NULL;
+    current_shader.buffer = data;
+    current_shader.size = datasize;
+
+    ret = wpp_parse("", NULL); /* "" makes wpp_open_mem return the in-memory shader */
+    if(!wpp_close_output())
+        ret = 1; /* failing to terminate the output is treated like a preprocessing error */
+    if(ret)
+    {
+        TRACE("Error during shader preprocessing\n");
+        if(wpp_messages)
+        {
+            int size;
+            LPD3DBLOB buffer;
+
+            TRACE("Preprocessor messages:\n");
+            TRACE("%s", wpp_messages);
+
+            if(error_messages)
+            {
+                size = strlen(wpp_messages) + 1;
+                hr = D3DCreateBlob(size, &buffer);
+                if(FAILED(hr)) goto cleanup;
+                CopyMemory(ID3D10Blob_GetBufferPointer(buffer), wpp_messages, size);
+                *error_messages = buffer;
+            }
+        }
+        if(data)
+        {
+            TRACE("Shader source:\n");
+            TRACE("%s\n", debugstr_an(data, datasize));
+        }
+        hr = D3DXERR_INVALIDDATA;
+        goto cleanup;
+    }
+
+    hr = assemble_shader(wpp_output, wpp_messages, shader, error_messages);
+
+cleanup:
+    /* Remove the previously added defines */
+    if(defines != NULL)
+    {
+        while(defines->Name != NULL)
+        {
+            wpp_del_define(defines->Name);
+            defines++;
+        }
+    }
+    HeapFree(GetProcessHeap(), 0, wpp_messages);
+    HeapFree(GetProcessHeap(), 0, wpp_output);
+    LeaveCriticalSection(&wpp_mutex);
+    return hr;
 }
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h
index 0c7cba1..c085c90 100644
--- a/dlls/d3dcompiler_43/d3dcompiler_private.h
+++ b/dlls/d3dcompiler_43/d3dcompiler_private.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright 2008 Stefan Dösinger
+ * Copyright 2009 Matteo Bruni
  * Copyright 2010 Rico Schüller
  *
  * This library is free software; you can redistribute it and/or
@@ -47,4 +49,527 @@
 
 HRESULT d3dcompiler_blob_init(struct d3dcompiler_blob *blob, SIZE_T data_size) DECLSPEC_HIDDEN;
 
+/* Shader assembler definitions */
+typedef enum _shader_type {
+    ST_VERTEX,
+    ST_PIXEL,
+} shader_type;
+
+typedef enum BWRITER_COMPARISON_TYPE {
+    BWRITER_COMPARISON_NONE,
+    BWRITER_COMPARISON_GT,
+    BWRITER_COMPARISON_EQ,
+    BWRITER_COMPARISON_GE,
+    BWRITER_COMPARISON_LT,
+    BWRITER_COMPARISON_NE,
+    BWRITER_COMPARISON_LE
+} BWRITER_COMPARISON_TYPE;
+
+struct constant {
+    DWORD                   regnum;
+    union {
+        float               f;
+        INT                 i;
+        BOOL                b;
+        DWORD               d;
+    }                       value[4];
+};
+
+struct shader_reg {
+    DWORD                   type;
+    DWORD                   regnum;
+    struct shader_reg       *rel_reg;
+    DWORD                   srcmod;
+    union {
+        DWORD               swizzle;
+        DWORD               writemask;
+    };
+};
+
+struct instruction {
+    DWORD                   opcode;
+    DWORD                   dstmod;
+    DWORD                   shift;
+    BWRITER_COMPARISON_TYPE comptype;
+    BOOL                    has_dst;
+    struct shader_reg       dst;
+    struct shader_reg       *src;
+    unsigned int            num_srcs; /* For freeing the rel_regs */
+    BOOL                    has_predicate;
+    struct shader_reg       predicate;
+    BOOL                    coissue;
+};
+
+struct declaration {
+    DWORD                   usage, usage_idx;
+    DWORD                   regnum;
+    DWORD                   mod;
+    DWORD                   writemask;
+    BOOL                    builtin;
+};
+
+struct samplerdecl {
+    DWORD                   type;
+    DWORD                   regnum;
+    DWORD                   mod;
+};
+
+#define INSTRARRAY_INITIAL_SIZE 8
+struct bwriter_shader {
+    shader_type             type;
+
+    /* Shader version selected */
+    DWORD                   version;
+
+    /* Local constants. Every constant that is not defined below is loaded from
+     * the global constant set at shader runtime
+     */
+    struct constant         **constF;
+    struct constant         **constI;
+    struct constant         **constB;
+    unsigned int            num_cf, num_ci, num_cb;
+
+    /* Declared input and output varyings */
+    struct declaration      *inputs, *outputs;
+    unsigned int            num_inputs, num_outputs;
+    struct samplerdecl      *samplers;
+    unsigned int            num_samplers;
+
+    /* Are special pixel shader 3.0 registers declared? */
+    BOOL                    vPos, vFace;
+
+    /* Array of shader instructions - The shader code itself */
+    struct instruction      **instr;
+    unsigned int            num_instrs, instr_alloc_size;
+};
+
+static inline LPVOID asm_alloc(SIZE_T size) {
+    return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
+}
+
+static inline LPVOID asm_realloc(LPVOID ptr, SIZE_T size) {
+    return HeapReAlloc(GetProcessHeap(), 0, ptr, size);
+}
+
+static inline BOOL asm_free(LPVOID ptr) {
+    return HeapFree(GetProcessHeap(), 0, ptr);
+}
+
+struct asm_parser;
+
+/* This structure is only used in asmshader.y, but since the .l file accesses the semantic types
+ * too it has to know it as well
+ */
+struct rel_reg {
+    BOOL            has_rel_reg;
+    DWORD           type;
+    DWORD           additional_offset;
+    DWORD           rel_regnum;
+    DWORD           swizzle;
+};
+
+#define MAX_SRC_REGS 4
+
+struct src_regs {
+    struct shader_reg reg[MAX_SRC_REGS];
+    unsigned int      count;
+};
+
+struct asmparser_backend {
+    void (*constF)(struct asm_parser *This, DWORD reg, float x, float y, float z, float w);
+    void (*constI)(struct asm_parser *This, DWORD reg, INT x, INT y, INT z, INT w);
+    void (*constB)(struct asm_parser *This, DWORD reg, BOOL x);
+
+    void (*dstreg)(struct asm_parser *This, struct instruction *instr,
+                   const struct shader_reg *dst);
+    void (*srcreg)(struct asm_parser *This, struct instruction *instr, int num,
+                   const struct shader_reg *src);
+
+    void (*predicate)(struct asm_parser *This,
+                      const struct shader_reg *predicate);
+    void (*coissue)(struct asm_parser *This);
+
+    void (*dcl_output)(struct asm_parser *This, DWORD usage, DWORD num,
+                       const struct shader_reg *reg);
+    void (*dcl_input)(struct asm_parser *This, DWORD usage, DWORD num,
+                      DWORD mod, const struct shader_reg *reg);
+    void (*dcl_sampler)(struct asm_parser *This, DWORD samptype, DWORD mod,
+                        DWORD regnum, unsigned int line_no);
+
+    void (*end)(struct asm_parser *This);
+
+    void (*instr)(struct asm_parser *This, DWORD opcode, DWORD mod, DWORD shift,
+                  BWRITER_COMPARISON_TYPE comp, const struct shader_reg *dst,
+                  const struct src_regs *srcs, int expectednsrcs);
+};
+
+struct instruction *alloc_instr(unsigned int srcs);
+BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr);
+BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w);
+BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w);
+BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x);
+BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
+                        DWORD usage_idx, DWORD mod, BOOL output,
+                        DWORD regnum, DWORD writemask, BOOL builtin);
+BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype,
+                    DWORD mod, DWORD regnum);
+
+#define MESSAGEBUFFER_INITIAL_SIZE 256
+
+struct asm_parser {
+    /* The function table of the parser implementation */
+    const struct asmparser_backend *funcs;
+
+    /* Private data follows */
+    struct bwriter_shader *shader;
+    unsigned int m3x3pad_count;
+
+    enum parse_status {
+        PARSE_SUCCESS = 0,
+        PARSE_WARN = 1,
+        PARSE_ERR = 2
+    } status;
+    char *messages;
+    unsigned int messagesize;
+    unsigned int messagecapacity;
+    unsigned int line_no;
+};
+
+extern struct asm_parser asm_ctx;
+
+void create_vs10_parser(struct asm_parser *ret);
+void create_vs11_parser(struct asm_parser *ret);
+void create_vs20_parser(struct asm_parser *ret);
+void create_vs2x_parser(struct asm_parser *ret);
+void create_vs30_parser(struct asm_parser *ret);
+void create_ps10_parser(struct asm_parser *ret);
+void create_ps11_parser(struct asm_parser *ret);
+void create_ps12_parser(struct asm_parser *ret);
+void create_ps13_parser(struct asm_parser *ret);
+void create_ps14_parser(struct asm_parser *ret);
+void create_ps20_parser(struct asm_parser *ret);
+void create_ps2x_parser(struct asm_parser *ret);
+void create_ps30_parser(struct asm_parser *ret);
+
+struct bwriter_shader *parse_asm_shader(char **messages);
+
+#ifdef __GNUC__
+#define PRINTF_ATTR(fmt,args) __attribute__((format (printf,fmt,args)))
+#else
+#define PRINTF_ATTR(fmt,args)
+#endif
+
+void asmparser_message(struct asm_parser *ctx, const char *fmt, ...) PRINTF_ATTR(2,3);
+void set_parse_status(struct asm_parser *ctx, enum parse_status status);
+
+/* A reasonable value as initial size */
+#define BYTECODEBUFFER_INITIAL_SIZE 32
+struct bytecode_buffer {
+    DWORD *data;
+    DWORD size;
+    DWORD alloc_size;
+    /* For tracking rare out of memory situations without passing
+     * return values around everywhere
+     */
+    HRESULT state;
+};
+
+struct bc_writer; /* Predeclaration for use in vtable parameters */
+
+typedef void (*instr_writer)(struct bc_writer *This,
+                             const struct instruction *instr,
+                             struct bytecode_buffer *buffer);
+
+struct bytecode_backend {
+    void (*header)(struct bc_writer *This, const struct bwriter_shader *shader,
+		   struct bytecode_buffer *buffer);
+    void (*end)(struct bc_writer *This, const struct bwriter_shader *shader,
+                struct bytecode_buffer *buffer);
+    void (*srcreg)(struct bc_writer *This, const struct shader_reg *reg,
+                   struct bytecode_buffer *buffer);
+    void (*dstreg)(struct bc_writer *This, const struct shader_reg *reg,
+                   struct bytecode_buffer *buffer, DWORD shift, DWORD mod);
+    void (*opcode)(struct bc_writer *This, const struct instruction *instr,
+                   DWORD token, struct bytecode_buffer *buffer);
+
+    const struct instr_handler_table {
+        DWORD opcode;
+        instr_writer func;
+    } *instructions;
+};
+
+/* Bytecode writing stuff */
+struct bc_writer {
+    const struct bytecode_backend *funcs;
+
+    /* Avoid result checking */
+    HRESULT                       state;
+
+    DWORD                         version;
+
+    /* Vertex shader varying mapping */
+    DWORD                         oPos_regnum;
+    DWORD                         oD_regnum[2];
+    DWORD                         oT_regnum[8];
+    DWORD                         oFog_regnum;
+    DWORD                         oFog_mask;
+    DWORD                         oPts_regnum;
+    DWORD                         oPts_mask;
+
+    /* Pixel shader specific members */
+    DWORD                         t_regnum[8];
+    DWORD                         v_regnum[2];
+};
+
+/* Debug utility routines */
+const char *debug_print_srcmod(DWORD mod);
+const char *debug_print_dstmod(DWORD mod);
+const char *debug_print_shift(DWORD shift);
+const char *debug_print_dstreg(const struct shader_reg *reg);
+const char *debug_print_srcreg(const struct shader_reg *reg);
+const char *debug_print_swizzle(DWORD swizzle);
+const char *debug_print_writemask(DWORD mask);
+const char *debug_print_comp(DWORD comp);
+const char *debug_print_opcode(DWORD opcode);
+
+/* Utilities for internal->d3d constant mapping */
+DWORD d3d9_swizzle(DWORD bwriter_swizzle);
+DWORD d3d9_writemask(DWORD bwriter_writemask);
+DWORD d3d9_srcmod(DWORD bwriter_srcmod);
+DWORD d3d9_dstmod(DWORD bwriter_mod);
+DWORD d3d9_comparetype(DWORD bwriter_comparetype);
+DWORD d3d9_sampler(DWORD bwriter_sampler);
+DWORD d3d9_register(DWORD bwriter_register);
+DWORD d3d9_opcode(DWORD bwriter_opcode);
+
+/* Used to signal an incorrect swizzle/writemask */
+#define SWIZZLE_ERR ~0U
+
+/*
+  Enumerations and defines used in the bytecode writer
+  intermediate representation
+*/
+typedef enum _BWRITERSHADER_INSTRUCTION_OPCODE_TYPE {
+    BWRITERSIO_NOP,
+    BWRITERSIO_MOV,
+    BWRITERSIO_ADD,
+    BWRITERSIO_SUB,
+    BWRITERSIO_MAD,
+    BWRITERSIO_MUL,
+    BWRITERSIO_RCP,
+    BWRITERSIO_RSQ,
+    BWRITERSIO_DP3,
+    BWRITERSIO_DP4,
+    BWRITERSIO_MIN,
+    BWRITERSIO_MAX,
+    BWRITERSIO_SLT,
+    BWRITERSIO_SGE,
+    BWRITERSIO_EXP,
+    BWRITERSIO_LOG,
+    BWRITERSIO_LIT,
+    BWRITERSIO_DST,
+    BWRITERSIO_LRP,
+    BWRITERSIO_FRC,
+    BWRITERSIO_M4x4,
+    BWRITERSIO_M4x3,
+    BWRITERSIO_M3x4,
+    BWRITERSIO_M3x3,
+    BWRITERSIO_M3x2,
+    BWRITERSIO_CALL,
+    BWRITERSIO_CALLNZ,
+    BWRITERSIO_LOOP,
+    BWRITERSIO_RET,
+    BWRITERSIO_ENDLOOP,
+    BWRITERSIO_LABEL,
+    BWRITERSIO_DCL,
+    BWRITERSIO_POW,
+    BWRITERSIO_CRS,
+    BWRITERSIO_SGN,
+    BWRITERSIO_ABS,
+    BWRITERSIO_NRM,
+    BWRITERSIO_SINCOS,
+    BWRITERSIO_REP,
+    BWRITERSIO_ENDREP,
+    BWRITERSIO_IF,
+    BWRITERSIO_IFC,
+    BWRITERSIO_ELSE,
+    BWRITERSIO_ENDIF,
+    BWRITERSIO_BREAK,
+    BWRITERSIO_BREAKC,
+    BWRITERSIO_MOVA,
+    BWRITERSIO_DEFB,
+    BWRITERSIO_DEFI,
+
+    BWRITERSIO_TEXCOORD,
+    BWRITERSIO_TEXKILL,
+    BWRITERSIO_TEX,
+    BWRITERSIO_TEXBEM,
+    BWRITERSIO_TEXBEML,
+    BWRITERSIO_TEXREG2AR,
+    BWRITERSIO_TEXREG2GB,
+    BWRITERSIO_TEXM3x2PAD,
+    BWRITERSIO_TEXM3x2TEX,
+    BWRITERSIO_TEXM3x3PAD,
+    BWRITERSIO_TEXM3x3TEX,
+    BWRITERSIO_TEXM3x3SPEC,
+    BWRITERSIO_TEXM3x3VSPEC,
+    BWRITERSIO_EXPP,
+    BWRITERSIO_LOGP,
+    BWRITERSIO_CND,
+    BWRITERSIO_DEF,
+    BWRITERSIO_TEXREG2RGB,
+    BWRITERSIO_TEXDP3TEX,
+    BWRITERSIO_TEXM3x2DEPTH,
+    BWRITERSIO_TEXDP3,
+    BWRITERSIO_TEXM3x3,
+    BWRITERSIO_TEXDEPTH,
+    BWRITERSIO_CMP,
+    BWRITERSIO_BEM,
+    BWRITERSIO_DP2ADD,
+    BWRITERSIO_DSX,
+    BWRITERSIO_DSY,
+    BWRITERSIO_TEXLDD,
+    BWRITERSIO_SETP,
+    BWRITERSIO_TEXLDL,
+    BWRITERSIO_BREAKP,
+    BWRITERSIO_TEXLDP,
+    BWRITERSIO_TEXLDB,
+
+    BWRITERSIO_PHASE,
+    BWRITERSIO_COMMENT,
+    BWRITERSIO_END,
+} BWRITERSHADER_INSTRUCTION_OPCODE_TYPE;
+
+typedef enum _BWRITERSHADER_PARAM_REGISTER_TYPE {
+    BWRITERSPR_TEMP,
+    BWRITERSPR_INPUT,
+    BWRITERSPR_CONST,
+    BWRITERSPR_ADDR,
+    BWRITERSPR_TEXTURE,
+    BWRITERSPR_RASTOUT,
+    BWRITERSPR_ATTROUT,
+    BWRITERSPR_TEXCRDOUT,
+    BWRITERSPR_OUTPUT,
+    BWRITERSPR_CONSTINT,
+    BWRITERSPR_COLOROUT,
+    BWRITERSPR_DEPTHOUT,
+    BWRITERSPR_SAMPLER,
+    BWRITERSPR_CONSTBOOL,
+    BWRITERSPR_LOOP,
+    BWRITERSPR_MISCTYPE,
+    BWRITERSPR_LABEL,
+    BWRITERSPR_PREDICATE
+} BWRITERSHADER_PARAM_REGISTER_TYPE;
+
+typedef enum _BWRITERVS_RASTOUT_OFFSETS
+{
+    BWRITERSRO_POSITION,
+    BWRITERSRO_FOG,
+    BWRITERSRO_POINT_SIZE
+} BWRITERVS_RASTOUT_OFFSETS;
+
+#define BWRITERSP_WRITEMASK_0   0x1 /* .x r */
+#define BWRITERSP_WRITEMASK_1   0x2 /* .y g */
+#define BWRITERSP_WRITEMASK_2   0x4 /* .z b */
+#define BWRITERSP_WRITEMASK_3   0x8 /* .w a */
+#define BWRITERSP_WRITEMASK_ALL 0xf /* all */
+
+typedef enum _BWRITERSHADER_PARAM_DSTMOD_TYPE {
+    BWRITERSPDM_NONE = 0,
+    BWRITERSPDM_SATURATE = 1,
+    BWRITERSPDM_PARTIALPRECISION = 2,
+    BWRITERSPDM_MSAMPCENTROID = 4,
+} BWRITERSHADER_PARAM_DSTMOD_TYPE;
+
+typedef enum _BWRITERSAMPLER_TEXTURE_TYPE {
+    BWRITERSTT_UNKNOWN = 0,
+    BWRITERSTT_1D = 1,
+    BWRITERSTT_2D = 2,
+    BWRITERSTT_CUBE = 3,
+    BWRITERSTT_VOLUME = 4,
+} BWRITERSAMPLER_TEXTURE_TYPE;
+
+#define BWRITERSI_TEXLD_PROJECT 1
+#define BWRITERSI_TEXLD_BIAS    2
+
+typedef enum _BWRITERSHADER_PARAM_SRCMOD_TYPE {
+    BWRITERSPSM_NONE = 0,
+    BWRITERSPSM_NEG,
+    BWRITERSPSM_BIAS,
+    BWRITERSPSM_BIASNEG,
+    BWRITERSPSM_SIGN,
+    BWRITERSPSM_SIGNNEG,
+    BWRITERSPSM_COMP,
+    BWRITERSPSM_X2,
+    BWRITERSPSM_X2NEG,
+    BWRITERSPSM_DZ,
+    BWRITERSPSM_DW,
+    BWRITERSPSM_ABS,
+    BWRITERSPSM_ABSNEG,
+    BWRITERSPSM_NOT,
+} BWRITERSHADER_PARAM_SRCMOD_TYPE;
+
+#define BWRITER_SM1_VS  0xfffe
+#define BWRITER_SM1_PS  0xffff
+
+#define BWRITERPS_VERSION(major, minor) (((DWORD)BWRITER_SM1_PS << 16) | ((major) << 8) | (minor)) /* cast first: 0xffff << 16 overflows int */
+#define BWRITERVS_VERSION(major, minor) (((DWORD)BWRITER_SM1_VS << 16) | ((major) << 8) | (minor)) /* cast first: 0xfffe << 16 overflows int */
+
+#define BWRITERVS_SWIZZLE_SHIFT      16
+#define BWRITERVS_SWIZZLE_MASK       (0xFF << BWRITERVS_SWIZZLE_SHIFT)
+
+#define BWRITERVS_X_X       (0 << BWRITERVS_SWIZZLE_SHIFT)
+#define BWRITERVS_X_Y       (1 << BWRITERVS_SWIZZLE_SHIFT)
+#define BWRITERVS_X_Z       (2 << BWRITERVS_SWIZZLE_SHIFT)
+#define BWRITERVS_X_W       (3 << BWRITERVS_SWIZZLE_SHIFT)
+
+#define BWRITERVS_Y_X       (0 << (BWRITERVS_SWIZZLE_SHIFT + 2))
+#define BWRITERVS_Y_Y       (1 << (BWRITERVS_SWIZZLE_SHIFT + 2))
+#define BWRITERVS_Y_Z       (2 << (BWRITERVS_SWIZZLE_SHIFT + 2))
+#define BWRITERVS_Y_W       (3 << (BWRITERVS_SWIZZLE_SHIFT + 2))
+
+#define BWRITERVS_Z_X       (0 << (BWRITERVS_SWIZZLE_SHIFT + 4))
+#define BWRITERVS_Z_Y       (1 << (BWRITERVS_SWIZZLE_SHIFT + 4))
+#define BWRITERVS_Z_Z       (2 << (BWRITERVS_SWIZZLE_SHIFT + 4))
+#define BWRITERVS_Z_W       (3 << (BWRITERVS_SWIZZLE_SHIFT + 4))
+
+#define BWRITERVS_W_X       (0 << (BWRITERVS_SWIZZLE_SHIFT + 6))
+#define BWRITERVS_W_Y       (1 << (BWRITERVS_SWIZZLE_SHIFT + 6))
+#define BWRITERVS_W_Z       (2 << (BWRITERVS_SWIZZLE_SHIFT + 6))
+#define BWRITERVS_W_W       (3 << (BWRITERVS_SWIZZLE_SHIFT + 6))
+
+#define BWRITERVS_NOSWIZZLE (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z | BWRITERVS_W_W)
+
+#define BWRITERVS_SWIZZLE_X (BWRITERVS_X_X | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)
+#define BWRITERVS_SWIZZLE_Y (BWRITERVS_X_Y | BWRITERVS_Y_Y | BWRITERVS_Z_Y | BWRITERVS_W_Y)
+#define BWRITERVS_SWIZZLE_Z (BWRITERVS_X_Z | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)
+#define BWRITERVS_SWIZZLE_W (BWRITERVS_X_W | BWRITERVS_Y_W | BWRITERVS_Z_W | BWRITERVS_W_W)
+
+typedef enum _BWRITERDECLUSAGE {
+    BWRITERDECLUSAGE_POSITION,
+    BWRITERDECLUSAGE_BLENDWEIGHT,
+    BWRITERDECLUSAGE_BLENDINDICES,
+    BWRITERDECLUSAGE_NORMAL,
+    BWRITERDECLUSAGE_PSIZE,
+    BWRITERDECLUSAGE_TEXCOORD,
+    BWRITERDECLUSAGE_TANGENT,
+    BWRITERDECLUSAGE_BINORMAL,
+    BWRITERDECLUSAGE_TESSFACTOR,
+    BWRITERDECLUSAGE_POSITIONT,
+    BWRITERDECLUSAGE_COLOR,
+    BWRITERDECLUSAGE_FOG,
+    BWRITERDECLUSAGE_DEPTH,
+    BWRITERDECLUSAGE_SAMPLE
+} BWRITERDECLUSAGE;
+
+/* ps 1.x texture registers mappings */
+#define T0_REG          2
+#define T1_REG          3
+#define T2_REG          4
+#define T3_REG          5
+
+struct bwriter_shader *SlAssembleShader(const char *text, char **messages);
+DWORD SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result);
+void SlDeleteShader(struct bwriter_shader *shader);
+
 #endif /* __WINE_D3DCOMPILER_PRIVATE_H */
diff --git a/dlls/d3dx9_36/Makefile.in b/dlls/d3dx9_36/Makefile.in
index 4c97c0d..cd24e62 100644
--- a/dlls/d3dx9_36/Makefile.in
+++ b/dlls/d3dx9_36/Makefile.in
@@ -4,13 +4,9 @@
 VPATH     = @srcdir@
 MODULE    = d3dx9_36.dll
 IMPORTLIB = d3dx9
-IMPORTS   = d3d9 ole32 gdi32 user32
-EXTRALIBS = $(LIBWPP)
+IMPORTS   = d3d9 d3dcompiler ole32 gdi32 user32
 
 C_SRCS = \
-	asmparser.c \
-	asmutils.c \
-	bytecodewriter.c \
 	core.c \
 	d3dx9_36_main.c \
 	effect.c \
@@ -25,9 +21,6 @@
 	util.c \
 	volume.c
 
-LEX_SRCS = asmshader.l
-BISON_SRCS = asmshader.y
-
 RC_SRCS = version.rc
 
 @MAKE_DLL_RULES@
diff --git a/dlls/d3dx9_36/d3dx9_36_private.h b/dlls/d3dx9_36/d3dx9_36_private.h
index 7488307..b910139 100644
--- a/dlls/d3dx9_36/d3dx9_36_private.h
+++ b/dlls/d3dx9_36/d3dx9_36_private.h
@@ -2,8 +2,6 @@
  * Copyright (C) 2002 Raphael Junqueira
  * Copyright (C) 2008 David Adam
  * Copyright (C) 2008 Tony Wasserka
- * Copyright (C) 2008 Stefan Dösinger
- * Copyright (C) 2009 Matteo Bruni
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -130,527 +128,4 @@
     int allocated_sprites; /* number of (pre-)allocated sprites */
 } ID3DXSpriteImpl;
 
-/* Shader assembler definitions */
-typedef enum _shader_type {
-    ST_VERTEX,
-    ST_PIXEL,
-} shader_type;
-
-typedef enum BWRITER_COMPARISON_TYPE {
-    BWRITER_COMPARISON_NONE,
-    BWRITER_COMPARISON_GT,
-    BWRITER_COMPARISON_EQ,
-    BWRITER_COMPARISON_GE,
-    BWRITER_COMPARISON_LT,
-    BWRITER_COMPARISON_NE,
-    BWRITER_COMPARISON_LE
-} BWRITER_COMPARISON_TYPE;
-
-struct constant {
-    DWORD                   regnum;
-    union {
-        float               f;
-        INT                 i;
-        BOOL                b;
-        DWORD               d;
-    }                       value[4];
-};
-
-struct shader_reg {
-    DWORD                   type;
-    DWORD                   regnum;
-    struct shader_reg       *rel_reg;
-    DWORD                   srcmod;
-    union {
-        DWORD               swizzle;
-        DWORD               writemask;
-    };
-};
-
-struct instruction {
-    DWORD                   opcode;
-    DWORD                   dstmod;
-    DWORD                   shift;
-    BWRITER_COMPARISON_TYPE comptype;
-    BOOL                    has_dst;
-    struct shader_reg       dst;
-    struct shader_reg       *src;
-    unsigned int            num_srcs; /* For freeing the rel_regs */
-    BOOL                    has_predicate;
-    struct shader_reg       predicate;
-    BOOL                    coissue;
-};
-
-struct declaration {
-    DWORD                   usage, usage_idx;
-    DWORD                   regnum;
-    DWORD                   mod;
-    DWORD                   writemask;
-    BOOL                    builtin;
-};
-
-struct samplerdecl {
-    DWORD                   type;
-    DWORD                   regnum;
-    DWORD                   mod;
-};
-
-#define INSTRARRAY_INITIAL_SIZE 8
-struct bwriter_shader {
-    shader_type             type;
-
-    /* Shader version selected */
-    DWORD                   version;
-
-    /* Local constants. Every constant that is not defined below is loaded from
-     * the global constant set at shader runtime
-     */
-    struct constant         **constF;
-    struct constant         **constI;
-    struct constant         **constB;
-    unsigned int            num_cf, num_ci, num_cb;
-
-    /* Declared input and output varyings */
-    struct declaration      *inputs, *outputs;
-    unsigned int            num_inputs, num_outputs;
-    struct samplerdecl      *samplers;
-    unsigned int            num_samplers;
-
-    /* Are special pixel shader 3.0 registers declared? */
-    BOOL                    vPos, vFace;
-
-    /* Array of shader instructions - The shader code itself */
-    struct instruction      **instr;
-    unsigned int            num_instrs, instr_alloc_size;
-};
-
-static inline LPVOID asm_alloc(SIZE_T size) {
-    return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
-}
-
-static inline LPVOID asm_realloc(LPVOID ptr, SIZE_T size) {
-    return HeapReAlloc(GetProcessHeap(), 0, ptr, size);
-}
-
-static inline BOOL asm_free(LPVOID ptr) {
-    return HeapFree(GetProcessHeap(), 0, ptr);
-}
-
-struct asm_parser;
-
-/* This structure is only used in asmshader.y, but since the .l file accesses the semantic types
- * too it has to know it as well
- */
-struct rel_reg {
-    BOOL            has_rel_reg;
-    DWORD           type;
-    DWORD           additional_offset;
-    DWORD           rel_regnum;
-    DWORD           swizzle;
-};
-
-#define MAX_SRC_REGS 4
-
-struct src_regs {
-    struct shader_reg reg[MAX_SRC_REGS];
-    unsigned int      count;
-};
-
-struct asmparser_backend {
-    void (*constF)(struct asm_parser *This, DWORD reg, float x, float y, float z, float w);
-    void (*constI)(struct asm_parser *This, DWORD reg, INT x, INT y, INT z, INT w);
-    void (*constB)(struct asm_parser *This, DWORD reg, BOOL x);
-
-    void (*dstreg)(struct asm_parser *This, struct instruction *instr,
-                   const struct shader_reg *dst);
-    void (*srcreg)(struct asm_parser *This, struct instruction *instr, int num,
-                   const struct shader_reg *src);
-
-    void (*predicate)(struct asm_parser *This,
-                      const struct shader_reg *predicate);
-    void (*coissue)(struct asm_parser *This);
-
-    void (*dcl_output)(struct asm_parser *This, DWORD usage, DWORD num,
-                       const struct shader_reg *reg);
-    void (*dcl_input)(struct asm_parser *This, DWORD usage, DWORD num,
-                      DWORD mod, const struct shader_reg *reg);
-    void (*dcl_sampler)(struct asm_parser *This, DWORD samptype, DWORD mod,
-                        DWORD regnum, unsigned int line_no);
-
-    void (*end)(struct asm_parser *This);
-
-    void (*instr)(struct asm_parser *This, DWORD opcode, DWORD mod, DWORD shift,
-                  BWRITER_COMPARISON_TYPE comp, const struct shader_reg *dst,
-                  const struct src_regs *srcs, int expectednsrcs);
-};
-
-struct instruction *alloc_instr(unsigned int srcs);
-BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr);
-BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w);
-BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w);
-BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x);
-BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
-                        DWORD usage_idx, DWORD mod, BOOL output,
-                        DWORD regnum, DWORD writemask, BOOL builtin);
-BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype,
-                    DWORD mod, DWORD regnum);
-
-#define MESSAGEBUFFER_INITIAL_SIZE 256
-
-struct asm_parser {
-    /* The function table of the parser implementation */
-    const struct asmparser_backend *funcs;
-
-    /* Private data follows */
-    struct bwriter_shader *shader;
-    unsigned int m3x3pad_count;
-
-    enum parse_status {
-        PARSE_SUCCESS = 0,
-        PARSE_WARN = 1,
-        PARSE_ERR = 2
-    } status;
-    char *messages;
-    unsigned int messagesize;
-    unsigned int messagecapacity;
-    unsigned int line_no;
-};
-
-extern struct asm_parser asm_ctx;
-
-void create_vs10_parser(struct asm_parser *ret);
-void create_vs11_parser(struct asm_parser *ret);
-void create_vs20_parser(struct asm_parser *ret);
-void create_vs2x_parser(struct asm_parser *ret);
-void create_vs30_parser(struct asm_parser *ret);
-void create_ps10_parser(struct asm_parser *ret);
-void create_ps11_parser(struct asm_parser *ret);
-void create_ps12_parser(struct asm_parser *ret);
-void create_ps13_parser(struct asm_parser *ret);
-void create_ps14_parser(struct asm_parser *ret);
-void create_ps20_parser(struct asm_parser *ret);
-void create_ps2x_parser(struct asm_parser *ret);
-void create_ps30_parser(struct asm_parser *ret);
-
-struct bwriter_shader *parse_asm_shader(char **messages);
-
-#ifdef __GNUC__
-#define PRINTF_ATTR(fmt,args) __attribute__((format (printf,fmt,args)))
-#else
-#define PRINTF_ATTR(fmt,args)
-#endif
-
-void asmparser_message(struct asm_parser *ctx, const char *fmt, ...) PRINTF_ATTR(2,3);
-void set_parse_status(struct asm_parser *ctx, enum parse_status status);
-
-/* A reasonable value as initial size */
-#define BYTECODEBUFFER_INITIAL_SIZE 32
-struct bytecode_buffer {
-    DWORD *data;
-    DWORD size;
-    DWORD alloc_size;
-    /* For tracking rare out of memory situations without passing
-     * return values around everywhere
-     */
-    HRESULT state;
-};
-
-struct bc_writer; /* Predeclaration for use in vtable parameters */
-
-typedef void (*instr_writer)(struct bc_writer *This,
-                             const struct instruction *instr,
-                             struct bytecode_buffer *buffer);
-
-struct bytecode_backend {
-    void (*header)(struct bc_writer *This, const struct bwriter_shader *shader,
-		   struct bytecode_buffer *buffer);
-    void (*end)(struct bc_writer *This, const struct bwriter_shader *shader,
-                struct bytecode_buffer *buffer);
-    void (*srcreg)(struct bc_writer *This, const struct shader_reg *reg,
-                   struct bytecode_buffer *buffer);
-    void (*dstreg)(struct bc_writer *This, const struct shader_reg *reg,
-                   struct bytecode_buffer *buffer, DWORD shift, DWORD mod);
-    void (*opcode)(struct bc_writer *This, const struct instruction *instr,
-                   DWORD token, struct bytecode_buffer *buffer);
-
-    const struct instr_handler_table {
-        DWORD opcode;
-        instr_writer func;
-    } *instructions;
-};
-
-/* Bytecode writing stuff */
-struct bc_writer {
-    const struct bytecode_backend *funcs;
-
-    /* Avoid result checking */
-    HRESULT                       state;
-
-    DWORD                         version;
-
-    /* Vertex shader varying mapping */
-    DWORD                         oPos_regnum;
-    DWORD                         oD_regnum[2];
-    DWORD                         oT_regnum[8];
-    DWORD                         oFog_regnum;
-    DWORD                         oFog_mask;
-    DWORD                         oPts_regnum;
-    DWORD                         oPts_mask;
-
-    /* Pixel shader specific members */
-    DWORD                         t_regnum[8];
-    DWORD                         v_regnum[2];
-};
-
-/* Debug utility routines */
-const char *debug_print_srcmod(DWORD mod);
-const char *debug_print_dstmod(DWORD mod);
-const char *debug_print_shift(DWORD shift);
-const char *debug_print_dstreg(const struct shader_reg *reg);
-const char *debug_print_srcreg(const struct shader_reg *reg);
-const char *debug_print_swizzle(DWORD swizzle);
-const char *debug_print_writemask(DWORD mask);
-const char *debug_print_comp(DWORD comp);
-const char *debug_print_opcode(DWORD opcode);
-
-/* Utilities for internal->d3d constant mapping */
-DWORD d3d9_swizzle(DWORD bwriter_swizzle);
-DWORD d3d9_writemask(DWORD bwriter_writemask);
-DWORD d3d9_srcmod(DWORD bwriter_srcmod);
-DWORD d3d9_dstmod(DWORD bwriter_mod);
-DWORD d3d9_comparetype(DWORD bwriter_comparetype);
-DWORD d3d9_sampler(DWORD bwriter_sampler);
-DWORD d3d9_register(DWORD bwriter_register);
-DWORD d3d9_opcode(DWORD bwriter_opcode);
-
-/* Used to signal an incorrect swizzle/writemask */
-#define SWIZZLE_ERR ~0U
-
-/*
-  Enumerations and defines used in the bytecode writer
-  intermediate representation
-*/
-typedef enum _BWRITERSHADER_INSTRUCTION_OPCODE_TYPE {
-    BWRITERSIO_NOP,
-    BWRITERSIO_MOV,
-    BWRITERSIO_ADD,
-    BWRITERSIO_SUB,
-    BWRITERSIO_MAD,
-    BWRITERSIO_MUL,
-    BWRITERSIO_RCP,
-    BWRITERSIO_RSQ,
-    BWRITERSIO_DP3,
-    BWRITERSIO_DP4,
-    BWRITERSIO_MIN,
-    BWRITERSIO_MAX,
-    BWRITERSIO_SLT,
-    BWRITERSIO_SGE,
-    BWRITERSIO_EXP,
-    BWRITERSIO_LOG,
-    BWRITERSIO_LIT,
-    BWRITERSIO_DST,
-    BWRITERSIO_LRP,
-    BWRITERSIO_FRC,
-    BWRITERSIO_M4x4,
-    BWRITERSIO_M4x3,
-    BWRITERSIO_M3x4,
-    BWRITERSIO_M3x3,
-    BWRITERSIO_M3x2,
-    BWRITERSIO_CALL,
-    BWRITERSIO_CALLNZ,
-    BWRITERSIO_LOOP,
-    BWRITERSIO_RET,
-    BWRITERSIO_ENDLOOP,
-    BWRITERSIO_LABEL,
-    BWRITERSIO_DCL,
-    BWRITERSIO_POW,
-    BWRITERSIO_CRS,
-    BWRITERSIO_SGN,
-    BWRITERSIO_ABS,
-    BWRITERSIO_NRM,
-    BWRITERSIO_SINCOS,
-    BWRITERSIO_REP,
-    BWRITERSIO_ENDREP,
-    BWRITERSIO_IF,
-    BWRITERSIO_IFC,
-    BWRITERSIO_ELSE,
-    BWRITERSIO_ENDIF,
-    BWRITERSIO_BREAK,
-    BWRITERSIO_BREAKC,
-    BWRITERSIO_MOVA,
-    BWRITERSIO_DEFB,
-    BWRITERSIO_DEFI,
-
-    BWRITERSIO_TEXCOORD,
-    BWRITERSIO_TEXKILL,
-    BWRITERSIO_TEX,
-    BWRITERSIO_TEXBEM,
-    BWRITERSIO_TEXBEML,
-    BWRITERSIO_TEXREG2AR,
-    BWRITERSIO_TEXREG2GB,
-    BWRITERSIO_TEXM3x2PAD,
-    BWRITERSIO_TEXM3x2TEX,
-    BWRITERSIO_TEXM3x3PAD,
-    BWRITERSIO_TEXM3x3TEX,
-    BWRITERSIO_TEXM3x3SPEC,
-    BWRITERSIO_TEXM3x3VSPEC,
-    BWRITERSIO_EXPP,
-    BWRITERSIO_LOGP,
-    BWRITERSIO_CND,
-    BWRITERSIO_DEF,
-    BWRITERSIO_TEXREG2RGB,
-    BWRITERSIO_TEXDP3TEX,
-    BWRITERSIO_TEXM3x2DEPTH,
-    BWRITERSIO_TEXDP3,
-    BWRITERSIO_TEXM3x3,
-    BWRITERSIO_TEXDEPTH,
-    BWRITERSIO_CMP,
-    BWRITERSIO_BEM,
-    BWRITERSIO_DP2ADD,
-    BWRITERSIO_DSX,
-    BWRITERSIO_DSY,
-    BWRITERSIO_TEXLDD,
-    BWRITERSIO_SETP,
-    BWRITERSIO_TEXLDL,
-    BWRITERSIO_BREAKP,
-    BWRITERSIO_TEXLDP,
-    BWRITERSIO_TEXLDB,
-
-    BWRITERSIO_PHASE,
-    BWRITERSIO_COMMENT,
-    BWRITERSIO_END,
-} BWRITERSHADER_INSTRUCTION_OPCODE_TYPE;
-
-typedef enum _BWRITERSHADER_PARAM_REGISTER_TYPE {
-    BWRITERSPR_TEMP,
-    BWRITERSPR_INPUT,
-    BWRITERSPR_CONST,
-    BWRITERSPR_ADDR,
-    BWRITERSPR_TEXTURE,
-    BWRITERSPR_RASTOUT,
-    BWRITERSPR_ATTROUT,
-    BWRITERSPR_TEXCRDOUT,
-    BWRITERSPR_OUTPUT,
-    BWRITERSPR_CONSTINT,
-    BWRITERSPR_COLOROUT,
-    BWRITERSPR_DEPTHOUT,
-    BWRITERSPR_SAMPLER,
-    BWRITERSPR_CONSTBOOL,
-    BWRITERSPR_LOOP,
-    BWRITERSPR_MISCTYPE,
-    BWRITERSPR_LABEL,
-    BWRITERSPR_PREDICATE
-} BWRITERSHADER_PARAM_REGISTER_TYPE;
-
-typedef enum _BWRITERVS_RASTOUT_OFFSETS
-{
-    BWRITERSRO_POSITION,
-    BWRITERSRO_FOG,
-    BWRITERSRO_POINT_SIZE
-} BWRITERVS_RASTOUT_OFFSETS;
-
-#define BWRITERSP_WRITEMASK_0   0x1 /* .x r */
-#define BWRITERSP_WRITEMASK_1   0x2 /* .y g */
-#define BWRITERSP_WRITEMASK_2   0x4 /* .z b */
-#define BWRITERSP_WRITEMASK_3   0x8 /* .w a */
-#define BWRITERSP_WRITEMASK_ALL 0xf /* all */
-
-typedef enum _BWRITERSHADER_PARAM_DSTMOD_TYPE {
-    BWRITERSPDM_NONE = 0,
-    BWRITERSPDM_SATURATE = 1,
-    BWRITERSPDM_PARTIALPRECISION = 2,
-    BWRITERSPDM_MSAMPCENTROID = 4,
-} BWRITERSHADER_PARAM_DSTMOD_TYPE;
-
-typedef enum _BWRITERSAMPLER_TEXTURE_TYPE {
-    BWRITERSTT_UNKNOWN = 0,
-    BWRITERSTT_1D = 1,
-    BWRITERSTT_2D = 2,
-    BWRITERSTT_CUBE = 3,
-    BWRITERSTT_VOLUME = 4,
-} BWRITERSAMPLER_TEXTURE_TYPE;
-
-#define BWRITERSI_TEXLD_PROJECT 1
-#define BWRITERSI_TEXLD_BIAS    2
-
-typedef enum _BWRITERSHADER_PARAM_SRCMOD_TYPE {
-    BWRITERSPSM_NONE = 0,
-    BWRITERSPSM_NEG,
-    BWRITERSPSM_BIAS,
-    BWRITERSPSM_BIASNEG,
-    BWRITERSPSM_SIGN,
-    BWRITERSPSM_SIGNNEG,
-    BWRITERSPSM_COMP,
-    BWRITERSPSM_X2,
-    BWRITERSPSM_X2NEG,
-    BWRITERSPSM_DZ,
-    BWRITERSPSM_DW,
-    BWRITERSPSM_ABS,
-    BWRITERSPSM_ABSNEG,
-    BWRITERSPSM_NOT,
-} BWRITERSHADER_PARAM_SRCMOD_TYPE;
-
-#define BWRITER_SM1_VS  0xfffe
-#define BWRITER_SM1_PS  0xffff
-
-#define BWRITERPS_VERSION(major, minor) ((BWRITER_SM1_PS << 16) | ((major) << 8) | (minor))
-#define BWRITERVS_VERSION(major, minor) ((BWRITER_SM1_VS << 16) | ((major) << 8) | (minor))
-
-#define BWRITERVS_SWIZZLE_SHIFT      16
-#define BWRITERVS_SWIZZLE_MASK       (0xFF << BWRITERVS_SWIZZLE_SHIFT)
-
-#define BWRITERVS_X_X       (0 << BWRITERVS_SWIZZLE_SHIFT)
-#define BWRITERVS_X_Y       (1 << BWRITERVS_SWIZZLE_SHIFT)
-#define BWRITERVS_X_Z       (2 << BWRITERVS_SWIZZLE_SHIFT)
-#define BWRITERVS_X_W       (3 << BWRITERVS_SWIZZLE_SHIFT)
-
-#define BWRITERVS_Y_X       (0 << (BWRITERVS_SWIZZLE_SHIFT + 2))
-#define BWRITERVS_Y_Y       (1 << (BWRITERVS_SWIZZLE_SHIFT + 2))
-#define BWRITERVS_Y_Z       (2 << (BWRITERVS_SWIZZLE_SHIFT + 2))
-#define BWRITERVS_Y_W       (3 << (BWRITERVS_SWIZZLE_SHIFT + 2))
-
-#define BWRITERVS_Z_X       (0 << (BWRITERVS_SWIZZLE_SHIFT + 4))
-#define BWRITERVS_Z_Y       (1 << (BWRITERVS_SWIZZLE_SHIFT + 4))
-#define BWRITERVS_Z_Z       (2 << (BWRITERVS_SWIZZLE_SHIFT + 4))
-#define BWRITERVS_Z_W       (3 << (BWRITERVS_SWIZZLE_SHIFT + 4))
-
-#define BWRITERVS_W_X       (0 << (BWRITERVS_SWIZZLE_SHIFT + 6))
-#define BWRITERVS_W_Y       (1 << (BWRITERVS_SWIZZLE_SHIFT + 6))
-#define BWRITERVS_W_Z       (2 << (BWRITERVS_SWIZZLE_SHIFT + 6))
-#define BWRITERVS_W_W       (3 << (BWRITERVS_SWIZZLE_SHIFT + 6))
-
-#define BWRITERVS_NOSWIZZLE (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z | BWRITERVS_W_W)
-
-#define BWRITERVS_SWIZZLE_X (BWRITERVS_X_X | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)
-#define BWRITERVS_SWIZZLE_Y (BWRITERVS_X_Y | BWRITERVS_Y_Y | BWRITERVS_Z_Y | BWRITERVS_W_Y)
-#define BWRITERVS_SWIZZLE_Z (BWRITERVS_X_Z | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)
-#define BWRITERVS_SWIZZLE_W (BWRITERVS_X_W | BWRITERVS_Y_W | BWRITERVS_Z_W | BWRITERVS_W_W)
-
-typedef enum _BWRITERDECLUSAGE {
-    BWRITERDECLUSAGE_POSITION,
-    BWRITERDECLUSAGE_BLENDWEIGHT,
-    BWRITERDECLUSAGE_BLENDINDICES,
-    BWRITERDECLUSAGE_NORMAL,
-    BWRITERDECLUSAGE_PSIZE,
-    BWRITERDECLUSAGE_TEXCOORD,
-    BWRITERDECLUSAGE_TANGENT,
-    BWRITERDECLUSAGE_BINORMAL,
-    BWRITERDECLUSAGE_TESSFACTOR,
-    BWRITERDECLUSAGE_POSITIONT,
-    BWRITERDECLUSAGE_COLOR,
-    BWRITERDECLUSAGE_FOG,
-    BWRITERDECLUSAGE_DEPTH,
-    BWRITERDECLUSAGE_SAMPLE
-} BWRITERDECLUSAGE;
-
-/* ps 1.x texture registers mappings */
-#define T0_REG          2
-#define T1_REG          3
-#define T2_REG          4
-#define T3_REG          5
-
-struct bwriter_shader *SlAssembleShader(const char *text, char **messages);
-DWORD SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result);
-void SlDeleteShader(struct bwriter_shader *shader);
-
 #endif /* __WINE_D3DX9_36_PRIVATE_H */
diff --git a/dlls/d3dx9_36/shader.c b/dlls/d3dx9_36/shader.c
index f5ba7aa..9c6700a 100644
--- a/dlls/d3dx9_36/shader.c
+++ b/dlls/d3dx9_36/shader.c
@@ -23,11 +23,18 @@
 #include "wine/unicode.h"
 #include "windef.h"
 #include "wingdi.h"
-#include "wine/wpp.h"
+#include "objbase.h"
+#include "d3dcommon.h"
 #include "d3dx9_36_private.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
 
+/* D3DAssemble (from d3dcompiler) is not declared in the SDK headers yet, so declare it here */
+HRESULT WINAPI D3DAssemble(LPCVOID data, SIZE_T datasize, LPCSTR filename,
+                           const D3D_SHADER_MACRO *defines, ID3DInclude *include,
+                           UINT flags,
+                           ID3DBlob **shader, ID3DBlob **error_messages);
+
 LPCSTR WINAPI D3DXGetPixelShaderProfile(LPDIRECT3DDEVICE9 device)
 {
     D3DCAPS9 caps;
@@ -170,372 +177,6 @@
     return S_FALSE;
 }
 
-#define BUFFER_INITIAL_CAPACITY 256
-
-struct mem_file_desc
-{
-    const char *buffer;
-    unsigned int size;
-    unsigned int pos;
-};
-
-struct mem_file_desc current_shader;
-LPD3DXINCLUDE current_include;
-
-#define INCLUDES_INITIAL_CAPACITY 4
-
-struct loaded_include
-{
-    const char *name;
-    const char *data;
-};
-
-struct loaded_include *includes;
-int includes_capacity, includes_size;
-const char *parent_include;
-
-char *wpp_output;
-int wpp_output_capacity, wpp_output_size;
-
-char *wpp_messages;
-int wpp_messages_capacity, wpp_messages_size;
-
-/* Mutex used to guarantee a single invocation
-   of the D3DXAssembleShader function (or its variants) at a time.
-   This is needed as wpp isn't thread-safe */
-static CRITICAL_SECTION wpp_mutex;
-static CRITICAL_SECTION_DEBUG wpp_mutex_debug =
-{
-    0, 0, &wpp_mutex,
-    { &wpp_mutex_debug.ProcessLocksList,
-      &wpp_mutex_debug.ProcessLocksList },
-      0, 0, { (DWORD_PTR)(__FILE__ ": wpp_mutex") }
-};
-static CRITICAL_SECTION wpp_mutex = { &wpp_mutex_debug, -1, 0, 0, 0, 0 };
-
-/* Preprocessor error reporting functions */
-static void wpp_write_message(const char *fmt, va_list args)
-{
-    char* newbuffer;
-    int rc, newsize;
-
-    if(wpp_messages_capacity == 0)
-    {
-        wpp_messages = HeapAlloc(GetProcessHeap(), 0, MESSAGEBUFFER_INITIAL_SIZE);
-        if(wpp_messages == NULL)
-        {
-            ERR("Error allocating memory for parser messages\n");
-            return;
-        }
-        wpp_messages_capacity = MESSAGEBUFFER_INITIAL_SIZE;
-    }
-
-    while(1)
-    {
-        rc = vsnprintf(wpp_messages + wpp_messages_size,
-                       wpp_messages_capacity - wpp_messages_size, fmt, args);
-
-        if (rc < 0 ||                                           /* C89 */
-            rc >= wpp_messages_capacity - wpp_messages_size) {  /* C99 */
-            /* Resize the buffer */
-            newsize = wpp_messages_capacity * 2;
-            newbuffer = HeapReAlloc(GetProcessHeap(), 0, wpp_messages, newsize);
-            if(newbuffer == NULL)
-            {
-                ERR("Error reallocating memory for parser messages\n");
-                return;
-            }
-            wpp_messages = newbuffer;
-            wpp_messages_capacity = newsize;
-        }
-        else
-        {
-            wpp_messages_size += rc;
-            return;
-        }
-    }
-}
-
-static void PRINTF_ATTR(1,2) wpp_write_message_var(const char *fmt, ...)
-{
-    va_list args;
-
-    va_start(args, fmt);
-    wpp_write_message(fmt, args);
-    va_end(args);
-}
-
-static void wpp_error(const char *file, int line, int col, const char *near,
-                      const char *msg, va_list ap)
-{
-    wpp_write_message_var("%s:%d:%d: %s: ", file ? file : "'main file'",
-                          line, col, "Error");
-    wpp_write_message(msg, ap);
-    wpp_write_message_var("\n");
-}
-
-static void wpp_warning(const char *file, int line, int col, const char *near,
-                        const char *msg, va_list ap)
-{
-    wpp_write_message_var("%s:%d:%d: %s: ", file ? file : "'main file'",
-                          line, col, "Warning");
-    wpp_write_message(msg, ap);
-    wpp_write_message_var("\n");
-}
-
-static char *wpp_lookup_mem(const char *filename, const char *parent_name,
-                            char **include_path, int include_path_count)
-{
-    /* Here we return always ok. We will maybe fail on the next wpp_open_mem */
-    char *path;
-    int i;
-
-    parent_include = NULL;
-    if(parent_name[0] != '\0')
-    {
-        for(i = 0; i < includes_size; i++)
-        {
-            if(!strcmp(parent_name, includes[i].name))
-            {
-                parent_include = includes[i].data;
-                break;
-            }
-        }
-        if(parent_include == NULL)
-        {
-            ERR("Parent include file missing\n");
-            return NULL;
-        }
-    }
-
-    path = malloc(strlen(filename) + 1);
-    if(path)
-        memcpy(path, filename, strlen(filename) + 1);
-    return path;
-}
-
-static void *wpp_open_mem(const char *filename, int type)
-{
-    struct mem_file_desc *desc;
-    HRESULT hr;
-
-    if(filename[0] == '\0') /* "" means to load the initial shader */
-    {
-        current_shader.pos = 0;
-        return &current_shader;
-    }
-
-    if(current_include == NULL) return NULL;
-    desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*desc));
-    if(!desc)
-    {
-        ERR("Error allocating memory\n");
-        return NULL;
-    }
-    hr = ID3DXInclude_Open(current_include,
-                           type ? D3DXINC_SYSTEM : D3DXINC_LOCAL,
-                           filename, parent_include, (LPCVOID *)&desc->buffer,
-                           &desc->size);
-    if(FAILED(hr))
-    {
-        HeapFree(GetProcessHeap(), 0, desc);
-        return NULL;
-    }
-
-    if(includes_capacity == includes_size)
-    {
-        if(includes_capacity == 0)
-        {
-            includes = HeapAlloc(GetProcessHeap(), 0, INCLUDES_INITIAL_CAPACITY);
-            if(includes == NULL)
-            {
-                ERR("Error allocating memory for the loaded includes structure\n");
-                goto error;
-            }
-            includes_capacity = INCLUDES_INITIAL_CAPACITY;
-        }
-        else
-        {
-            int newcapacity = includes_capacity * 2;
-            struct loaded_include *newincludes =
-                HeapReAlloc(GetProcessHeap(), 0, includes, newcapacity);
-            if(newincludes == NULL)
-            {
-                ERR("Error reallocating memory for the loaded includes structure\n");
-                goto error;
-            }
-            includes = newincludes;
-            includes_capacity = newcapacity;
-        }
-    }
-    includes[includes_size].name = filename;
-    includes[includes_size++].data = desc->buffer;
-
-    desc->pos = 0;
-    return desc;
-
-error:
-    ID3DXInclude_Close(current_include, desc->buffer);
-    HeapFree(GetProcessHeap(), 0, desc);
-    return NULL;
-}
-
-static void wpp_close_mem(void *file)
-{
-    struct mem_file_desc *desc = file;
-
-    if(desc != &current_shader)
-    {
-        if(current_include)
-            ID3DXInclude_Close(current_include, desc->buffer);
-        else
-            ERR("current_include == NULL, desc == %p, buffer = %s\n",
-                desc, desc->buffer);
-
-        HeapFree(GetProcessHeap(), 0, desc);
-    }
-}
-
-static int wpp_read_mem(void *file, char *buffer, unsigned int len)
-{
-    struct mem_file_desc *desc = file;
-
-    len = min(len, desc->size - desc->pos);
-    memcpy(buffer, desc->buffer + desc->pos, len);
-    desc->pos += len;
-    return len;
-}
-
-static void wpp_write_mem(const char *buffer, unsigned int len)
-{
-    char *new_wpp_output;
-
-    if(wpp_output_capacity == 0)
-    {
-        wpp_output = HeapAlloc(GetProcessHeap(), 0, BUFFER_INITIAL_CAPACITY);
-        if(!wpp_output)
-        {
-            ERR("Error allocating memory\n");
-            return;
-        }
-        wpp_output_capacity = BUFFER_INITIAL_CAPACITY;
-    }
-    if(len > wpp_output_capacity - wpp_output_size)
-    {
-        while(len > wpp_output_capacity - wpp_output_size)
-        {
-            wpp_output_capacity *= 2;
-        }
-        new_wpp_output = HeapReAlloc(GetProcessHeap(), 0, wpp_output,
-                                     wpp_output_capacity);
-        if(!new_wpp_output)
-        {
-            ERR("Error allocating memory\n");
-            return;
-        }
-        wpp_output = new_wpp_output;
-    }
-    memcpy(wpp_output + wpp_output_size, buffer, len);
-    wpp_output_size += len;
-}
-
-static int wpp_close_output(void)
-{
-    char *new_wpp_output = HeapReAlloc(GetProcessHeap(), 0, wpp_output,
-                                       wpp_output_size + 1);
-    if(!new_wpp_output) return 0;
-    wpp_output = new_wpp_output;
-    wpp_output[wpp_output_size]='\0';
-    return 1;
-}
-
-static HRESULT assemble_shader(const char *preprocShader, const char *preprocMessages,
-                        LPD3DXBUFFER* ppShader, LPD3DXBUFFER* ppErrorMsgs)
-{
-    struct bwriter_shader *shader;
-    char *messages = NULL;
-    HRESULT hr;
-    DWORD *res;
-    LPD3DXBUFFER buffer;
-    int size;
-    char *pos;
-
-    shader = SlAssembleShader(preprocShader, &messages);
-
-    if(messages || preprocMessages)
-    {
-        if(preprocMessages)
-        {
-            TRACE("Preprocessor messages:\n");
-            TRACE("%s", preprocMessages);
-        }
-        if(messages)
-        {
-            TRACE("Assembler messages:\n");
-            TRACE("%s", messages);
-        }
-
-        TRACE("Shader source:\n");
-        TRACE("%s\n", debugstr_a(preprocShader));
-
-        if(ppErrorMsgs)
-        {
-            size = (messages ? strlen(messages) : 0) +
-                (preprocMessages ? strlen(preprocMessages) : 0) + 1;
-            hr = D3DXCreateBuffer(size, &buffer);
-            if(FAILED(hr))
-            {
-                HeapFree(GetProcessHeap(), 0, messages);
-                if(shader) SlDeleteShader(shader);
-                return hr;
-            }
-            pos = ID3DXBuffer_GetBufferPointer(buffer);
-            if(preprocMessages)
-            {
-                CopyMemory(pos, preprocMessages, strlen(preprocMessages) + 1);
-                pos += strlen(preprocMessages);
-            }
-            if(messages)
-                CopyMemory(pos, messages, strlen(messages) + 1);
-
-            *ppErrorMsgs = buffer;
-        }
-
-        HeapFree(GetProcessHeap(), 0, messages);
-    }
-
-    if(shader == NULL)
-    {
-        ERR("Asm reading failed\n");
-        return D3DXERR_INVALIDDATA;
-    }
-
-    hr = SlWriteBytecode(shader, 9, &res);
-    SlDeleteShader(shader);
-    if(FAILED(hr))
-    {
-        ERR("SlWriteBytecode failed with 0x%08x\n", hr);
-        return D3DXERR_INVALIDDATA;
-    }
-
-    if(ppShader)
-    {
-        size = HeapSize(GetProcessHeap(), 0, res);
-        hr = D3DXCreateBuffer(size, &buffer);
-        if(FAILED(hr))
-        {
-            HeapFree(GetProcessHeap(), 0, res);
-            return hr;
-        }
-        CopyMemory(ID3DXBuffer_GetBufferPointer(buffer), res, size);
-        *ppShader = buffer;
-    }
-
-    HeapFree(GetProcessHeap(), 0, res);
-
-    return D3D_OK;
-}
-
 HRESULT WINAPI D3DXAssembleShader(LPCSTR data,
                                   UINT data_len,
                                   CONST D3DXMACRO* defines,
@@ -544,95 +185,13 @@
                                   LPD3DXBUFFER* shader,
                                   LPD3DXBUFFER* error_messages)
 {
-    int ret;
-    HRESULT hr;
-    CONST D3DXMACRO* def = defines;
+    /* Forward to d3dcompiler: the D3DX and D3D parameter types differ in name
+       only; the underlying data types are equivalent, so they are simply cast */
+    HRESULT hr = D3DAssemble(data, data_len, NULL, (D3D_SHADER_MACRO *)defines,
+                             (ID3DInclude *)include, flags, (ID3DBlob **)shader,
+                             (ID3DBlob **)error_messages);
 
-    static const struct wpp_callbacks wpp_callbacks = {
-        wpp_lookup_mem,
-        wpp_open_mem,
-        wpp_close_mem,
-        wpp_read_mem,
-        wpp_write_mem,
-        wpp_error,
-        wpp_warning,
-    };
-
-    EnterCriticalSection(&wpp_mutex);
-
-    /* TODO: flags */
-    if(flags) FIXME("flags: %x\n", flags);
-
-    if(def != NULL)
-    {
-        while(def->Name != NULL)
-        {
-            wpp_add_define(def->Name, def->Definition);
-            def++;
-        }
-    }
-    current_include = include;
-    includes_size = 0;
-
-    if(shader) *shader = NULL;
-    if(error_messages) *error_messages = NULL;
-    wpp_output_size = wpp_output_capacity = 0;
-    wpp_output = NULL;
-
-    /* Preprocess shader */
-    wpp_set_callbacks(&wpp_callbacks);
-    wpp_messages_size = wpp_messages_capacity = 0;
-    wpp_messages = NULL;
-    current_shader.buffer = data;
-    current_shader.size = data_len;
-
-    ret = wpp_parse("", NULL);
-    if(!wpp_close_output())
-        ret = 1;
-    if(ret)
-    {
-        TRACE("Error during shader preprocessing\n");
-        if(wpp_messages)
-        {
-            int size;
-            LPD3DXBUFFER buffer;
-
-            TRACE("Preprocessor messages:\n");
-            TRACE("%s", wpp_messages);
-
-            if(error_messages)
-            {
-                size = strlen(wpp_messages) + 1;
-                hr = D3DXCreateBuffer(size, &buffer);
-                if(FAILED(hr)) goto cleanup;
-                CopyMemory(ID3DXBuffer_GetBufferPointer(buffer), wpp_messages, size);
-                *error_messages = buffer;
-            }
-        }
-        if(data)
-        {
-            TRACE("Shader source:\n");
-            TRACE("%s\n", debugstr_an(data, data_len));
-        }
-        hr = D3DXERR_INVALIDDATA;
-        goto cleanup;
-    }
-
-    hr = assemble_shader(wpp_output, wpp_messages, shader, error_messages);
-
-cleanup:
-    /* Remove the previously added defines */
-    if(defines != NULL)
-    {
-        while(defines->Name != NULL)
-        {
-            wpp_del_define(defines->Name);
-            defines++;
-        }
-    }
-    HeapFree(GetProcessHeap(), 0, wpp_messages);
-    HeapFree(GetProcessHeap(), 0, wpp_output);
-    LeaveCriticalSection(&wpp_mutex);
+    if(hr == E_FAIL) hr = D3DXERR_INVALIDDATA;
     return hr;
 }
 
