Integrate hardware vertex shaders into the drawing pipeline.

diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 5304cdd..d31bf04 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -1456,7 +1456,7 @@
     IWineD3DVertexShaderImpl *object;  /* NOTE: impl usage is ok, this is a create */
     D3DCREATEOBJECTINSTANCE(object, VertexShader)
 
-    TRACE("(%p) : Created Vertex shader %p\n", This, ppVertexShader);
+    TRACE("(%p) : Created Vertex shader %p\n", This, *ppVertexShader);
     IWineD3DVertexShader_SetFunction(*ppVertexShader, pFunction);
 
     return D3D_OK;
@@ -4334,7 +4334,9 @@
     TRACE("(%p) : Type=(%d,%s), Start=%d, Count=%d\n", This, PrimitiveType,
                                debug_d3dprimitivetype(PrimitiveType),
                                StartVertex, PrimitiveCount);
-    drawPrimitive(iface, PrimitiveType, PrimitiveCount, StartVertex, -1, 0, NULL, 0);
+    drawPrimitive(iface, PrimitiveType, PrimitiveCount, StartVertex, 0/* NumVertices */, -1 /* indxStart */,
+                  0 /* indxSize */, NULL /* indxData */, 0 /* minIndex */);
+
 
     return D3D_OK;
 }
@@ -4343,7 +4345,7 @@
 HRESULT  WINAPI  IWineD3DDeviceImpl_DrawIndexedPrimitive(IWineD3DDevice *iface,
                                                            D3DPRIMITIVETYPE PrimitiveType,
                                                            INT baseVIndex, UINT minIndex,
-                                                           UINT NumVertices,UINT startIndex,UINT primCount) {
+                                                           UINT NumVertices, UINT startIndex, UINT primCount) {
 
     IWineD3DDeviceImpl  *This = (IWineD3DDeviceImpl *)iface;
     UINT                 idxStride = 2;
@@ -4364,10 +4366,8 @@
         idxStride = 4;
     }
 
-    drawPrimitive(iface, PrimitiveType, primCount, baseVIndex,
-                      startIndex, idxStride,
-                      ((IWineD3DIndexBufferImpl *) pIB)->resource.allocatedMemory,
-                      minIndex);
+    drawPrimitive(iface, PrimitiveType, primCount, baseVIndex, NumVertices, startIndex,
+                   idxStride, ((IWineD3DIndexBufferImpl *) pIB)->resource.allocatedMemory, minIndex);
 
     return D3D_OK;
 }
@@ -4392,7 +4392,12 @@
     This->stateBlock->streamSource[0] = (IWineD3DVertexBuffer *)pVertexStreamZeroData;
     This->stateBlock->streamStride[0] = VertexStreamZeroStride;
     This->stateBlock->streamIsUP = TRUE;
-    drawPrimitive(iface, PrimitiveType, PrimitiveCount, 0, 0, 0, NULL, 0);
+
+    drawPrimitive(iface, PrimitiveType, PrimitiveCount, 0 /* start vertex */, 0  /* NumVertices */,
+                  0 /* indxStart*/, 0 /* indxSize*/, NULL /* indxData */, 0 /* indxMin */);
+    /* stream zero settings set to null at end, as per the msdn
+            http://msdn.microsoft.com/archive/default.asp?url=/archive/en-us/directx9_c/directx/graphics/reference/d3d/interfaces/idirect3ddevice9/DrawPrimitiveUP.asp
+    */
     This->stateBlock->streamStride[0] = 0;
     This->stateBlock->streamSource[0] = NULL;
 
@@ -4401,16 +4406,16 @@
 }
 
 HRESULT WINAPI IWineD3DDeviceImpl_DrawIndexedPrimitiveUP(IWineD3DDevice *iface, D3DPRIMITIVETYPE PrimitiveType,
-                                                             UINT MinVertexIndex,
-                                                             UINT NumVertexIndices,UINT PrimitiveCount,CONST void* pIndexData,
-                                                             WINED3DFORMAT IndexDataFormat, CONST void* pVertexStreamZeroData,
+                                                             UINT MinVertexIndex, UINT NumVertices,
+                                                             UINT PrimitiveCount, CONST void* pIndexData,
+                                                             WINED3DFORMAT IndexDataFormat,CONST void* pVertexStreamZeroData,
                                                              UINT VertexStreamZeroStride) {
     int                 idxStride;
     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
 
     TRACE("(%p) : Type=(%d,%s), MinVtxIdx=%d, NumVIdx=%d, PCount=%d, pidxdata=%p, IdxFmt=%d, pVtxdata=%p, stride=%d\n",
              This, PrimitiveType, debug_d3dprimitivetype(PrimitiveType),
-             MinVertexIndex, NumVertexIndices, PrimitiveCount, pIndexData,
+             MinVertexIndex, NumVertices, PrimitiveCount, pIndexData,
              IndexDataFormat, pVertexStreamZeroData, VertexStreamZeroStride);
 
     if (IndexDataFormat == WINED3DFMT_INDEX16) {
@@ -4440,7 +4445,10 @@
     This->stateBlock->streamIsUP = TRUE;
     This->stateBlock->streamStride[0] = VertexStreamZeroStride;
 
-    drawPrimitive(iface, PrimitiveType, PrimitiveCount, 0, 0, idxStride, pIndexData, MinVertexIndex);
+    drawPrimitive(iface, PrimitiveType, PrimitiveCount, 0 /* vertexStart */, NumVertices, 0 /* indxStart */, idxStride, pIndexData, MinVertexIndex);
+    /* stream zero settings set to null at end as per the msdn
+    http://msdn.microsoft.com/archive/default.asp?url=/archive/en-us/directx9_c/directx/graphics/reference/d3d/interfaces/idirect3ddevice9/DrawPrimitiveUP.asp
+    */
 
     /* stream zero settings set to null at end as per the msdn */
     This->stateBlock->streamSource[0] = NULL;
diff --git a/dlls/wined3d/drawprim.c b/dlls/wined3d/drawprim.c
index 6ce30d0..877cd69 100644
--- a/dlls/wined3d/drawprim.c
+++ b/dlls/wined3d/drawprim.c
@@ -29,9 +29,9 @@
 #define GLINFO_LOCATION ((IWineD3DImpl *)(This->wineD3D))->gl_info
 
 #if 0 /* TODO */
-extern IDirect3DVertexShaderImpl*            VertexShaders[64];
-extern IDirect3DVertexShaderDeclarationImpl* VertexShaderDeclarations[64];
-extern IDirect3DPixelShaderImpl*             PixelShaders[64];
+extern IWineD3DVertexShaderImpl*            VertexShaders[64];
+extern IWineD3DVertexDeclarationImpl*       VertexShaderDeclarations[64];
+extern IWineD3DPixelShaderImpl*             PixelShaders[64];
 
 #undef GL_VERSION_1_4 /* To be fixed, caused by mesa headers */
 #endif
@@ -777,101 +777,265 @@
 }
 #endif /* TODO: Software shaders */
 
-/*
- * Actually draw using the supplied information.
- * Faster GL version using pointers to data, harder to debug though
- * Note does not handle vertex shaders yet
- */
-static void drawStridedFast(IWineD3DDevice *iface, Direct3DVertexStridedData *sd,
-                     int PrimitiveType, ULONG NumPrimitives,
-                     const void *idxData, short idxSize, ULONG minIndex, ULONG startIdx) {
+/* Point the numbered (generic) vertex attribute arrays at the strided data in sd.
+ * arrayUsageMap[usage] is the attribute index to load for that declaration usage;
+ * an entry is only loaded when its value fits in 0x7FFF and the stream has data. */
+void loadNumberedArrays(IWineD3DDevice *iface, Direct3DVertexStridedData *sd, INT arrayUsageMap[WINED3DSHADERDECLUSAGE_MAX_USAGE]) {
+    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
+#define LOAD_NUMBERED_ARRAY(_arrayName, _lookupName) \
+    do { if (sd->u.s._arrayName.lpData != NULL && ((arrayUsageMap[WINED3DSHADERDECLUSAGE_##_lookupName] & 0x7FFF) == arrayUsageMap[WINED3DSHADERDECLUSAGE_##_lookupName])) { \
+       TRACE_(d3d_shader)("Loading array %u with data from %s\n", arrayUsageMap[WINED3DSHADERDECLUSAGE_##_lookupName], #_arrayName); \
+       GL_EXTCALL(glVertexAttribPointerARB(arrayUsageMap[WINED3DSHADERDECLUSAGE_##_lookupName], \
+                        WINED3D_ATR_SIZE(_arrayName), \
+                        WINED3D_ATR_GLTYPE(_arrayName), \
+                        WINED3D_ATR_NORMALIZED(_arrayName), \
+                        sd->u.s._arrayName.dwStride, \
+                        sd->u.s._arrayName.lpData)); \
+        GL_EXTCALL(glEnableVertexAttribArrayARB(arrayUsageMap[WINED3DSHADERDECLUSAGE_##_lookupName])); \
+    } } while (0)
+
+#define LOAD_NUMBERED_POSITION_ARRAY(_lookupNumber) \
+    do { if (sd->u.s.position2.lpData != NULL && ((arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + (_lookupNumber)] & 0x7FFF) == arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + (_lookupNumber)])) { \
+       FIXME_(d3d_shader)("Loading array %u with data from %s\n", arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + (_lookupNumber)], "position2"); \
+       GL_EXTCALL(glVertexAttribPointerARB(arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + (_lookupNumber)], \
+                        WINED3D_ATR_SIZE(position2), \
+                        WINED3D_ATR_GLTYPE(position2), \
+                        WINED3D_ATR_NORMALIZED(position2), \
+                        sd->u.s.position2.dwStride, \
+                        ((char *)sd->u.s.position2.lpData) + \
+                        WINED3D_ATR_SIZE(position2) * WINED3D_ATR_TYPESIZE(position2) * (_lookupNumber))); \
+        GL_EXTCALL(glEnableVertexAttribArrayARB(arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + (_lookupNumber)])); \
+    } } while (0)
+    /* Drop the RHW coord by clamping each position type to FLOAT3 (there must be a
+       nicer way of doing this); position2 is clamped from its own type. */
+    sd->u.s.position.dwType  = min(D3DDECLTYPE_FLOAT3, sd->u.s.position.dwType);
+    sd->u.s.position2.dwType = min(D3DDECLTYPE_FLOAT3, sd->u.s.position2.dwType);
+
+    LOAD_NUMBERED_ARRAY(blendWeights,BLENDWEIGHT);
+    LOAD_NUMBERED_ARRAY(blendMatrixIndices,BLENDINDICES);
+    LOAD_NUMBERED_ARRAY(position,POSITION);
+    LOAD_NUMBERED_ARRAY(normal,NORMAL);
+    LOAD_NUMBERED_ARRAY(pSize,PSIZE);
+    LOAD_NUMBERED_ARRAY(diffuse,DIFFUSE);
+    LOAD_NUMBERED_ARRAY(specular,SPECULAR);
+    LOAD_NUMBERED_ARRAY(texCoords[0],TEXCOORD0);
+    LOAD_NUMBERED_ARRAY(texCoords[1],TEXCOORD1);
+    LOAD_NUMBERED_ARRAY(texCoords[2],TEXCOORD2);
+    LOAD_NUMBERED_ARRAY(texCoords[3],TEXCOORD3);
+    LOAD_NUMBERED_ARRAY(texCoords[4],TEXCOORD4);
+    LOAD_NUMBERED_ARRAY(texCoords[5],TEXCOORD5);
+    LOAD_NUMBERED_ARRAY(texCoords[6],TEXCOORD6);
+    LOAD_NUMBERED_ARRAY(texCoords[7],TEXCOORD7);
+#if 0   /* TODO: Samplers may allow for more texture coords */
+    LOAD_NUMBERED_ARRAY(texCoords[8],TEXCOORD8);
+    LOAD_NUMBERED_ARRAY(texCoords[9],TEXCOORD9);
+    LOAD_NUMBERED_ARRAY(texCoords[10],TEXCOORD10);
+    LOAD_NUMBERED_ARRAY(texCoords[11],TEXCOORD11);
+    LOAD_NUMBERED_ARRAY(texCoords[12],TEXCOORD12);
+    LOAD_NUMBERED_ARRAY(texCoords[13],TEXCOORD13);
+    LOAD_NUMBERED_ARRAY(texCoords[14],TEXCOORD14);
+    LOAD_NUMBERED_ARRAY(texCoords[15],TEXCOORD15);
+#endif
+    LOAD_NUMBERED_ARRAY(position,POSITIONT);
+    /* d3d9 types */
+    LOAD_NUMBERED_ARRAY(tangent,TANGENT);
+    LOAD_NUMBERED_ARRAY(binormal,BINORMAL);
+    LOAD_NUMBERED_ARRAY(tessFactor,TESSFACTOR);
+    LOAD_NUMBERED_ARRAY(position2,POSITION2);
+    /* there can be lots of position arrays, each offset one element further into position2 */
+    LOAD_NUMBERED_POSITION_ARRAY(0);
+    LOAD_NUMBERED_POSITION_ARRAY(1);
+    LOAD_NUMBERED_POSITION_ARRAY(2);
+    LOAD_NUMBERED_POSITION_ARRAY(3);
+    LOAD_NUMBERED_POSITION_ARRAY(4);
+    LOAD_NUMBERED_POSITION_ARRAY(5);
+    LOAD_NUMBERED_POSITION_ARRAY(6);
+    LOAD_NUMBERED_POSITION_ARRAY(7);
+    LOAD_NUMBERED_ARRAY(position2,POSITIONT2);
+    LOAD_NUMBERED_ARRAY(normal2,NORMAL2);
+    LOAD_NUMBERED_ARRAY(fog,FOG);
+    LOAD_NUMBERED_ARRAY(depth,DEPTH);
+    LOAD_NUMBERED_ARRAY(sample,SAMPLE);
+#undef LOAD_NUMBERED_POSITION_ARRAY
+#undef LOAD_NUMBERED_ARRAY
+}
+
+static void loadVertexData(IWineD3DDevice *iface, Direct3DVertexStridedData *sd) {
     unsigned int textureNo   = 0;
-    GLenum       glPrimType  = GL_POINTS;
-    int          NumVertexes = NumPrimitives;
     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
 
     TRACE("Using fast vertex array code\n");
+    /* Blend Data ---------------------------------------------- */
+    if ((sd->u.s.blendWeights.lpData != NULL) ||
+        (sd->u.s.blendMatrixIndices.lpData != NULL)) {
+
+
+        if (GL_SUPPORT(ARB_VERTEX_BLEND)) {
+
+#if 1
+            glEnableClientState(GL_WEIGHT_ARRAY_ARB);
+            checkGLcall("glEnableClientState(GL_WEIGHT_ARRAY_ARB)");
+#endif
+
+            TRACE("Blend %d %p %ld\n", WINED3D_ATR_SIZE(blendWeights), sd->u.s.blendWeights.lpData, sd->u.s.blendWeights.dwStride);
+            /* FIXME("TODO\n");*/
+            /* Note dwType == float3 or float4 == 2 or 3 */
+
+#if 0
+            /* with this on, the normals appear to be being modified,
+               but the vertices aren't being translated as they should be
+               Maybe the world matrix aren't being setup properly? */
+            glVertexBlendARB(WINED3D_ATR_SIZE(blendWeights) + 1);
+#endif
+
+
+            VTRACE(("glWeightPointerARB(%ld, GL_FLOAT, %ld, %p)\n",
+                WINED3D_ATR_SIZE(blendWeights) ,
+                sd->u.s.blendWeights.dwStride,
+                sd->u.s.blendWeights.lpData));
+
+            GL_EXTCALL(glWeightPointerARB)(WINED3D_ATR_SIZE(blendWeights), WINED3D_ATR_GLTYPE(blendWeights),
+                            sd->u.s.blendWeights.dwStride,
+                            sd->u.s.blendWeights.lpData);
+
+            checkGLcall("glWeightPointerARB");
+
+            if(sd->u.s.blendMatrixIndices.lpData != NULL){
+                static BOOL showfixme = TRUE;
+                if(showfixme){
+                    FIXME("blendMatrixIndices support\n");
+                    showfixme = FALSE;
+                }
+            }
+
+
+
+        } else if (GL_SUPPORT(EXT_VERTEX_WEIGHTING)) {
+            /* FIXME("TODO\n");*/
+#if 0
+
+            GL_EXTCALL(glVertexWeightPointerEXT)(WINED3D_ATR_SIZE(blendWeights), WINED3D_ATR_GLTYPE(blendWeights),
+                                                sd->u.s.blendWeights.dwStride,
+                                                sd->u.s.blendWeights.lpData);
+            checkGLcall("glVertexWeightPointerEXT(numBlends, ...)");
+            glEnableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT);
+            checkGLcall("glEnableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT)");
+#endif
+
+        } else {
+            /* TODO: support blends in fixupVertices */
+            FIXME("unsupported blending in openGl\n");
+        }
+    } else {
+        if (GL_SUPPORT(ARB_VERTEX_BLEND)) {
+#if 0    /* TODO: Vertex blending */
+            glDisable(GL_VERTEX_BLEND_ARB);
+#endif
+            TRACE("ARB_VERTEX_BLEND \n");
+        } else if (GL_SUPPORT(EXT_VERTEX_WEIGHTING)) {
+            TRACE(" EXT_VERTEX_WEIGHTING\n");
+            glDisableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT);
+            checkGLcall("glDisableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT)");
+
+        }
+    }
+
+#if 0 /* FOG  ----------------------------------------------*/
+    if (sd->u.s.fog.lpData != NULL) {
+        /* TODO: fog*/
+    if (GL_SUPPORT(EXT_FOG_COORD) {
+             glEnableClientState(GL_FOG_COORD_EXT);
+            (GL_EXTCALL)(FogCoordPointerEXT)(WINED3D_ATR_GLTYPE(fog),
+                        sd->u.s.fog.dwStride,
+                        sd->u.s.fog.lpData);
+        } else {
+            /* don't bother falling back to 'slow' as we don't support software FOG yet. */
+            /* FIXME: fixme once */
+            TRACE("Hardware support for FOG is not avaiable, FOG disabled. \n");
+        }
+    } else {
+        if (GL_SUPPRT(EXT_FOR_COORD) {
+             /* make sure fog is disabled */
+             glDisableClientState(GL_FOG_COORD_EXT);
+        }
+    }
+#endif
+
+#if 0 /* tangents  ----------------------------------------------*/
+    if (sd->u.s.tangent.lpData != NULL || sd->u.s.binormal.lpData != NULL) {
+        /* TODO: tangents*/
+        if (GL_SUPPORT(EXT_COORDINATE_FRAME) {
+            if (sd->u.s.tangent.lpData != NULL) {
+                glEnable(GL_TANGENT_ARRAY_EXT);
+                (GL_EXTCALL)(TangentPointerEXT)(WINED3D_ATR_GLTYPE(tangent),
+                            sd->u.s.tangent.dwStride,
+                            sd->u.s.tangent.lpData);
+            } else {
+                    glDisable(GL_TANGENT_ARRAY_EXT);
+            }
+            if (sd->u.s.binormal.lpData != NULL) {
+                    glEnable(GL_BINORMAL_ARRAY_EXT);
+                    (GL_EXTCALL)(BinormalPointerEXT)(WINED3D_ATR_GLTYPE(binormal),
+                                                sd->u.s.binormal.dwStride,
+                                                sd->u.s.binormal.lpData);
+            } else{
+                    glDisable(GL_BINORMAL_ARRAY_EXT);
+            }
+
+        } else {
+            /* don't bother falling back to 'slow' as we don't support software tangents and binormals yet . */
+            /* FIXME: fixme once */
+            TRACE("Hardware support for tangents and binormals is not avaiable, tangents and binormals disabled. \n");
+        }
+    } else {
+        if (GL_SUPPORT(EXT_COORDINATE_FRAME) {
+             /* make sure the tangent and binormal arrays are disabled */
+             glDisable(GL_TANGENT_ARRAY_EXT);
+             glDisable(GL_BINORMAL_ARRAY_EXT);
+        }
+    }
+#endif
+
+    /* Point Size ----------------------------------------------*/
+    if (sd->u.s.pSize.lpData != NULL) {
+
+        /* no such functionality in the fixed function GL pipeline */
+        TRACE("Cannot change ptSize here in openGl\n");
+        /* TODO: Implement this function in using shaders if they are available */
+
+    }
 
     /* Vertex Pointers -----------------------------------------*/
     if (sd->u.s.position.lpData != NULL) {
-
         /* Note dwType == float3 or float4 == 2 or 3 */
         VTRACE(("glVertexPointer(%ld, GL_FLOAT, %ld, %p)\n",
                 sd->u.s.position.dwStride,
                 sd->u.s.position.dwType + 1,
                 sd->u.s.position.lpData));
 
-        /* Disable RHW mode as 'w' coord handling for rhw mode should
-           not impact screen position whereas in GL it does. This may
-           result in very slightly distored textures in rhw mode, but
-           a very minimal different                                   */
-        glVertexPointer(3, GL_FLOAT,  /* RHW: Was 'sd->u.s.position.dwType + 1' */
-                        sd->u.s.position.dwStride,
-                        sd->u.s.position.lpData);
+        /* min(WINED3D_ATR_SIZE(position),3) to disable RHW mode, as 'w' coord
+           handling for rhw mode should not impact screen position whereas in GL it does.
+           This may result in very slightly distorted textures in rhw mode, but
+           the difference is very small. There's always the other option of
+           fixing the view matrix to prevent w from having any effect  */
+        glVertexPointer(3 /* min(WINED3D_ATR_SIZE(position),3) */, WINED3D_ATR_GLTYPE(position),
+                        sd->u.s.position.dwStride, sd->u.s.position.lpData);
         checkGLcall("glVertexPointer(...)");
         glEnableClientState(GL_VERTEX_ARRAY);
         checkGLcall("glEnableClientState(GL_VERTEX_ARRAY)");
 
     } else {
-
         glDisableClientState(GL_VERTEX_ARRAY);
         checkGLcall("glDisableClientState(GL_VERTEX_ARRAY)");
     }
 
-    /* Blend Data ----------------------------------------------*/
-    if ((sd->u.s.blendWeights.lpData != NULL) ||
-        (sd->u.s.blendMatrixIndices.lpData != NULL)) {
-#if 1 /* Vertex blend support needs to be added */
-        if (GL_SUPPORT(ARB_VERTEX_BLEND)) {
-            DWORD fvf = (sd->u.s.blendWeights.dwType - D3DDECLTYPE_FLOAT1) + 1;
-            int numBlends = ((fvf & D3DFVF_POSITION_MASK) >> 1) - 2 + ((FALSE == (fvf & D3DFVF_LASTBETA_UBYTE4)) ? 0 : -1);
-
-            /* Note dwType == float3 or float4 == 2 or 3 */
-            VTRACE(("glWeightPointerARB(%ld, GL_FLOAT, %ld, %p)\n",
-                    numBlends,
-                    sd->u.s.blendWeights.dwStride,
-                    sd->u.s.blendWeights.lpData));
-            GL_EXTCALL(glWeightPointerARB)(numBlends, GL_FLOAT,
-                                           sd->u.s.blendWeights.dwStride,
-                                           sd->u.s.blendWeights.lpData);
-            checkGLcall("glWeightPointerARB(...)");
-            glEnableClientState(GL_WEIGHT_ARRAY_ARB);
-            checkGLcall("glEnableClientState(GL_VERTEX_ARRAY)");
-        } else if (GL_SUPPORT(EXT_VERTEX_WEIGHTING)) {
-            /*FIXME("TODO\n");*/
-            /*
-            GLExtCall(glVertexWeightPointerEXT)(numBlends, GL_FLOAT, skip, curPos);
-            checkGLcall("glVertexWeightPointerEXT(numBlends, ...)");
-            glEnableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT);
-            checkGLcall("glEnableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT)");
-            */
-        } else {
-            FIXME("unsupported blending in openGl\n");
-        }
-    } else {
-        if (GL_SUPPORT(ARB_VERTEX_BLEND)) {
-            TRACE("TODO ARB_VERTEX_BLEND\n");
-        } else if (GL_SUPPORT(EXT_VERTEX_WEIGHTING)) {
-            TRACE("TODO EXT_VERTEX_WEIGHTING\n");
-            /*
-            glDisableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT);
-            checkGLcall("glDisableClientState(GL_VERTEX_WEIGHT_ARRAY_EXT)");
-            */
-        }
-#else
-        /* FIXME: Won't get here as will drop to slow method        */
-        FIXME("Blending not supported in fast draw routine\n");
-#endif
-    }
-
     /* Normals -------------------------------------------------*/
     if (sd->u.s.normal.lpData != NULL) {
-
         /* Note dwType == float3 or float4 == 2 or 3 */
         VTRACE(("glNormalPointer(GL_FLOAT, %ld, %p)\n",
                 sd->u.s.normal.dwStride,
                 sd->u.s.normal.lpData));
-        glNormalPointer(GL_FLOAT,
+        glNormalPointer(WINED3D_ATR_GLTYPE(normal),
                         sd->u.s.normal.dwStride,
                         sd->u.s.normal.lpData);
         checkGLcall("glNormalPointer(...)");
@@ -879,31 +1043,27 @@
         checkGLcall("glEnableClientState(GL_NORMAL_ARRAY)");
 
     } else {
-
         glDisableClientState(GL_NORMAL_ARRAY);
         checkGLcall("glDisableClientState(GL_NORMAL_ARRAY)");
         glNormal3f(0, 0, 1);
         checkGLcall("glNormal3f(0, 0, 1)");
     }
 
-    /* Point Size ----------------------------------------------*/
-    if (sd->u.s.pSize.lpData != NULL) {
+    /* Diffuse Colour --------------------------------------------*/
+    /*  WARNING: Data here MUST be in RGBA format, so cannot      */
+    /*     go directly into fast mode from app pgm, because       */
+    /*     directx requires data in BGRA format.                  */
+    /* currently fixupVertices swizzles the format, but this isn't */
+    /* very practical when using VBOs                             */
+    /* NOTE: Unless we write a vertex shader to swizzle the colour */
+    /* , or the user doesn't care and wants the speed advantage   */
 
-        /* no such functionality in the fixed function GL pipeline */
-        /* FIXME: Won't get here as will drop to slow method        */
-        FIXME("Cannot change ptSize here in openGl\n");
-    }
-
-    /* Diffuse Colour ------------------------------------------*/
-    /*  WARNING: Data here MUST be in RGBA format, so cannot    */
-    /*     go directly into fast mode from app pgm, because     */
-    /*     directx requires data in BGRA format.                */
     if (sd->u.s.diffuse.lpData != NULL) {
-
         /* Note dwType == float3 or float4 == 2 or 3 */
         VTRACE(("glColorPointer(4, GL_UNSIGNED_BYTE, %ld, %p)\n",
                 sd->u.s.diffuse.dwStride,
                 sd->u.s.diffuse.lpData));
+
         glColorPointer(4, GL_UNSIGNED_BYTE,
                        sd->u.s.diffuse.dwStride,
                        sd->u.s.diffuse.lpData);
@@ -912,7 +1072,6 @@
         checkGLcall("glEnableClientState(GL_COLOR_ARRAY)");
 
     } else {
-
         glDisableClientState(GL_COLOR_ARRAY);
         checkGLcall("glDisableClientState(GL_COLOR_ARRAY)");
         glColor4f(1.0f, 1.0f, 1.0f, 1.0f);
@@ -921,12 +1080,11 @@
 
     /* Specular Colour ------------------------------------------*/
     if (sd->u.s.specular.lpData != NULL) {
-
+        TRACE("setting specular colour\n");
         /* Note dwType == float3 or float4 == 2 or 3 */
         VTRACE(("glSecondaryColorPointer(4, GL_UNSIGNED_BYTE, %ld, %p)\n",
                 sd->u.s.specular.dwStride,
                 sd->u.s.specular.lpData));
-
         if (GL_SUPPORT(EXT_SECONDARY_COLOR)) {
             GL_EXTCALL(glSecondaryColorPointerEXT)(4, GL_UNSIGNED_BYTE,
                                                    sd->u.s.specular.dwStride,
@@ -935,33 +1093,34 @@
             glEnableClientState(GL_SECONDARY_COLOR_ARRAY_EXT);
             vcheckGLcall("glEnableClientState(GL_SECONDARY_COLOR_ARRAY_EXT)");
         } else {
-	  /* Missing specular color is not critical, no warnings */
-	  VTRACE(("Specular colour is not supported in this GL implementation\n"));
-	}
+
+        /* Missing specular color is not critical, no warnings */
+        VTRACE(("Specular colour is not supported in this GL implementation\n"));
+        }
 
     } else {
+        if (GL_SUPPORT(EXT_SECONDARY_COLOR)) {
 
-      if (GL_SUPPORT(EXT_SECONDARY_COLOR)) {
-	glDisableClientState(GL_SECONDARY_COLOR_ARRAY_EXT);
-	checkGLcall("glDisableClientState(GL_SECONDARY_COLOR_ARRAY_EXT)");
-	GL_EXTCALL(glSecondaryColor3fEXT)(0, 0, 0);
-	checkGLcall("glSecondaryColor3fEXT(0, 0, 0)");
-      } else {
-	/* Missing specular color is not critical, no warnings */
-	VTRACE(("Specular colour is not supported in this GL implementation\n"));
-      }
+            glDisableClientState(GL_SECONDARY_COLOR_ARRAY_EXT);
+            checkGLcall("glDisableClientState(GL_SECONDARY_COLOR_ARRAY_EXT)");
+            GL_EXTCALL(glSecondaryColor3fEXT)(0, 0, 0);
+            checkGLcall("glSecondaryColor3fEXT(0, 0, 0)");
+        } else {
+
+            /* Missing specular color is not critical, no warnings */
+            VTRACE(("Specular colour is not supported in this GL implementation\n"));
+        }
     }
 
     /* Texture coords -------------------------------------------*/
+
     for (textureNo = 0; textureNo < GL_LIMITS(textures); ++textureNo) {
 
         /* Select the correct texture stage */
         GLCLIENTACTIVETEXTURE(textureNo);
-
-        /* Query tex coords */
         if (This->stateBlock->textures[textureNo] != NULL) {
             int coordIdx = This->stateBlock->textureState[textureNo][D3DTSS_TEXCOORDINDEX];
-
+            TRACE("Setting up texture %u, cordindx %u, data %p\n", textureNo, coordIdx, sd->u.s.texCoords[coordIdx].lpData);
             if (!GL_SUPPORT(ARB_MULTITEXTURE) && textureNo > 0) {
                 FIXME("Program using multiple concurrent textures which this opengl implementation doesn't support\n");
                 glDisableClientState(GL_TEXTURE_COORD_ARRAY);
@@ -969,7 +1128,7 @@
                 continue;
             }
 
-            if (coordIdx > 7) {
+            if (coordIdx >= MAX_TEXTURES) {
                 VTRACE(("tex: %d - Skip tex coords, as being system generated\n", textureNo));
                 glDisableClientState(GL_TEXTURE_COORD_ARRAY);
                 GLMULTITEXCOORD4F(textureNo, 0, 0, 0, 1);
@@ -982,69 +1141,61 @@
             } else {
 
                 /* The coords to supply depend completely on the fvf / vertex shader */
-                GLint size;
-                GLenum type;
-
-                switch (sd->u.s.texCoords[coordIdx].dwType) {
-                case D3DDECLTYPE_FLOAT1: size = 1, type = GL_FLOAT; break;
-                case D3DDECLTYPE_FLOAT2: size = 2, type = GL_FLOAT; break;
-                case D3DDECLTYPE_FLOAT3: size = 3, type = GL_FLOAT; break;
-                case D3DDECLTYPE_FLOAT4: size = 4, type = GL_FLOAT; break;
-                case D3DDECLTYPE_SHORT2: size = 2, type = GL_SHORT; break;
-                case D3DDECLTYPE_SHORT4: size = 4, type = GL_SHORT; break;
-                case D3DDECLTYPE_UBYTE4: size = 4, type = GL_UNSIGNED_BYTE; break;
-                default: FIXME("Unrecognized data type %ld\n", sd->u.s.texCoords[coordIdx].dwType);
-                      size = 4; type = GL_UNSIGNED_BYTE;
-                }
-
-                glTexCoordPointer(size, type, sd->u.s.texCoords[coordIdx].dwStride, sd->u.s.texCoords[coordIdx].lpData);
+                glTexCoordPointer(WINED3D_ATR_SIZE(texCoords[coordIdx]), WINED3D_ATR_GLTYPE(texCoords[coordIdx]), sd->u.s.texCoords[coordIdx].dwStride, sd->u.s.texCoords[coordIdx].lpData);
                 glEnableClientState(GL_TEXTURE_COORD_ARRAY);
             }
+
         } else {
             glDisableClientState(GL_TEXTURE_COORD_ARRAY);
             GLMULTITEXCOORD4F(textureNo, 0, 0, 0, 1);
         }
     }
+}
 
-    /* Ok, Work out which primitive is requested and how many vertexes that
-       will be                                                              */
-    NumVertexes = primitiveToGl(PrimitiveType, NumPrimitives, &glPrimType);
+static void drawStridedFast(IWineD3DDevice *iface,UINT numberOfVertices, GLenum glPrimitiveType,
+                     const void *idxData, short idxSize, ULONG minIndex, ULONG startIdx) { /* Draws with glDrawElements when idxData is non-NULL, otherwise with glDrawArrays */
+    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
 
-    /* Finally do the drawing */
-    if (idxData != NULL) {
+    if (idxData != NULL /* This crashes sometimes!*/) {
+        TRACE("(%p) : glElements(%x, %d, %ld, ...)\n", This, glPrimitiveType, numberOfVertices, minIndex);
+        idxData = idxData == (void *)-1 ? NULL : idxData; /* NOTE(review): (void *)-1 looks like a sentinel for "indexed, but no client-side index pointer" - confirm against callers */
+#if 1
+#if 0
+        glIndexPointer(idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT, idxSize, startIdx);
+        glEnableClientState(GL_INDEX_ARRAY);
+#endif
+        glDrawElements(glPrimitiveType, numberOfVertices, idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT,
+                     (const char *)idxData+(idxSize * startIdx)); /* idxSize is the index width in bytes: 2 selects 16-bit indices, anything else 32-bit */
+#else /* using drawRangeElements may be faster */
+        /* Kept disabled: the index range passed below is unverified (an earlier FIXME called the calculation wrong) */
-        TRACE("glElements(%x, %d, %ld, ...)\n", glPrimType, NumVertexes, minIndex);
-#if 1  /* FIXME: Want to use DrawRangeElements, but wrong calculation! */
-        glDrawElements(glPrimType, NumVertexes, idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT,
-                      (const char *)idxData + (idxSize * startIdx));
-#else
-        glDrawRangeElements(glPrimType, minIndex, minIndex + NumVertexes - 1, NumVertexes,
-                      idxSize == 2 ? GL_UNSIGNED_SHORT:GL_UNSIGNED_INT,
-                      (const char *)idxData + (idxSize * startIdx));
+        glDrawRangeElements(glPrimitiveType, minIndex, minIndex + numberOfVertices - 1, numberOfVertices,
+                      idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT,
+                      (const char *)idxData+(idxSize * startIdx));
 #endif
         checkGLcall("glDrawRangeElements");
 
     } else {
 
         /* Note first is now zero as we shuffled along earlier */
-        TRACE("glDrawArrays(%x, 0, %d)\n", glPrimType, NumVertexes);
-        glDrawArrays(glPrimType, 0, NumVertexes);
+        TRACE("(%p) : glDrawArrays(%x, 0, %d)\n", This, glPrimitiveType, numberOfVertices);
+        glDrawArrays(glPrimitiveType, 0, numberOfVertices);
         checkGLcall("glDrawArrays");
 
     }
+
+    return;
 }
 
 /*
  * Actually draw using the supplied information.
  * Slower GL version which extracts info about each vertex in turn
  */
+	
 static void drawStridedSlow(IWineD3DDevice *iface, Direct3DVertexStridedData *sd,
-                     int PrimitiveType, ULONG NumPrimitives,
+                     UINT NumVertexes, GLenum glPrimType,
                      const void *idxData, short idxSize, ULONG minIndex, ULONG startIdx) {
 
     unsigned int               textureNo    = 0;
-    GLenum                     glPrimType   = GL_POINTS;
-    int                        NumVertexes  = NumPrimitives;
     const short               *pIdxBufS     = NULL;
     const long                *pIdxBufL     = NULL;
     LONG                       SkipnStrides = 0;
@@ -1065,9 +1216,6 @@
         else pIdxBufL = (const long *) idxData;
     }
 
-    /* Ok, Work out which primitive is requested and how many vertexes that will be */
-    NumVertexes = primitiveToGl(PrimitiveType, NumPrimitives, &glPrimType);
-
     /* Start drawing in GL */
     VTRACE(("glBegin(%x)\n", glPrimType));
     glBegin(glPrimType);
@@ -1084,10 +1232,10 @@
             /* Indexed so work out the number of strides to skip */
             if (idxSize == 2) {
                 VTRACE(("Idx for vertex %ld = %d\n", vx_index, pIdxBufS[startIdx+vx_index]));
-                SkipnStrides = pIdxBufS[startIdx+vx_index];
+                SkipnStrides = pIdxBufS[startIdx + vx_index];
             } else {
                 VTRACE(("Idx for vertex %ld = %ld\n", vx_index, pIdxBufL[startIdx+vx_index]));
-                SkipnStrides = pIdxBufL[startIdx+vx_index];
+                SkipnStrides = pIdxBufL[startIdx + vx_index];
             }
         }
 
@@ -1110,7 +1258,7 @@
 
         /* Blending data -------------------------- */
         if (sd->u.s.blendWeights.lpData != NULL) {
-            /*float *ptrToCoords = (float *)(sd->u.s.blendWeights.lpData + (SkipnStrides * sd->u.s.blendWeights.dwStride));*/
+            /* float *ptrToCoords = (float *)(sd->u.s.blendWeights.lpData + (SkipnStrides * sd->u.s.blendWeights.dwStride)); */
             FIXME("Blending not supported yet\n");
 
             if (sd->u.s.blendMatrixIndices.lpData != NULL) {
@@ -1351,7 +1499,7 @@
     int   numcoords[8];                    /* Number of coords           */
     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
 
-    IDirect3DVertexShaderImpl* vertex_shader = NULL;
+    IDirect3DVertexShaderImpl* vertexShader = NULL;
 
     TRACE("Using slow software vertex shader code\n");
 
@@ -1365,7 +1513,7 @@
     NumVertexes = primitiveToGl(PrimitiveType, NumPrimitives, &glPrimType);
 
     /* Retrieve the VS information */
-    vertex_shader = VERTEX_SHADER(This->stateBlock->VertexShader);
+    vertexShader = (IWineD3DVertexShaderImp *)This->stateBlock->VertexShader;
 
     /* Start drawing in GL */
     VTRACE(("glBegin(%x)\n", glPrimType));
@@ -1388,52 +1536,52 @@
         }
 
         /* Fill the vertex shader input */
-        IDirect3DDeviceImpl_FillVertexShaderInputSW(This, vertex_shader, SkipnStrides);
+        IDirect3DDeviceImpl_FillVertexShaderInputSW(This, vertexShader, SkipnStrides);
 
         /* Initialize the output fields to the same defaults as it would normally have */
-        memset(&vertex_shader->output, 0, sizeof(VSHADEROUTPUTDATA8));
-        vertex_shader->output.oD[0].x = 1.0;
-        vertex_shader->output.oD[0].y = 1.0;
-        vertex_shader->output.oD[0].z = 1.0;
-        vertex_shader->output.oD[0].w = 1.0;
+        memset(&vertexShader->output, 0, sizeof(VSHADEROUTPUTDATA8));
+        vertexShader->output.oD[0].x = 1.0;
+        vertexShader->output.oD[0].y = 1.0;
+        vertexShader->output.oD[0].z = 1.0;
+        vertexShader->output.oD[0].w = 1.0;
 
         /* Now execute the vertex shader */
-        IDirect3DVertexShaderImpl_ExecuteSW(vertex_shader, &vertex_shader->input, &vertex_shader->output);
+        IDirect3DVertexShaderImpl_ExecuteSW(vertexShader, &vertexShader->input, &vertexShader->output);
 
         /*
-        TRACE_VECTOR(vertex_shader->output.oPos);
-        TRACE_VECTOR(vertex_shader->output.oD[0]);
-        TRACE_VECTOR(vertex_shader->output.oD[1]);
-        TRACE_VECTOR(vertex_shader->output.oT[0]);
-        TRACE_VECTOR(vertex_shader->output.oT[1]);
-        TRACE_VECTOR(vertex_shader->input.V[0]);
-        TRACE_VECTOR(vertex_shader->data->C[0]);
-        TRACE_VECTOR(vertex_shader->data->C[1]);
-        TRACE_VECTOR(vertex_shader->data->C[2]);
-        TRACE_VECTOR(vertex_shader->data->C[3]);
-        TRACE_VECTOR(vertex_shader->data->C[4]);
-        TRACE_VECTOR(vertex_shader->data->C[5]);
-        TRACE_VECTOR(vertex_shader->data->C[6]);
-        TRACE_VECTOR(vertex_shader->data->C[7]);
+        TRACE_VECTOR(vertexShader->output.oPos);
+        TRACE_VECTOR(vertexShader->output.oD[0]);
+        TRACE_VECTOR(vertexShader->output.oD[1]);
+        TRACE_VECTOR(vertexShader->output.oT[0]);
+        TRACE_VECTOR(vertexShader->output.oT[1]);
+        TRACE_VECTOR(vertexShader->input.V[0]);
+        TRACE_VECTOR(vertexShader->data->C[0]);
+        TRACE_VECTOR(vertexShader->data->C[1]);
+        TRACE_VECTOR(vertexShader->data->C[2]);
+        TRACE_VECTOR(vertexShader->data->C[3]);
+        TRACE_VECTOR(vertexShader->data->C[4]);
+        TRACE_VECTOR(vertexShader->data->C[5]);
+        TRACE_VECTOR(vertexShader->data->C[6]);
+        TRACE_VECTOR(vertexShader->data->C[7]);
         */
 
         /* Extract out the output */
-        /*FIXME: Fog coords? */
-        x = vertex_shader->output.oPos.x;
-        y = vertex_shader->output.oPos.y;
-        z = vertex_shader->output.oPos.z;
-        rhw = vertex_shader->output.oPos.w;
-        ptSize = vertex_shader->output.oPts.x; /* Fixme - Is this right? */
+        /* FIXME: Fog coords? */
+        x = vertexShader->output.oPos.x;
+        y = vertexShader->output.oPos.y;
+        z = vertexShader->output.oPos.z;
+        rhw = vertexShader->output.oPos.w;
+        ptSize = vertexShader->output.oPts.x; /* Fixme - Is this right? */
 
-        /** Update textures coords using vertex_shader->output.oT[0->7] */
+        /** Update textures coords using vertexShader->output.oT[0->7] */
         memset(texcoords, 0x00, sizeof(texcoords));
         memset(numcoords, 0x00, sizeof(numcoords));
         for (textureNo = 0; textureNo < GL_LIMITS(textures); ++textureNo) {
             if (This->stateBlock->textures[textureNo] != NULL) {
-               texcoords[textureNo].x = vertex_shader->output.oT[textureNo].x;
-               texcoords[textureNo].y = vertex_shader->output.oT[textureNo].y;
-               texcoords[textureNo].z = vertex_shader->output.oT[textureNo].z;
-               texcoords[textureNo].w = vertex_shader->output.oT[textureNo].w;
+               texcoords[textureNo].x = vertexShader->output.oT[textureNo].x;
+               texcoords[textureNo].y = vertexShader->output.oT[textureNo].y;
+               texcoords[textureNo].z = vertexShader->output.oT[textureNo].z;
+               texcoords[textureNo].w = vertexShader->output.oT[textureNo].w;
                if (This->stateBlock->texture_state[textureNo][D3DTSS_TEXTURETRANSFORMFLAGS] != D3DTTFF_DISABLE) {
                    numcoords[textureNo]    = This->stateBlock->texture_state[textureNo][D3DTSS_TEXTURETRANSFORMFLAGS] & ~D3DTTFF_PROJECTED;
                } else {
@@ -1452,8 +1600,8 @@
         draw_vertex(iface,
                     TRUE, x, y, z, rhw,
                     TRUE, 0.0f, 0.0f, 1.0f,
-                    TRUE, (float*) &vertex_shader->output.oD[0],
-                    TRUE, (float*) &vertex_shader->output.oD[1],
+                    TRUE, (float*) &vertexShader->output.oD[0],
+                    TRUE, (float*) &vertexShader->output.oD[1],
                     FALSE, ptSize,         /* FIXME: Change back when supported */
                     texcoords, numcoords);
 
@@ -1468,86 +1616,183 @@
     checkGLcall("glEnd and previous calls");
 }
 
-static void drawStridedHardwareVS(IWineD3DDevice *iface, Direct3DVertexStridedData *sd,
-                     int PrimitiveType, ULONG NumPrimitives,
-                     const void *idxData, short idxSize, ULONG minIndex, ULONG startIdx) {
-
-    IDirect3DVertexShaderImpl *vertex_shader = NULL;
-    int                        i;
-    int                        NumVertexes;
-    int                        glPrimType;
-    int                        maxAttribs;
-
-    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
-    TRACE("Drawing with hardware vertex shaders\n");
-
-    /* Retrieve the VS information */
-    vertex_shader = VERTEX_SHADER(This->stateBlock->VertexShader);
-
-    /* Enable the Vertex Shader */
-    GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertex_shader->prgId));
-    checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertex_shader->prgId);");
-    glEnable(GL_VERTEX_PROGRAM_ARB);
-    checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
-
-    /* Update the constants */
-    for (i = 0; i < D3D8_VSHADER_MAX_CONSTANTS; ++i) {
-        GL_EXTCALL(glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, i, (GLfloat *)&This->stateBlock->vertexShaderConstant[i]));
-        checkGLcall("glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB");
-    }
-
-    /* Set up the vertex.attr[n] inputs */
-    IDirect3DDeviceImpl_FillVertexShaderInputArbHW(This, vertex_shader, 0);
-
-    /* Ok, Work out which primitive is requested and how many vertexes that
-       will be                                                              */
-    NumVertexes = primitiveToGl(PrimitiveType, NumPrimitives, &glPrimType);
-
-    /* Finally do the drawing */
-    if (idxData != NULL) {
-
-        TRACE("glElements(%x, %d, %ld, ...)\n", glPrimType, NumVertexes, minIndex);
-#if 1  /* FIXME: Want to use DrawRangeElements, but wrong calculation! */
-        glDrawElements(glPrimType, NumVertexes, idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT,
-                      (const char *)idxData + (idxSize * startIdx));
-#else
-        glDrawRangeElements(glPrimType, minIndex, minIndex+NumVertexes - 1, NumVertexes,
-                      idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT,
-                      (const char *)idxData + (idxSize * startIdx));
 #endif
-        checkGLcall("glDrawRangeElements");
+
+/*
+ * Load the vertex arrays described by dataLocations (where GL can consume them
+ * directly) and issue the draw call. When a vertex shader function is in use the
+ * ARB vertex program is bound and drawStridedFast is always used; otherwise
+ * drawStridedSlow is used if point size, diffuse or specular data is present.
+ */
+void inline  drawPrimitiveDrawStrided(IWineD3DDevice *iface, BOOL useVertexShaderFunction, int useHW, Direct3DVertexStridedData *dataLocations,
+UINT numberOfvertices, UINT numberOfIndicies, GLenum glPrimType, const void *idxData, short idxSize, int minIndex, long StartIdx) {
+    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
+
+    /* Now draw the graphics to the screen */
+    if  (FALSE /* disable software vs for now */ && useVertexShaderFunction && !useHW) {
+        FIXME("drawing using software vertex shaders (line %d)\n", __LINE__);
+        /* Ideally, we should have software FV and hardware VS, possibly
+           depending on the device type? */
+#if 0 /* TODO: vertex and pixel shaders */
+        drawStridedSoftwareVS(iface, dataLocations, PrimitiveType, NumPrimitives,
+                    idxData, idxSize, minIndex, StartIdx);
+#endif
 
     } else {
 
-        /* Note first is now zero as we shuffled along earlier */
-        TRACE("glDrawArrays(%x, 0, %d)\n", glPrimType, NumVertexes);
-        glDrawArrays(glPrimType, 0, NumVertexes);
-        checkGLcall("glDrawArrays");
+        /* TODO: Work out if fixups are required at all (this can be a flag against the vertex declaration) */
+        int startStride = idxData == NULL ? 0 : idxData == (void *) -1 ? 0 :(idxSize == 2 ? *(((const short *) idxData) + StartIdx) : *(((const int *) idxData) + StartIdx));
+        int endStride = startStride;
+        TRACE("begin Start stride %d, end stride %d, number of indices%d, number of vertices%d\n", startStride, endStride, numberOfIndicies, numberOfvertices);
 
-    }
-
-    {
-    GLint errPos;
-    glGetIntegerv( GL_PROGRAM_ERROR_POSITION_ARB, &errPos );
-    if (errPos != -1)
-        FIXME("HW VertexShader Error at position: %d\n%s\n", errPos, glGetString( GL_PROGRAM_ERROR_STRING_ARB) );
-    }
-
-
-    /* Leave all the attribs disabled */
-    glGetIntegerv( GL_MAX_VERTEX_ATTRIBS_ARB, &maxAttribs);
-    /* MESA does not support it right not */
-    if (glGetError() != GL_NO_ERROR)
-	maxAttribs = 16;
-    for (i = 0; i < maxAttribs; ++i) {
-        GL_EXTCALL(glDisableVertexAttribArrayARB(i));
-        checkGLcall("glDisableVertexAttribArrayARB(reg);");
-    }
-
-    /* Done */
-    glDisable(GL_VERTEX_PROGRAM_ARB);
-}
+#if 0 /* TODO: Vertex fixups (diffuse and specular) */
+        if (idxData != NULL) { /* index data isn't linear, so lookup the real start and end strides */
+            int t;
+            if (idxSize == 2) {
+                unsigned short *index = (unsigned short *)idxData;
+                index += StartIdx;
+                for (t = 0 ; t < numberOfIndicies; t++) {
+                    if (startStride >  *index)
+                        startStride = *index;
+                    if (endStride < *index)
+                        endStride = *index;
+                    index++;
+                }
+            } else {  /* idxSize == 4 */
+                unsigned int *index = (unsigned int *)idxData;
+                index += StartIdx;
+                for (t = 0 ; t < numberOfIndicies; t++) {
+                    if (startStride > *index)
+                        startStride = *index;
+                    if (endStride < *index)
+                        endStride = *index;
+                    index++;
+                }
+            }
+        } else {
+            endStride += numberOfvertices -1;
+        }
 #endif
+        TRACE("end Start stride %d, end stride %d, number of indices%d, number of vertices%d\n", startStride, endStride, numberOfIndicies, numberOfvertices);
+        /* pre-transform vertices */
+        /* TODO: Caching, VBO's etc.. */
+
+/* Generate some fixme's if unsupported functionality is being used */
+#define BUFFER_OR_DATA(_attribute) dataLocations->u.s._attribute.lpData
+    /* TODO: Either support missing functionality in fixupVertices or by creating a shader to replace the pipeline. */
+    if (!useVertexShaderFunction && (BUFFER_OR_DATA(blendMatrixIndices) || BUFFER_OR_DATA(blendWeights))) {
+        FIXME("Blending data is only valid with vertex shaders %p %p\n",dataLocations->u.s.blendWeights.lpData,dataLocations->u.s.blendMatrixIndices.lpData);
+    }
+    if (!useVertexShaderFunction && (BUFFER_OR_DATA(position2) || BUFFER_OR_DATA(normal2))) {
+        FIXME("Tweening is only valid with vertex shaders\n");
+    }
+    if (!useVertexShaderFunction && (BUFFER_OR_DATA(tangent) || BUFFER_OR_DATA(binormal))) {
+        FIXME("Tangent and binormal bump mapping is only valid with vertex shaders\n");
+    }
+    if (!useVertexShaderFunction && (BUFFER_OR_DATA(tessFactor) || BUFFER_OR_DATA(fog) || BUFFER_OR_DATA(depth) || BUFFER_OR_DATA(sample))) {
+        FIXME("Extended attributes are only valid with vertex shaders\n");
+    }
+#undef BUFFER_OR_DATA
+
+#if 0/* TODO: Vertex fixups (diffuse and specular) */
+        fixupVertices(This, dataLocations, &transformedDataLocations, 1 + endStride - startStride, startStride);
+#endif
+
+         /* vertex shaders */
+
+        /* If the only vertex data used by the shader is supported by OpenGL then */
+        if ((!useVertexShaderFunction  && dataLocations->u.s.pSize.lpData == NULL
+                && dataLocations->u.s.diffuse.lpData == NULL  && dataLocations->u.s.specular.lpData == NULL)
+                || (useVertexShaderFunction && ((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->namedArrays && !((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->declaredArrays)) {
+
+            /* Load the vertex data using named arrays */
+            TRACE("(%p) Loading vertex data\n", This);
+            loadVertexData(iface, dataLocations);
+
+        } else /* Otherwise */
+            if(useVertexShaderFunction && ((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->declaredArrays) {
+
+            /* load the array data using ordinal mapping */
+            loadNumberedArrays(iface, dataLocations, ((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->arrayUsageMap);
+
+        } else { /* If this happens we must drawStridedSlow later on */
+            TRACE("Not loading vertex data\n");
+        }
+
+        TRACE("Loaded arrays\n");
+
+        if (useVertexShaderFunction) {
+            int i;
+            GLint errPos;
+
+            FIXME("Using vertex shader\n");
+
+            /* Bind the vertex program */
+            GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->prgId));
+            checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertexShader->prgId);");
+
+            /* and enable gl vertex shaders */
+            glEnable(GL_VERTEX_PROGRAM_ARB);
+            checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
+            TRACE_(d3d_shader)("(%p) bound program %u and enabled vertex program ARB\n", This, ((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->prgId);
+
+            /* Update the constants (four consecutive floats of vertexShaderConstantF per register) */
+            for (i = 0; i < WINED3D_VSHADER_MAX_CONSTANTS; i++) {
+                /* TODO: add support for Integer and Boolean constants */
+                GL_EXTCALL(glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, i, &This->stateBlock->vertexShaderConstantF[i * 4]));
+                TRACE_(d3d_shader)("Loading constants %u = %f %f %f %f\n",i, This->stateBlock->vertexShaderConstantF[i *4 ], This->stateBlock->vertexShaderConstantF[i * 4 + 1], This->stateBlock->vertexShaderConstantF[i *4 + 2], This->stateBlock->vertexShaderConstantF[i * 4 + 3]);
+                checkGLcall("glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB");
+            }
+            /* TODO: Vertex Shader 8 constants*/
+
+            /* always draw strided fast if a vertex shader is being used */
+            drawStridedFast(iface, numberOfIndicies, glPrimType,
+                        idxData, idxSize, minIndex, StartIdx);
+
+            /* check for any errors */
+            glGetIntegerv( GL_PROGRAM_ERROR_POSITION_ARB, &errPos );
+            if (errPos != -1) {
+                FIXME("HW VertexShader Error at position: %d\n%s\n", errPos, glGetString( GL_PROGRAM_ERROR_STRING_ARB) );
+            }
+
+            /* disable any attribs */
+            if(((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->declaredArrays) {
+                GLint maxAttribs;
+                /* Leave all the attribs disabled */
+                glGetIntegerv(GL_MAX_VERTEX_ATTRIBS_ARB, &maxAttribs);
+                /* MESA does not support it right now, so fall back to 16 when the query raises a GL error */
+                if (glGetError() != GL_NO_ERROR) {
+                    maxAttribs = 16;
+                }
+                for (i = 0; i < maxAttribs; i++) {
+                    GL_EXTCALL(glDisableVertexAttribArrayARB(i));
+                    checkGLcall("glDisableVertexAttribArrayARB(reg);");
+                }
+            }
+
+            /* Done */
+            glDisable(GL_VERTEX_PROGRAM_ARB);
+        } else {
+
+            /* DirectX colours are in a different format to opengl colours
+            so if diffuse or specular are used then we need to use drawStridedSlow
+            to correct the colours */
+            if ((dataLocations->u.s.pSize.lpData           != NULL)
+               || (dataLocations->u.s.diffuse.lpData      != NULL)
+               || (dataLocations->u.s.specular.lpData      != NULL)) {
+                /* TODO: replace drawStridedSlow with vertex fixups */
+#if 1
+                drawStridedSlow(iface, dataLocations, numberOfIndicies, glPrimType,
+                                idxData, idxSize, minIndex, StartIdx);
+#endif
+            } else {
+                /* OpenGL can manage everything in hardware so we can use drawStridedFast */
+                drawStridedFast(iface, numberOfIndicies, glPrimType,
+                    idxData, idxSize, minIndex, StartIdx);
+            }
+        }
+    }
+}
 
 void inline drawPrimitiveTraceDataLocations(Direct3DVertexStridedData *dataLocations,DWORD fvf) {
 
@@ -1568,6 +1813,15 @@
     TRACE_STRIDED((dataLocations), texCoords[5]);
     TRACE_STRIDED((dataLocations), texCoords[6]);
     TRACE_STRIDED((dataLocations), texCoords[7]);
+    TRACE_STRIDED((dataLocations), position2);
+    TRACE_STRIDED((dataLocations), normal2);
+    TRACE_STRIDED((dataLocations), tangent);
+    TRACE_STRIDED((dataLocations), binormal);
+    TRACE_STRIDED((dataLocations), tessFactor);
+    TRACE_STRIDED((dataLocations), fog);
+    TRACE_STRIDED((dataLocations), depth);
+    TRACE_STRIDED((dataLocations), sample);
+
     return;
 
 }
@@ -1673,10 +1927,11 @@
 
 /* Routine common to the draw primitive and draw indexed primitive routines */
 void drawPrimitive(IWineD3DDevice *iface,
-                    int PrimitiveType, long NumPrimitives,
-
+                    int PrimitiveType,
+                    long NumPrimitives,
                     /* for Indexed: */
                     long  StartVertexIndex,
+                    UINT  numberOfVertices,
                     long  StartIdx,
                     short idxSize,
                     const void *idxData,
@@ -1684,16 +1939,14 @@
 
     BOOL                          rc = FALSE;
     DWORD                         fvf = 0;
-#if 0 /* TODO: vertex and pixel shaders */
-    IDirect3DVertexShaderImpl    *vertex_shader = NULL;
-    IDirect3DPixelShaderImpl     *pixel_shader = NULL;
-#endif
     IWineD3DDeviceImpl           *This = (IWineD3DDeviceImpl *)iface;
     BOOL                          useVertexShaderFunction = FALSE;
     BOOL                          isLightingOn = FALSE;
     Direct3DVertexStridedData     dataLocations;
     int                           useHW = FALSE;
 
+    useVertexShaderFunction = This->stateBlock->vertexShader != NULL ? wined3d_settings.vs_mode != VS_NONE ? ((IWineD3DVertexShaderImpl *)This->stateBlock->vertexShader)->function != NULL ? TRUE: FALSE : FALSE : FALSE;
+
     if (This->stateBlock->vertexDecl == NULL) {
         /* Work out what the FVF should look like */
         rc = initializeFVF(iface, &fvf);
@@ -1702,25 +1955,6 @@
         TRACE("(%p) : using vertex declaration %p \n", iface, This->stateBlock->vertexDecl);
     }
 
-    /* If we will be using a vertex shader, do some initialization for it */
-    if (useVertexShaderFunction) {
-#if 0 /* TODO: vertex and pixel shaders */
-        vertex_shader = VERTEX_SHADER(This->stateBlock->VertexShader);
-        memset(&vertex_shader->input, 0, sizeof(VSHADERINPUTDATA8));
-
-        useHW = (((vs_mode == VS_HW) && GL_SUPPORT(ARB_VERTEX_PROGRAM)) &&
-                 This->devType != D3DDEVTYPE_REF &&
-                 !This->stateBlock->renderState[D3DRS_SOFTWAREVERTEXPROCESSING] &&
-                 vertex_shader->usage != D3DUSAGE_SOFTWAREPROCESSING);
-
-        /** init Constants */
-        if (This->stateBlock->Changed.vertexShaderConstant) {
-            TRACE_(d3d_shader)("vertex shader initializing constants\n");
-            IDirect3DVertexShaderImpl_SetConstantF(vertex_shader, 0, (CONST FLOAT*) &This->stateBlock->vertexShaderConstant[0], 96);
-        }
-#endif /* TODO: vertex and pixel shaders */
-    }
-
     /* Ok, we will be updating the screen from here onwards so grab the lock */
     ENTER_GL();
 
@@ -1749,24 +1983,20 @@
 #endif /* TODO: vertex and pixel shaders */
 
     /* Initialize all values to null */
-    if (useVertexShaderFunction == FALSE) {
-        memset(&dataLocations, 0x00, sizeof(dataLocations));
+    memset(&dataLocations, 0x00, sizeof(dataLocations));
+    /* convert the FVF or vertexDeclaration into a strided stream (this should be done when the fvf or declaration is created) */
 
-        /* Convert to strided data */
-         if (This->stateBlock->vertexDecl != NULL) {
-            TRACE("================ Vertex Declaration  ===================\n");
-            primitiveDeclarationConvertToStridedData(iface, &dataLocations, StartVertexIndex, &fvf);
-         } else {
-            TRACE("================ FVF ===================\n");
-            primitiveConvertToStridedData(iface, &dataLocations, StartVertexIndex);
-         }
-
-        /* write out some debug information*/
-        drawPrimitiveTraceDataLocations(&dataLocations, fvf);
+    if (This->stateBlock->vertexDecl != NULL) {
+        TRACE("================ Vertex Declaration  ===================\n");
+        primitiveDeclarationConvertToStridedData(iface, &dataLocations, StartVertexIndex, &fvf);
     } else {
-        FIXME("line %d, drawing using vertex shaders\n", __LINE__);
+        TRACE("================ FVF ===================\n");
+        primitiveConvertToStridedData(iface, &dataLocations, StartVertexIndex);
     }
 
+    /* write out some debug information*/
+    drawPrimitiveTraceDataLocations(&dataLocations, fvf);
+
     /* Setup transform matrices and sort out */
     if (useHW) {
         /* Lighting is not completely bypassed with ATI drivers although it should be. Mesa is ok from this respect...
@@ -1787,45 +2017,20 @@
 
     drawPrimitiveUploadTextures(This);
 
-    /* Now draw the graphics to the screen */
-    if  (useVertexShaderFunction) {
 
-        /* Ideally, we should have software FV and hardware VS, possibly
-           depending on the device type?                                 */
-
-        if (useHW) {
-            TRACE("Swap HW vertex shader\n");
-#if 0 /* TODO: vertex and pixel shaders */
-            drawStridedHardwareVS(iface, &dataLocations, PrimitiveType, NumPrimitives,
-                        idxData, idxSize, minIndex, StartIdx);
-#endif
-	} else {
-            /* We will have to use the very, very slow emulation layer */
-            TRACE("Swap SW vertex shader\n");
-#if 0 /* TODO: vertex and pixel shaders */
-	    drawStridedSoftwareVS(iface, &dataLocations, PrimitiveType, NumPrimitives,
-                        idxData, idxSize, minIndex, StartIdx);
-#endif
+    {
+        GLenum glPrimType;
+        /* Ok, Work out which primitive is requested and how many vertexes that
+           will be                                                              */
+        UINT calculatedNumberOfindices = primitiveToGl(PrimitiveType, NumPrimitives, &glPrimType);
+#if 0 /* debugging code... just information not an error */
+        if(numberOfVertices != 0 && numberOfVertices != calculatedNumberOfindices){
+            FIXME("Number of vertices %u and Caculated number of indicies %u differ\n", numberOfVertices, calculatedNumberOfindices);
         }
-
-    } else if ((dataLocations.u.s.pSize.lpData           != NULL)
-               || (dataLocations.u.s.diffuse.lpData      != NULL)
-	       /*|| (dataLocations.u.s.blendWeights.lpData != NULL)*/) {
-
-        /* Fixme, Ideally, only use the per-vertex code for software HAL
-           but until opengl supports all the functions returned to setup
-           vertex arrays, we need to drop down to the slow mechanism for
-           certain functions                                              */
-
-        /* We will have to use the slow version of GL per vertex setup */
-        drawStridedSlow(iface, &dataLocations, PrimitiveType, NumPrimitives,
-                        idxData, idxSize, minIndex, StartIdx);
-
-    } else {
-
-        /* We can use the fast version of GL pointers */
-        drawStridedFast(iface, &dataLocations, PrimitiveType, NumPrimitives,
-                        idxData, idxSize, minIndex, StartIdx);
+#endif
+        if (numberOfVertices == 0 )
+            numberOfVertices = calculatedNumberOfindices;
+        drawPrimitiveDrawStrided(iface, useVertexShaderFunction, useHW, &dataLocations, numberOfVertices, calculatedNumberOfindices, glPrimType, idxData, idxSize, minIndex, StartIdx);
     }
 
     /* If vertex shaders or no normals, restore previous lighting state */
diff --git a/dlls/wined3d/vertexshader.c b/dlls/wined3d/vertexshader.c
index 13556e2..0006846 100644
--- a/dlls/wined3d/vertexshader.c
+++ b/dlls/wined3d/vertexshader.c
@@ -1989,8 +1989,8 @@
 
 HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
-
-    *parent= (IUnknown*) parent;
+    
+    *parent = (IUnknown*)This->parent;
     IUnknown_AddRef(*parent);
     TRACE("(%p) : returning %p\n", This, *parent);
     return D3D_OK;
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index cb7395e..ee3d563 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -80,7 +80,6 @@
 
 extern DWORD minMipLookup[D3DTEXF_ANISOTROPIC + 1][D3DTEXF_LINEAR + 1];
 
-#if 0
 /* NOTE: Make sure these are in the correct numerical order. (see /include/d3d9types.h typedef enum _D3DDECLTYPE) */
 UINT static const glTypeLookup[D3DDECLTYPE_UNUSED][5] = {
                                   {D3DDECLTYPE_FLOAT1,    1, GL_FLOAT           , GL_FALSE ,sizeof(float)},
@@ -104,9 +103,8 @@
 #define WINED3D_ATR_TYPE(_attribute)          glTypeLookup[sd->u.s._attribute.dwType][0]
 #define WINED3D_ATR_SIZE(_attribute)          glTypeLookup[sd->u.s._attribute.dwType][1]
 #define WINED3D_ATR_GLTYPE(_attribute)        glTypeLookup[sd->u.s._attribute.dwType][2]
-#define WINED3D_ATR_GLSOMETHING(_attribute)   glTypeLookup[sd->u.s._attribute.dwType][3]
+#define WINED3D_ATR_NORMALIZED(_attribute)    glTypeLookup[sd->u.s._attribute.dwType][3]
 #define WINED3D_ATR_TYPESIZE(_attribute)      glTypeLookup[sd->u.s._attribute.dwType][4]
-#endif
 
 /**
  * Settings 
@@ -343,9 +341,9 @@
 void drawPrimitive(IWineD3DDevice *iface,
                     int PrimitiveType,
                     long NumPrimitives,
-
                     /* for Indexed: */
                     long  StartVertexIndex,
+                    UINT  numberOfVertices,
                     long  StartIdx,
                     short idxBytes,
                     const void *idxData,