wined3d: Instancing emulation.
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 757b147..3376bf9 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -2067,6 +2067,8 @@
*/
static HRESULT WINAPI IWineD3DDeviceImpl_SetStreamSourceFreq(IWineD3DDevice *iface, UINT StreamNumber, UINT Divider) {
IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
+ UINT oldFlags = This->updateStateBlock->streamFlags[StreamNumber];
+ UINT oldFreq = This->updateStateBlock->streamFreq[StreamNumber];
TRACE("(%p) StreamNumber(%d), Divider(%d)\n", This, StreamNumber, Divider);
This->updateStateBlock->streamFlags[StreamNumber] = Divider & (WINED3DSTREAMSOURCE_INSTANCEDATA | WINED3DSTREAMSOURCE_INDEXEDDATA );
@@ -2075,8 +2077,9 @@
This->updateStateBlock->set.streamFreq[StreamNumber] = TRUE;
This->updateStateBlock->streamFreq[StreamNumber] = Divider & 0x7FFFFF;
- if (This->updateStateBlock->streamFlags[StreamNumber] || This->updateStateBlock->streamFreq[StreamNumber] != 1) {
- FIXME("Stream indexing not fully supported\n");
+ if(This->updateStateBlock->streamFreq[StreamNumber] != oldFreq ||
+ This->updateStateBlock->streamFlags[StreamNumber] != oldFlags) {
+ IWineD3DDeviceImpl_MarkStateDirty(This, STATE_STREAMSRC);
}
return WINED3D_OK;
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
index d7f8d6d..7e2fc05 100644
--- a/dlls/wined3d/directx.c
+++ b/dlls/wined3d/directx.c
@@ -1770,6 +1770,26 @@
TRACE_(d3d_caps)("[FAILED]\n"); /* Enable when implemented */
return WINED3DERR_NOTAVAILABLE;
+ /* ATI instancing hack: Although ATI cards do not support Shader Model 3.0, they support
+ * instancing. To query if the card supports instancing CheckDeviceFormat with the special format
+ * MAKEFOURCC('I','N','S','T') is used. Should a (broken) app check for this, provide a proper return value.
+ * We can do instancing with all shader versions, but we need vertex shaders.
+ *
+ * Additionally applications have to set the D3DRS_POINTSIZE render state to MAKEFOURCC('I','N','S','T') once
+ * to enable instancing. WineD3D doesn't need that and just ignores it.
+ *
+ * With Shader Model 3.0 capable cards Instancing 'just works' in Windows.
+ */
+ case MAKEFOURCC('I','N','S','T'):
+ TRACE("ATI Instancing check hack\n");
+ if(GL_SUPPORT(ARB_VERTEX_PROGRAM) || GL_SUPPORT(ARB_VERTEX_SHADER)) {
+ TRACE_(d3d_caps)("[OK]\n");
+ return WINED3D_OK;
+ } else {
+ TRACE_(d3d_caps)("[FAILED]\n");
+ return WINED3DERR_NOTAVAILABLE;
+ }
+
default:
break;
}
diff --git a/dlls/wined3d/drawprim.c b/dlls/wined3d/drawprim.c
index 329b5ee..1d2a66e 100644
--- a/dlls/wined3d/drawprim.c
+++ b/dlls/wined3d/drawprim.c
@@ -6,6 +6,7 @@
* Copyright 2004 Christian Costa
* Copyright 2005 Oliver Stieber
* Copyright 2006 Henri Verbeet
+ * Copyright 2007 Stefan Dösinger for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -1155,6 +1156,142 @@
}
}
+/* Emulates D3D stream frequency instancing for indexed draws.
+ *
+ * Attributes whose stream carries the INSTANCEDATA flag are not read from vertex arrays. For every
+ * instance they are set once with immediate mode glVertexAttrib calls, followed by one glDrawElements
+ * call that draws the geometry.
+ *
+ * Params:
+ *  iface: The device. Its current stateblock supplies stream flags, frequencies, sources and offsets
+ *  sd: Strided vertex data. Only the generic sd->u.input[] attribute view is read
+ *  numberOfVertices: Element count passed to glDrawElements
+ *  glPrimitiveType: GL primitive type to draw
+ *  idxData: Index data. NULL is rejected with a FIXME and nothing is drawn. (void *)-1 is mapped to a
+ *           NULL base pointer (presumably the indices come from a bound buffer object then - confirm
+ *           against the caller)
+ *  idxSize: Size of one index in bytes. 2 selects GL_UNSIGNED_SHORT, everything else GL_UNSIGNED_INT
+ *  minIndex: Only used for tracing
+ *  startIdx: First index to draw, scaled by idxSize
+ *  startVertex: Not used by this function
+ */
+inline void drawStridedInstanced(IWineD3DDevice *iface, WineDirect3DVertexStridedData *sd, UINT numberOfVertices,
+                                 GLenum glPrimitiveType, const void *idxData, short idxSize, ULONG minIndex,
+                                 ULONG startIdx, ULONG startVertex) {
+    UINT numInstances = 0;
+    int numInstancedAttribs = 0, i, j;
+    UINT instancedData[sizeof(sd->u.input) / sizeof(sd->u.input[0]) /* 16 */];
+    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface;
+    IWineD3DStateBlockImpl *stateblock = This->stateBlock;
+
+    if (idxData == NULL) {
+        /* This is a nasty thing. MSDN says no hardware supports that and apps have to use software vertex processing.
+         * We don't support this for now
+         *
+         * Shouldn't be too hard to support with opengl, in theory just call glDrawArrays instead of drawElements.
+         * But the StreamSourceFreq value has a different meaning in that situation.
+         */
+        FIXME("Non-indexed instanced drawing is not supported\n");
+        return;
+    }
+
+    TRACE("(%p) : glElements(%x, %d, %d, ...)\n", This, glPrimitiveType, numberOfVertices, minIndex);
+    idxData = idxData == (void *)-1 ? NULL : idxData;
+
+    /* First, figure out how many instances we have to draw */
+    for(i = 0; i < MAX_STREAMS; i++) {
+        /* Look at all non-instanced streams */
+        if(!(stateblock->streamFlags[i] & D3DSTREAMSOURCE_INSTANCEDATA) &&
+           stateblock->streamSource[i]) {
+            int inst = stateblock->streamFreq[i];
+
+            if(numInstances && inst != numInstances) {
+                ERR("Two streams specify a different number of instances. Got %d, new is %d\n", numInstances, inst);
+            }
+            /* The last bound non-instanced stream wins */
+            numInstances = inst;
+        }
+    }
+
+    /* Collect the indices of all attributes that are fed from an instance data stream */
+    for(i = 0; i < sizeof(sd->u.input) / sizeof(sd->u.input[0]); i++) {
+        if(stateblock->streamFlags[sd->u.input[i].streamNo] & D3DSTREAMSOURCE_INSTANCEDATA) {
+            instancedData[numInstancedAttribs] = i;
+            numInstancedAttribs++;
+        }
+    }
+
+    /* now draw numInstances instances :-) */
+    for(i = 0; i < numInstances; i++) {
+        /* Specify the instanced attributes using immediate mode calls */
+        for(j = 0; j < numInstancedAttribs; j++) {
+            UINT attrib = instancedData[j];
+            /* Element i of the instance stream: base pointer + i strides + the stream offset */
+            BYTE *ptr = sd->u.input[attrib].lpData +
+                        sd->u.input[attrib].dwStride * i +
+                        stateblock->streamOffset[sd->u.input[attrib].streamNo];
+            if(sd->u.input[attrib].VBO) {
+                /* The pointer is relative to the buffer in this case, so add the buffer's system memory copy */
+                IWineD3DVertexBufferImpl *vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[sd->u.input[attrib].streamNo];
+                ptr += (long) vb->resource.allocatedMemory;
+            }
+
+            switch(sd->u.input[attrib].dwType) {
+                case WINED3DDECLTYPE_FLOAT1:
+                    GL_EXTCALL(glVertexAttrib1fvARB(attrib, (float *) ptr));
+                    break;
+                case WINED3DDECLTYPE_FLOAT2:
+                    GL_EXTCALL(glVertexAttrib2fvARB(attrib, (float *) ptr));
+                    break;
+                case WINED3DDECLTYPE_FLOAT3:
+                    GL_EXTCALL(glVertexAttrib3fvARB(attrib, (float *) ptr));
+                    break;
+                case WINED3DDECLTYPE_FLOAT4:
+                    GL_EXTCALL(glVertexAttrib4fvARB(attrib, (float *) ptr));
+                    break;
+
+                case WINED3DDECLTYPE_UBYTE4:
+                    /* UBYTE4 is not normalized, so the N variant must not be used here */
+                    GL_EXTCALL(glVertexAttrib4ubvARB(attrib, ptr));
+                    break;
+                case WINED3DDECLTYPE_UBYTE4N:
+                case WINED3DDECLTYPE_D3DCOLOR:
+                    GL_EXTCALL(glVertexAttrib4NubvARB(attrib, ptr));
+                    break;
+
+                case WINED3DDECLTYPE_SHORT2:
+                {
+                    /* Only two shorts are stored. Expand to (x, y, 0, 1) instead of reading past the attribute */
+                    GLshort s[4] = {((short *) ptr)[0], ((short *) ptr)[1], 0, 1};
+                    GL_EXTCALL(glVertexAttrib4svARB(attrib, s));
+                    break;
+                }
+                case WINED3DDECLTYPE_SHORT4:
+                    GL_EXTCALL(glVertexAttrib4svARB(attrib, (GLshort *) ptr));
+                    break;
+
+                case WINED3DDECLTYPE_SHORT2N:
+                {
+                    GLshort s[4] = {((short *) ptr)[0], ((short *) ptr)[1], 0, 1};
+                    GL_EXTCALL(glVertexAttrib4NsvARB(attrib, s));
+                    break;
+                }
+                case WINED3DDECLTYPE_USHORT2N:
+                {
+                    GLushort s[4] = {((unsigned short *) ptr)[0], ((unsigned short *) ptr)[1], 0, 1};
+                    GL_EXTCALL(glVertexAttrib4NusvARB(attrib, s));
+                    break;
+                }
+                case WINED3DDECLTYPE_SHORT4N:
+                    GL_EXTCALL(glVertexAttrib4NsvARB(attrib, (GLshort *) ptr));
+                    break;
+                case WINED3DDECLTYPE_USHORT4N:
+                    GL_EXTCALL(glVertexAttrib4NusvARB(attrib, (GLushort *) ptr));
+                    break;
+
+                case WINED3DDECLTYPE_UDEC3:
+                    FIXME("Unsure about WINED3DDECLTYPE_UDEC3\n");
+                    /*glVertexAttrib3usvARB(instancedData[j], (GLushort *) ptr); Does not exist */
+                    break;
+                case WINED3DDECLTYPE_DEC3N:
+                    FIXME("Unsure about WINED3DDECLTYPE_DEC3N\n");
+                    /*glVertexAttrib3NusvARB(instancedData[j], (GLushort *) ptr); Does not exist */
+                    break;
+
+                case WINED3DDECLTYPE_FLOAT16_2:
+                    /* Those are 16 bit floats. C doesn't have a 16 bit float type. I could read the single bits and
+                     * calculate a 4 byte float according to the IEEE standard
+                     */
+                    FIXME("Unsupported WINED3DDECLTYPE_FLOAT16_2\n");
+                    break;
+                case WINED3DDECLTYPE_FLOAT16_4:
+                    FIXME("Unsupported WINED3DDECLTYPE_FLOAT16_4\n");
+                    break;
+
+                case WINED3DDECLTYPE_UNUSED:
+                default:
+                    ERR("Unexpected declaration in instanced attributes\n");
+                    break;
+            }
+        }
+
+        /* One draw per instance; the non-instanced attributes are not set up in this function */
+        glDrawElements(glPrimitiveType, numberOfVertices, idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT,
+                       (const char *)idxData+(idxSize * startIdx));
+        checkGLcall("glDrawElements");
+    }
+}
+
/* Routine common to the draw primitive and draw indexed primitive routines */
void drawPrimitive(IWineD3DDevice *iface,
int PrimitiveType,
@@ -1202,12 +1339,19 @@
if (numberOfVertices == 0 )
numberOfVertices = calculatedNumberOfindices;
- if (This->useDrawStridedSlow)
+ if (This->useDrawStridedSlow) {
+ /* Immediate mode drawing */
drawStridedSlow(iface, &This->strided_streams, calculatedNumberOfindices,
glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
- else
+ } else if(This->instancedDraw) {
+ /* Instancing emulation with mixing immediate mode and arrays */
+ drawStridedInstanced(iface, &This->strided_streams, calculatedNumberOfindices, glPrimType,
+ idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
+ } else {
+ /* Simple array draw call */
drawStridedFast(iface, calculatedNumberOfindices, glPrimType,
idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
+ }
}
/* Finshed updating the screen, restore lock */
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
index d083997..808d478 100644
--- a/dlls/wined3d/state.c
+++ b/dlls/wined3d/state.c
@@ -2146,11 +2146,21 @@
int i;
UINT *offset = stateblock->streamOffset;
+ /* Default to no instancing */
+ stateblock->wineD3DDevice->instancedDraw = FALSE;
+
for (i = 0; i < MAX_ATTRIBS; i++) {
if (!strided->u.input[i].lpData && !strided->u.input[i].VBO)
continue;
+ /* Do not load instance data. It will be specified using immediate mode glVertexAttrib calls by drawprim */
+ if(stateblock->streamFlags[strided->u.input[i].streamNo] & D3DSTREAMSOURCE_INSTANCEDATA) {
+ GL_EXTCALL(glDisableVertexAttribArrayARB(i));
+ stateblock->wineD3DDevice->instancedDraw = TRUE;
+ continue;
+ }
+
TRACE_(d3d_shader)("Loading array %u [VBO=%u]\n", i, strided->u.input[i].VBO);
if(strided->u.input[i].dwStride) {
@@ -2227,11 +2237,11 @@
case WINED3DDECLTYPE_UDEC3:
FIXME("Unsure about WINED3DDECLTYPE_UDEC3\n");
- /*glVertexAttrib3usvARB(instancedData[j], (GLushort *) ptr); Does not exist */
+ /*glVertexAttrib3usvARB(i, (GLushort *) ptr); Does not exist */
break;
case WINED3DDECLTYPE_DEC3N:
FIXME("Unsure about WINED3DDECLTYPE_DEC3N\n");
- /*glVertexAttrib3NusvARB(instancedData[j], (GLushort *) ptr); Does not exist */
+ /*glVertexAttrib3NusvARB(i, (GLushort *) ptr); Does not exist */
break;
case WINED3DDECLTYPE_FLOAT16_2:
@@ -2262,6 +2272,10 @@
GLint curVBO = GL_SUPPORT(ARB_VERTEX_BUFFER_OBJECT) ? -1 : 0;
TRACE("Using fast vertex array code\n");
+
+ /* This is fixed function pipeline only, and the fixed function pipeline doesn't do instancing */
+ stateblock->wineD3DDevice->instancedDraw = FALSE;
+
/* Blend Data ---------------------------------------------- */
if( (sd->u.s.blendWeights.lpData) || (sd->u.s.blendWeights.VBO) ||
(sd->u.s.blendMatrixIndices.lpData) || (sd->u.s.blendMatrixIndices.VBO) ) {
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 4dc914b..6111f53 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -681,6 +681,7 @@
WineDirect3DVertexStridedData strided_streams;
WineDirect3DVertexStridedData *up_strided;
BOOL useDrawStridedSlow;
+ BOOL instancedDraw;
/* Context management */
WineD3DContext **contexts; /* Dynamic array containing pointers to context structures */