- New implementation of the SendMessage, ReceiveMessage and ReplyMessage
  functions to support thread safety and nested SendMessage calls.
- Addition of ReplyMessage32.

diff --git a/include/queue.h b/include/queue.h
index 8a9836b..3ad74a9 100644
--- a/include/queue.h
+++ b/include/queue.h
@@ -22,11 +22,43 @@
     struct tagQMSG *prevMsg;
 } QMSG;
 
-typedef struct
+
+typedef struct tagSMSG
 {
-  LRESULT   lResult;
-  BOOL16    bPending;
-} QSMCTRL;
+    struct tagSMSG *nextProcessing; /* next SMSG in the processing list */
+    struct tagSMSG *nextPending;    /* next SMSG in the pending list */
+    struct tagSMSG *nextWaiting;    /* next SMSG in the waiting list */
+    
+    HQUEUE16       hSrcQueue;       /* sending Queue, (NULL if it didn't wait) */
+    HQUEUE16       hDstQueue;       /* destination Queue */
+
+    HWND32         hWnd;            /* destinantion window */
+    UINT32         msg;             /* message sent */
+    WPARAM32       wParam;          /* wParam of the sent message */
+    LPARAM         lParam;          /* lParam of the sent message */
+
+    LRESULT        lResult;         /* result of SendMessage */
+    WORD           flags;           /* see below SMSG_XXXX */
+} SMSG;
+
+
+/* SMSG -> flags values */
+/* set when lResult contains a good value */
+#define SMSG_HAVE_RESULT            0x0001
+/* protection for multiple call to ReplyMessage16() */
+#define SMSG_ALREADY_REPLIED        0x0002
+/* use with EARLY_REPLY for forcing the receiver to clean SMSG */
+#define SMSG_RECEIVER_CLEANS        0x0010
+/* used with EARLY_REPLY to indicate to sender, receiver is done with SMSG */
+#define SMSG_RECEIVED               0x0020
+/* set in ReceiveMessage() to indicate it's not an early reply */
+#define SMSG_SENDING_REPLY          0x0040
+/* set when ReplyMessage16() is called by the application */
+#define SMSG_EARLY_REPLY            0x0080
+/* set when sender is Win32 thread */
+#define SMSG_WIN32                  0x1000
+/* set when sender is a Unicode thread */
+#define SMSG_UNICODE                0x2000
 
 /* Per-queue data for the message queue
  * Note that we currently only store the current values for
@@ -55,8 +87,6 @@
 
   DWORD     magic;                  /* magic number should be QUEUE_MAGIC */
   DWORD     lockCount;              /* reference counter */
-  
-  WORD      flags;                  /* Queue flags */
   WORD      wWinVersion;            /* Expected Windows version */
   
   WORD      msgCount;               /* Number of waiting messages */
@@ -76,19 +106,9 @@
   DWORD     GetMessagePosVal;       /* Value for GetMessagePos */
   DWORD     GetMessageExtraInfoVal; /* Value for GetMessageExtraInfo */
   
-  HQUEUE16  InSendMessageHandle;    /* Queue of task that sent a message */
-  HTASK16   hSendingTask;           /* Handle of task that sent a message */
-  HTASK16   hPrevSendingTask;       /* Handle of previous sender */
-  
-  HWND32    hWnd32;                 /* Send message arguments */
-  UINT32    msg32;                  
-  WPARAM32  wParam32;               
-  LPARAM    lParam;                 
-  DWORD     SendMessageReturn;      /* Return value for SendMessage */
-
-  QSMCTRL*  smResultInit;           /* SendMesage result pointers */
-  QSMCTRL*  smResultCurrent;        
-  QSMCTRL*  smResult;               
+  SMSG*     smWaiting;              /* SendMessage waiting for reply */
+  SMSG*     smProcessing;           /* SendMessage currently being processed */
+  SMSG*     smPending;              /* SendMessage waiting to be received */
   
   HANDLE16  hCurHook;               /* Current hook */
   HANDLE16  hooks[WH_NB_HOOKS];     /* Task hooks list */
@@ -100,11 +120,11 @@
 
 /* Extra (undocumented) queue wake bits - see "Undoc. Windows" */
 #define QS_SMRESULT      0x8000  /* Queue has a SendMessage() result */
-#define QS_SMPARAMSFREE  0x4000  /* SendMessage() parameters are available */
 
-/* Queue flags */
-#define QUEUE_SM_WIN32     0x0002  /* Currently sent message is Win32 */
-#define QUEUE_SM_UNICODE   0x0004  /* Currently sent message is Unicode */
+/* Types of SMSG stack */
+#define SM_PROCESSING_LIST    1  /* list of SM currently being processed */
+#define SM_PENDING_LIST       2  /* list of SM wating to be received */
+#define SM_WAITING_LIST       3  /* list of SM waiting for reply */
 
 #define QUEUE_MAGIC        0xD46E80AF
 
@@ -144,7 +164,8 @@
 extern QMSG* QUEUE_FindMsg( MESSAGEQUEUE * msgQueue, HWND32 hwnd,
                           int first, int last );
 extern void QUEUE_RemoveMsg( MESSAGEQUEUE * msgQueue, QMSG *qmsg );
-extern void QUEUE_FlushMessages(HQUEUE16);
+extern SMSG *QUEUE_RemoveSMSG( MESSAGEQUEUE *queue, int list, SMSG *smsg );
+extern BOOL32 QUEUE_AddSMSG( MESSAGEQUEUE *queue, int list, SMSG *smsg );
 extern void hardware_event( WORD message, WORD wParam, LONG lParam,
 			    int xPos, int yPos, DWORD time, DWORD extraInfo );
 
diff --git a/relay32/user32.spec b/relay32/user32.spec
index 942f100..e404195 100644
--- a/relay32/user32.spec
+++ b/relay32/user32.spec
@@ -445,7 +445,7 @@
 441 stdcall RemoveMenu(long long long) RemoveMenu32
 442 stdcall RemovePropA(long str) RemoveProp32A
 443 stdcall RemovePropW(long wstr) RemoveProp32W
-444 stub ReplyMessage
+444 stdcall ReplyMessage(long) ReplyMessage32
 445 stub ResetDisplay
 446 stdcall ReuseDDElParam(long long long long long) ReuseDDElParam
 447 stdcall ScreenToClient(long ptr) ScreenToClient32
diff --git a/windows/message.c b/windows/message.c
index 1723efd..b0f4ab9 100644
--- a/windows/message.c
+++ b/windows/message.c
@@ -43,7 +43,7 @@
 DWORD MSG_WineStartTicks; /* Ticks at Wine startup */
 
 static UINT32 doubleClickSpeed = 452;
-static INT32 debugSMRL = 0;       /* intertask SendMessage() recursion level */
+
 
 /***********************************************************************
  *           MSG_CheckFilter
@@ -621,16 +621,20 @@
  *
  * Implementation of an inter-task SendMessage.
  */
-static LRESULT MSG_SendMessage( HQUEUE16 hDestQueue, HWND16 hwnd, UINT16 msg,
+static LRESULT MSG_SendMessage( HQUEUE16 hDestQueue, HWND32 hwnd, UINT32 msg,
                                 WPARAM32 wParam, LPARAM lParam, WORD flags )
 {
-    INT32	  prevSMRL = debugSMRL;
-    QSMCTRL 	  qCtrl = { 0, 1};
     MESSAGEQUEUE *queue, *destQ;
+    SMSG         *smsg;
+    LRESULT      lResult = 0;
 
     if (IsTaskLocked() || !IsWindow32(hwnd))
         return 0;
 
+    /* create a SMSG structure to hold SendMessage() parameters */
+    if (! (smsg = (SMSG *) HeapAlloc( SystemHeap, 0, sizeof(SMSG) )) )
+        return 0;
+          
     if (!(queue = (MESSAGEQUEUE*)QUEUE_Lock( GetFastQueue() ))) return 0;
 
     if (!(destQ = (MESSAGEQUEUE*)QUEUE_Lock( hDestQueue )))
@@ -639,66 +643,84 @@
         return 0;
     }
 
-    debugSMRL+=4;
-    TRACE(sendmsg,"%*sSM: %s [%04x] (%04x -> %04x)\n", 
-		    prevSMRL, "", SPY_GetMsgName(msg), msg, queue->self, hDestQueue );
+    TRACE(sendmsg,"SM: %s [%04x] (%04x -> %04x)\n",
+		    SPY_GetMsgName(msg), msg, queue->self, hDestQueue );
 
-    if( !(queue->wakeBits & QS_SMPARAMSFREE) )
-    {
-      TRACE(sendmsg,"\tIntertask SendMessage: sleeping since unreplied SendMessage pending\n");
-      QUEUE_WaitBits( QS_SMPARAMSFREE );
-    }
+    /* fill up SMSG structure */
+    smsg->hWnd = hwnd;
+    smsg->msg = msg;
+    smsg->wParam = wParam;
+    smsg->lParam = lParam;
+    
+    smsg->lResult = 0;
+    smsg->hSrcQueue = GetFastQueue();
+    smsg->hDstQueue = hDestQueue;
+    smsg->flags = flags;
 
-    /* resume sending */ 
-    queue->hWnd32     = hwnd;
-    queue->msg32      = msg;
-    queue->wParam32   = wParam;
-    queue->lParam     = lParam;
-    queue->hPrevSendingTask = destQ->hSendingTask;
-    destQ->hSendingTask = GetFastQueue();
+    /* add smsg struct in the processing SM list of the source queue */
+    QUEUE_AddSMSG(queue, SM_PROCESSING_LIST, smsg);
 
-    QUEUE_ClearWakeBit( queue, QS_SMPARAMSFREE );
-    queue->flags = (queue->flags & ~(QUEUE_SM_WIN32|QUEUE_SM_UNICODE)) | flags;
-
-    TRACE(sendmsg,"%*ssm: smResultInit = %08x\n", prevSMRL, "", (unsigned)&qCtrl);
-
-    queue->smResultInit = &qCtrl;
-
-    QUEUE_SetWakeBit( destQ, QS_SENDMESSAGE );
+    /* add smsg struct in the pending list of the destination queue */
+    if (QUEUE_AddSMSG(destQ, SM_PENDING_LIST, smsg) == FALSE)
+        return 0;
 
     /* perform task switch and wait for the result */
-
-    while( qCtrl.bPending )
+    while( (smsg->flags & SMSG_HAVE_RESULT) == 0 )
     {
-      if (!(queue->wakeBits & QS_SMRESULT))
-      {
-        if (THREAD_IsWin16( THREAD_Current() ))
+        /* force destination task to run next, if 16 bit threads */
+        if (THREAD_IsWin16(THREAD_Current()) && THREAD_IsWin16(destQ->thdb) )
             DirectedYield( destQ->thdb->teb.htask16 );
+
         QUEUE_WaitBits( QS_SMRESULT );
-	TRACE(sendmsg,"\tsm: have result!\n");
-      }
-      /* got something */
 
-      TRACE(sendmsg,"%*ssm: smResult = %08x\n", prevSMRL, "", (unsigned)queue->smResult );
-
-      if (queue->smResult) { /* FIXME, smResult should always be set */
-        queue->smResult->lResult = queue->SendMessageReturn;
-        queue->smResult->bPending = FALSE;
+        if (! (smsg->flags & SMSG_HAVE_RESULT) )
+        {
+            /* not supposed to happen */
+            ERR(sendmsg, "SMSG_HAVE_RESULT not set smsg->flags=%x\n", smsg->flags);
       }
+        else
+        {
+            lResult = smsg->lResult;
+            TRACE(sendmsg,"smResult = %08x\n", (unsigned)lResult );
+        }
+
       QUEUE_ClearWakeBit( queue, QS_SMRESULT );
-
-      if( queue->smResult != &qCtrl )
-	  ERR(sendmsg, "%*ssm: weird scenes inside the goldmine!\n", prevSMRL, "");
     }
-    queue->smResultInit = NULL;
+
+    /* remove the smsg from the processing list of the source queue */
+    QUEUE_RemoveSMSG( queue, SM_PROCESSING_LIST, smsg );
+
+    /* Note: the destination thread is in charge of removing the smsg from
+       the pending list */
+
+    /* sender thread is in charge of releasing smsg if it's not an
+     early reply */
+    if ( !(smsg->flags & SMSG_EARLY_REPLY) )
+    {
+        HeapFree(SystemHeap, 0, smsg);
+    }
+    else
+    {
+        /* In the case of an early reply, the sender thread releases the
+         smsg structure if the receiver thread is done (SMSG_RECEIVED set).
+         If the receiver thread isn't done, the SMSG_RECEIVER_CLEANS flag
+         is set, and it becomes the receiver's responsibility to release
+         smsg */
+        EnterCriticalSection( &queue->cSection );
     
-    TRACE(sendmsg,"%*sSM: [%04x] returning %08lx\n", prevSMRL, "", msg, qCtrl.lResult);
-    debugSMRL-=4;
+        if (smsg->flags & SMSG_RECEIVED)
+            HeapFree(SystemHeap, 0, smsg);
+        else
+            smsg->flags |= SMSG_RECEIVER_CLEANS;
+        
+        LeaveCriticalSection( &queue->cSection );
+    }
 
     QUEUE_Unlock( queue );
     QUEUE_Unlock( destQ );
     
-    return qCtrl.lResult;
+    TRACE(sendmsg,"done!\n");
+    return lResult;
 }
 
 
@@ -707,52 +729,81 @@
  */
 void WINAPI ReplyMessage16( LRESULT result )
 {
-    MESSAGEQUEUE *senderQ;
-    MESSAGEQUEUE *queue;
+    ReplyMessage32( result );
+}
 
-    if (!(queue = (MESSAGEQUEUE*)QUEUE_Lock( GetFastQueue() ))) return;
+/***********************************************************************
+ *           ReplyMessage32   (USER32.444)
+ */
+BOOL32 WINAPI ReplyMessage32( LRESULT result )
+{
+    MESSAGEQUEUE *senderQ = 0;
+    MESSAGEQUEUE *queue = 0;
+    SMSG         *smsg;
+    BOOL32       ret = FALSE;
 
-    TRACE(msg,"ReplyMessage, queue %04x\n", queue->self);
+    if (!(queue = (MESSAGEQUEUE*)QUEUE_Lock( GetFastQueue() ))) return FALSE;
 
-    while( (senderQ = (MESSAGEQUEUE*)QUEUE_Lock( queue->InSendMessageHandle)))
+    TRACE(sendmsg,"ReplyMessage, queue %04x\n", queue->self);
+
+    while ((smsg = queue->smWaiting) != 0)
     {
-      TRACE(msg,"\trpm: replying to %08x (%04x -> %04x)\n",
-            queue->msg32, queue->self, senderQ->self);
+        /* if the message has already been replied to, there is nothing
+         left to do */
+        if ( smsg->flags & SMSG_ALREADY_REPLIED )
+            goto ReplyMessageDone;
 
+        senderQ = (MESSAGEQUEUE*)QUEUE_Lock( smsg->hSrcQueue );
+        if ( !senderQ )
+            goto ReplyMessageDone;
+
+        /* if a sent message is pending, process it first */
       if( queue->wakeBits & QS_SENDMESSAGE )
       {
+            /* Note: QUEUE_ReceiveMessage() and ReplyMessage call each other */
 	QUEUE_ReceiveMessage( queue );
         QUEUE_Unlock( senderQ );
 	continue; /* ReceiveMessage() already called us */
       }
-
-      if(!(senderQ->wakeBits & QS_SMRESULT) ) break;
-      if (THREAD_IsWin16(THREAD_Current())) OldYield();
-      
-      QUEUE_Unlock( senderQ );
-    } 
-    if( !senderQ )
-    {
-      TRACE(msg,"\trpm: done\n");
-      QUEUE_Unlock( queue );
-      return;
+        break;   /* message to reply is in smsg */
     }
 
-    senderQ->SendMessageReturn = result;
-    TRACE(msg,"\trpm: smResult = %08x, result = %08lx\n", 
-			(unsigned)queue->smResultCurrent, result );
+    if ( !smsg )
+        goto ReplyMessageDone;
+      
+    smsg->lResult = result;
+    smsg->flags |= SMSG_ALREADY_REPLIED | SMSG_HAVE_RESULT;
 
-    senderQ->smResult = queue->smResultCurrent;
-    queue->InSendMessageHandle = 0;
+    /* check if it's an early reply (called by the application) or
+       a regular reply (called by ReceiveMessage) */
+    if ( !(smsg->flags & SMSG_SENDING_REPLY) )
+        smsg->flags |= SMSG_EARLY_REPLY;
 
+    TRACE( sendmsg,"\trpm: smResult = %08lx\n", (long) result );
+
+    /* remove smsg from the waiting list, if it's not an early reply */
+    /* it is important to leave it in the waiting list if it's an early
+     reply, to be protected against multiple calls to ReplyMessage() */
+    if ( !(smsg->flags & SMSG_EARLY_REPLY) )
+        QUEUE_RemoveSMSG( queue, SM_WAITING_LIST, smsg );
+
+    /* tell the sending task that its reply is ready */
     QUEUE_SetWakeBit( senderQ, QS_SMRESULT );
+
+    /* switch directly to sending task (16 bit thread only) */
     if (THREAD_IsWin16( THREAD_Current() ))
         DirectedYield( senderQ->thdb->teb.htask16 );
 
+    ret = TRUE;
+    
+ReplyMessageDone:
+    if ( senderQ )
     QUEUE_Unlock( senderQ );
+    if ( queue )
     QUEUE_Unlock( queue );
-}
 
+    return ret;
+}
 
 /***********************************************************************
  *           MSG_PeekMessage
@@ -1454,7 +1505,7 @@
 
     if (wndPtr->hmemTaskQ != GetFastQueue())
         ret = MSG_SendMessage( wndPtr->hmemTaskQ, hwnd, msg, wParam, lParam,
-                               QUEUE_SM_WIN32 );
+                               SMSG_WIN32 );
     else
         ret = CallWindowProc32A( (WNDPROC32)wndPtr->winproc,
                                  hwnd, msg, wParam, lParam );
@@ -1525,7 +1576,7 @@
 
     if (wndPtr->hmemTaskQ != GetFastQueue())
         ret = MSG_SendMessage( wndPtr->hmemTaskQ, hwnd, msg, wParam, lParam,
-                                QUEUE_SM_WIN32 | QUEUE_SM_UNICODE );
+                                SMSG_WIN32 | SMSG_UNICODE );
     else
         ret = CallWindowProc32W( (WNDPROC32)wndPtr->winproc,
                                  hwnd, msg, wParam, lParam );
@@ -2115,7 +2166,7 @@
 
     if (!(queue = (MESSAGEQUEUE *)QUEUE_Lock( GetFastQueue() )))
         return 0;
-    ret = (BOOL32)queue->InSendMessageHandle;
+    ret = (BOOL32)queue->smProcessing;
 
     QUEUE_Unlock( queue );
     return ret;
diff --git a/windows/queue.c b/windows/queue.c
index 0a33aa0..d3f30f3 100644
--- a/windows/queue.c
+++ b/windows/queue.c
@@ -19,6 +19,7 @@
 #include "process.h"
 #include <assert.h>
 #include "debug.h"
+#include "spy.h"
 
 #define MAX_QUEUE_SIZE   120  /* Max. size of a message queue */
 
@@ -359,23 +360,21 @@
 
     DUMP(    "next: %12.4x  Intertask SendMessage:\n"
              "thread: %10p  ----------------------\n"
-             "hWnd: %12.8x\n"
-             "firstMsg: %8p   msg:     %11.8x\n"
-             "lastMsg:  %8p   wParam:   %10.8x\n"
-             "msgCount: %8.4x   lParam:   %10.8x\n"
-             "lockCount: %7.4x   lRet:   %12.8x\n"
-             "wWinVer: %9.4x  ISMH: %10.4x\n"
-             "paints: %10.4x  hSendTask: %5.4x\n"
-             "timers: %10.4x  hPrevSend: %5.4x\n"
+             "firstMsg: %8p   smWaiting:     %10p\n"
+             "lastMsg:  %8p   smPending:     %10p\n"
+             "msgCount: %8.4x   smProcessing:  %10p\n"
+             "lockCount: %7.4x\n"
+             "wWinVer: %9.4x\n"
+             "paints: %10.4x\n"
+             "timers: %10.4x\n"
              "wakeBits: %8.4x\n"
              "wakeMask: %8.4x\n"
              "hCurHook: %8.4x\n",
-             pq->next, pq->thdb, pq->hWnd32, pq->firstMsg, pq->msg32,
-             pq->lastMsg, pq->wParam32, pq->msgCount, (unsigned)pq->lParam,
-             (unsigned)pq->lockCount, (unsigned)pq->SendMessageReturn,
-             pq->wWinVersion, pq->InSendMessageHandle,
-             pq->wPaintCount, pq->hSendingTask, pq->wTimerCount,
-             pq->hPrevSendingTask, pq->wakeBits, pq->wakeMask, pq->hCurHook);
+             pq->next, pq->thdb, pq->firstMsg, pq->smWaiting, pq->lastMsg,
+             pq->smPending, pq->msgCount, pq->smProcessing,
+             (unsigned)pq->lockCount, pq->wWinVersion,
+             pq->wPaintCount, pq->wTimerCount,
+             pq->wakeBits, pq->wakeMask, pq->hCurHook);
 
     QUEUE_Unlock( pq );
 }
@@ -449,7 +448,7 @@
         return 0;
 
     msgQueue->self        = hQueue;
-    msgQueue->wakeBits    = msgQueue->changeBits = QS_SMPARAMSFREE;
+    msgQueue->wakeBits    = msgQueue->changeBits = 0;
     msgQueue->wWinVersion = pTask ? pTask->version : 0;
     
     InitializeCriticalSection( &msgQueue->cSection );
@@ -482,6 +481,44 @@
 
 
 /***********************************************************************
+ *           QUEUE_FlushMessages
+ * 
+ * Try to reply to all pending sent messages on exit.
+ */
+void QUEUE_FlushMessages( MESSAGEQUEUE *queue )
+{
+    SMSG *smsg;
+    MESSAGEQUEUE *senderQ = 0;
+
+    if( queue )
+    {
+        EnterCriticalSection( &queue->cSection );
+
+        /* empty the list of pending SendMessage waiting to be received */
+        while (queue->smPending)
+        {
+            smsg = QUEUE_RemoveSMSG( queue, SM_PENDING_LIST, 0);
+
+            senderQ = (MESSAGEQUEUE*)QUEUE_Lock( smsg->hSrcQueue );
+            if ( !senderQ )
+                continue;
+
+            /* return 0, to unblock other thread */
+            smsg->lResult = 0;
+            smsg->flags |= SMSG_HAVE_RESULT;
+            QUEUE_SetWakeBit( senderQ, QS_SMRESULT);
+            
+            QUEUE_Unlock( senderQ );
+        }
+
+        QUEUE_ClearWakeBit( queue, QS_SENDMESSAGE );
+        
+        LeaveCriticalSection( &queue->cSection );
+    }
+}
+
+
+/***********************************************************************
  *	     QUEUE_DeleteMsgQueue
  *
  * Unlinks and deletes a message queue.
@@ -492,7 +529,6 @@
 BOOL32 QUEUE_DeleteMsgQueue( HQUEUE16 hQueue )
 {
     MESSAGEQUEUE * msgQueue = (MESSAGEQUEUE*)QUEUE_Lock(hQueue);
-    HQUEUE16  senderQ;
     HQUEUE16 *pPrev;
 
     TRACE(msg,"(): Deleting message queue %04x\n", hQueue);
@@ -509,16 +545,7 @@
     if( hActiveQueue == hQueue ) hActiveQueue = 0;
 
     /* flush sent messages */
-    senderQ = msgQueue->hSendingTask;
-    while( senderQ )
-    {
-      MESSAGEQUEUE* sq = (MESSAGEQUEUE*)QUEUE_Lock(senderQ);
-      if( !sq ) break;
-      sq->SendMessageReturn = 0L;
-      QUEUE_SetWakeBit( sq, QS_SMRESULT );
-      senderQ = sq->hPrevSendingTask;
-      QUEUE_Unlock(sq);
-    }
+    QUEUE_FlushMessages( msgQueue );
 
     SYSTEM_LOCK();
 
@@ -687,130 +714,265 @@
 
 
 /***********************************************************************
- *           QUEUE_ReceiveMessage
+ *           QUEUE_AddSMSG
  *
+ * This routine is called when an SMSG needs to be added to one of the three
+ * SM lists (SM_PROCESSING_LIST, SM_PENDING_LIST, SM_WAITING_LIST).
+ */
+BOOL32 QUEUE_AddSMSG( MESSAGEQUEUE *queue, int list, SMSG *smsg )
+{
+    TRACE(sendmsg,"queue=%x, list=%d, smsg=%p msg=%s\n", queue->self, list,
+          smsg, SPY_GetMsgName(smsg->msg));
+    
+    switch (list)
+    {
+        case SM_PROCESSING_LIST:
+            /* don't need to be thread safe, only accessed by the
+             thread associated with the sender queue */
+            smsg->nextProcessing = queue->smProcessing;
+            queue->smProcessing = smsg;
+            break;
+            
+        case SM_WAITING_LIST:
+            /* don't need to be thread safe, only accessed by the
+             thread associated with the receiver queue */
+            smsg->nextWaiting = queue->smWaiting;
+            queue->smWaiting = smsg;
+            break;
+            
+        case SM_PENDING_LIST:
+            /* make it thread safe, could be accessed by the sender and
+             receiver thread */
+
+            EnterCriticalSection( &queue->cSection );
+            smsg->nextPending = queue->smPending;
+            queue->smPending = smsg;
+            QUEUE_SetWakeBit( queue, QS_SENDMESSAGE );
+            LeaveCriticalSection( &queue->cSection );
+            break;
+
+        default:
+            WARN(sendmsg, "Invalid list: %d", list);
+            break;
+    }
+
+    return TRUE;
+}
+
+
+/***********************************************************************
+ *           QUEUE_RemoveSMSG
+ *
+ * This routine is called when an SMSG needs to be removed from one of the
+ * three SM lists (SM_PROCESSING_LIST, SM_PENDING_LIST, SM_WAITING_LIST).
+ * If smsg == 0, remove the first smsg from the specified list.
+ */
+SMSG *QUEUE_RemoveSMSG( MESSAGEQUEUE *queue, int list, SMSG *smsg )
+{
+    /* returns the unlinked SMSG, or 0 when nothing could be removed */
+    switch (list)
+    {
+        case SM_PROCESSING_LIST:
+            /* don't need to be thread safe, only accessed by the
+             thread associated with the sender queue */
+
+            /* if smsg is equal to null, it means the first in the list */
+            if (!smsg)
+                smsg = queue->smProcessing;
+
+            /* In fact SM_PROCESSING_LIST is a stack, and smsg should
+             always be at the top of the list.  Checking for null first
+             keeps an empty list from being dereferenced below. */
+            if ( !smsg || (smsg != queue->smProcessing) )
+            {
+                ERR( sendmsg, "smsg not at the top of Processing list, smsg=0x%p queue=0x%p", smsg, queue);
+                return 0;
+            }
+
+            TRACE(sendmsg,"queue=%x, list=%d, smsg=%p msg=%s\n", queue->self, list,
+                  smsg, SPY_GetMsgName(smsg->msg));
+
+            queue->smProcessing = smsg->nextProcessing;
+            smsg->nextProcessing = 0;
+            return smsg;
+
+        case SM_WAITING_LIST:
+            /* don't need to be thread safe, only accessed by the
+             thread associated with the receiver queue */
+
+            /* if smsg is equal to null, it means the first in the list */
+            if (!smsg)
+                smsg = queue->smWaiting;
+
+            /* In fact SM_WAITING_LIST is a stack, and smsg should
+             always be at the top of the list.  Checking for null first
+             keeps an empty list from being dereferenced below. */
+            if ( !smsg || (smsg != queue->smWaiting) )
+            {
+                ERR( sendmsg, "smsg not at the top of Waiting list, smsg=0x%p queue=0x%p", smsg, queue);
+                return 0;
+            }
+
+            TRACE(sendmsg,"queue=%x, list=%d, smsg=%p msg=%s\n", queue->self, list,
+                  smsg, SPY_GetMsgName(smsg->msg));
+
+            queue->smWaiting = smsg->nextWaiting;
+            smsg->nextWaiting = 0;
+            return smsg;
+
+        case SM_PENDING_LIST:
+            /* make it thread safe, could be accessed by the sender and
+             receiver thread */
+            EnterCriticalSection( &queue->cSection );
+
+            /* only the top of the Pending list may be removed */
+            if (smsg && (smsg != queue->smPending))
+            {
+                ERR( sendmsg, "should always remove the top one in Pending list, smsg=0x%p queue=0x%p", smsg, queue);
+                smsg = 0;
+            }
+            else
+                smsg = queue->smPending;
+
+            if (smsg)   /* null on error or when the list is empty */
+            {
+                TRACE(sendmsg,"queue=%x, list=%d, smsg=%p msg=%s\n", queue->self, list,
+                      smsg, SPY_GetMsgName(smsg->msg));
+                queue->smPending = smsg->nextPending;
+                smsg->nextPending = 0;
+                if (!queue->smPending)  /* list drained: clear QS_SENDMESSAGE */
+                    QUEUE_ClearWakeBit( queue, QS_SENDMESSAGE );
+            }
+            LeaveCriticalSection( &queue->cSection );  /* released on every path */
+            return smsg;
+
+        default:
+            WARN(sendmsg, "Invalid list: %d", list);
+            break;
+    }
+
+    return 0;
+}
+
+
+/***********************************************************************
+ *           QUEUE_ReceiveMessage
+ * 
  * This routine is called when a sent message is waiting for the queue.
  */
 void QUEUE_ReceiveMessage( MESSAGEQUEUE *queue )
 {
-    MESSAGEQUEUE *senderQ = NULL;
-    HQUEUE16      prevSender = 0;
-    QSMCTRL*      prevCtrlPtr = NULL;
     LRESULT       result = 0;
+    SMSG          *smsg;
+    MESSAGEQUEUE  *senderQ;
 
-    TRACE(msg, "ReceiveMessage, queue %04x\n", queue->self );
-    if (!(queue->wakeBits & QS_SENDMESSAGE) ||
-        !(senderQ = (MESSAGEQUEUE*)QUEUE_Lock( queue->hSendingTask)))
-	{ TRACE(msg,"\trcm: nothing to do\n"); return; }
+    TRACE(sendmsg, "queue %04x\n", queue->self );
 
-    if( !senderQ->hPrevSendingTask )
-        QUEUE_ClearWakeBit( queue, QS_SENDMESSAGE );   /* no more sent messages */
-
-    /* Save current state on stack */
-    prevSender                 = queue->InSendMessageHandle;
-    prevCtrlPtr		       = queue->smResultCurrent;
-
-    /* Remove sending queue from the list */
-    queue->InSendMessageHandle = queue->hSendingTask;
-    queue->smResultCurrent     = senderQ->smResultInit;
-    queue->hSendingTask	       = senderQ->hPrevSendingTask;
-
-    TRACE(msg, "\trcm: smResultCurrent = %08x, prevCtrl = %08x\n", 
-				(unsigned)queue->smResultCurrent, (unsigned)prevCtrlPtr );
-    QUEUE_SetWakeBit( senderQ, QS_SMPARAMSFREE );
-
-    TRACE(msg, "\trcm: calling wndproc - %08x %08x %08x %08x\n",
-                senderQ->hWnd32, senderQ->msg32,
-                senderQ->wParam32, (unsigned)senderQ->lParam );
-
-    if (IsWindow32( senderQ->hWnd32 ))
+    if ( !(queue->wakeBits & QS_SENDMESSAGE) || !queue->smPending )  /* no sent message queued */
     {
-        WND *wndPtr = WIN_FindWndPtr( senderQ->hWnd32 );
-        DWORD extraInfo = queue->GetMessageExtraInfoVal;
-        queue->GetMessageExtraInfoVal = senderQ->GetMessageExtraInfoVal;
+        TRACE(sendmsg,"\trcm: nothing to do\n");
+        return;
+    }
 
-        if (senderQ->flags & QUEUE_SM_WIN32)
+    /* remove smsg on the top of the pending list and put it in the waiting list */
+    smsg = QUEUE_RemoveSMSG(queue, SM_PENDING_LIST, 0);
+    QUEUE_AddSMSG(queue, SM_WAITING_LIST, smsg);
+
+    TRACE(sendmsg,"RM: %s [%04x] (%04x -> %04x)\n",
+       	    SPY_GetMsgName(smsg->msg), smsg->msg, smsg->hSrcQueue, smsg->hDstQueue );
+
+    if (IsWindow32( smsg->hWnd ))
+    {
+        WND *wndPtr = WIN_FindWndPtr( smsg->hWnd );
+        DWORD extraInfo = queue->GetMessageExtraInfoVal; /* save ExtraInfo */
+
+        /* use sender queue extra info value while calling the window proc */
+        senderQ = (MESSAGEQUEUE*)QUEUE_Lock( smsg->hSrcQueue );
+        if (senderQ)
+  {
+            queue->GetMessageExtraInfoVal = senderQ->GetMessageExtraInfoVal;
+            QUEUE_Unlock( senderQ );
+        }
+
+        /* call the right version of CallWindowProcXX */
+        if (smsg->flags & SMSG_WIN32)
         {
-            TRACE(msg, "\trcm: msg is Win32\n" );
-            if (senderQ->flags & QUEUE_SM_UNICODE)
+            TRACE(sendmsg, "\trcm: msg is Win32\n" );
+            if (smsg->flags & SMSG_UNICODE)
                 result = CallWindowProc32W( wndPtr->winproc,
-                                            senderQ->hWnd32, senderQ->msg32,
-                                            senderQ->wParam32, senderQ->lParam );
+                                            smsg->hWnd, smsg->msg,
+                                            smsg->wParam, smsg->lParam );
             else
                 result = CallWindowProc32A( wndPtr->winproc,
-                                            senderQ->hWnd32, senderQ->msg32,
-                                            senderQ->wParam32, senderQ->lParam );
+                                            smsg->hWnd, smsg->msg,
+                                            smsg->wParam, smsg->lParam );
         }
         else  /* Win16 message */
             result = CallWindowProc16( (WNDPROC16)wndPtr->winproc,
-                                       (HWND16) senderQ->hWnd32,
-                                       (UINT16) senderQ->msg32,
-                                       LOWORD (senderQ->wParam32),
-                                       senderQ->lParam );
+                                       (HWND16) smsg->hWnd,
+                                       (UINT16) smsg->msg,
+                                       LOWORD (smsg->wParam),
+                                       smsg->lParam );
 
         queue->GetMessageExtraInfoVal = extraInfo;  /* Restore extra info */
-	TRACE(msg,"\trcm: result =  %08x\n", (unsigned)result );
+	TRACE(sendmsg,"result =  %08x\n", (unsigned)result );
     }
-    else WARN(msg, "\trcm: bad hWnd\n");
+    else WARN(sendmsg, "\trcm: bad hWnd\n");
 
-    QUEUE_Unlock( senderQ );
-    
-    /* Return the result to the sender task */
-    ReplyMessage16( result );
-
-    queue->InSendMessageHandle = prevSender;
-    queue->smResultCurrent     = prevCtrlPtr;
-
-    TRACE(msg,"done!\n");
-}
-
-/***********************************************************************
- *           QUEUE_FlushMessage
- * 
- * Try to reply to all pending sent messages on exit.
- */
-void QUEUE_FlushMessages( HQUEUE16 hQueue )
-{
-  MESSAGEQUEUE *queue = (MESSAGEQUEUE*)QUEUE_Lock( hQueue );
-
-  if( queue )
-  {
-    MESSAGEQUEUE *senderQ = (MESSAGEQUEUE*)QUEUE_Lock( queue->hSendingTask );
-    QSMCTRL*      CtrlPtr = queue->smResultCurrent;
-
-    TRACE(msg,"Flushing queue %04x:\n", hQueue );
-
-    while( senderQ )
+    /* sometimes when we got early reply, the receiver is in charge of
+     freeing up memory associated with smsg */
+    /* when there is an early reply the sender will not release smsg
+     before SMSG_RECEIVED is set */
+    if ( smsg->flags & SMSG_EARLY_REPLY )
     {
-      if( !CtrlPtr )
-	   CtrlPtr = senderQ->smResultInit;
+        /* remove smsg from the waiting list */
+        QUEUE_RemoveSMSG( queue, SM_WAITING_LIST, smsg );
 
-      TRACE(msg,"\tfrom queue %04x, smResult %08x\n", queue->hSendingTask, (unsigned)CtrlPtr );
+        /* make access to SMSG_RECEIVED and SMSG_RECEIVER_CLEANS thread
+         safe. Those flags are used by both threads, the sender and the
+         receiver, to find out which thread should release the smsg
+         structure. The critical section of the sender queue is used. */
 
-      if( !(queue->hSendingTask = senderQ->hPrevSendingTask) )
-        QUEUE_ClearWakeBit( queue, QS_SENDMESSAGE );
+        senderQ = (MESSAGEQUEUE*)QUEUE_Lock( smsg->hSrcQueue );
 
-      QUEUE_SetWakeBit( senderQ, QS_SMPARAMSFREE );
+        /* synchronize with the sender */
+        if (senderQ)
+            EnterCriticalSection( &senderQ->cSection );
       
-      queue->smResultCurrent = CtrlPtr;
-      while( senderQ->wakeBits & QS_SMRESULT ) OldYield();
+        /* tell the sender we're all done with smsg structure */
+        smsg->flags |= SMSG_RECEIVED;
 
-      senderQ->SendMessageReturn = 0;
-      senderQ->smResult = queue->smResultCurrent;
-      QUEUE_SetWakeBit( senderQ, QS_SMRESULT);
+        /* sender will set SMSG_RECEIVER_CLEANS if it wants the
+         receiver to clean up smsg; this can only happen when there is
+         an early reply */
+        if ( smsg->flags & SMSG_RECEIVER_CLEANS )
+        {
+            TRACE( sendmsg,"Receiver cleans up!\n" );
+            HeapFree( SystemHeap, 0, smsg );
+        }
 
+        /* release lock */
+        if (senderQ)
+        {
+            LeaveCriticalSection( &senderQ->cSection );
       QUEUE_Unlock( senderQ );
-
-      senderQ = (MESSAGEQUEUE*)QUEUE_Lock( queue->hSendingTask );
-      CtrlPtr = NULL;
+        }
     }
-    queue->InSendMessageHandle = 0;
+    else
+    {
+        /* no early reply, so do it now */
     
-    QUEUE_Unlock( queue );
+        /* set SMSG_SENDING_REPLY flag to tell ReplyMessage32 it's not
+         an early reply */
+        smsg->flags |= SMSG_SENDING_REPLY;
+        ReplyMessage32( result );
   }  
 
+    TRACE( sendmsg,"done! \n" );
 }
 
+
+
 /***********************************************************************
  *           QUEUE_AddMsg
  *
diff --git a/windows/user.c b/windows/user.c
index 5513dca..2e35712 100644
--- a/windows/user.c
+++ b/windows/user.c
@@ -173,7 +173,6 @@
 
         TIMER_RemoveQueueTimers( hQueue );
 
-        QUEUE_FlushMessages( hQueue );
         HOOK_FreeQueueHooks( hQueue );
 
         QUEUE_SetExitingQueue( hQueue );