Call the 16-bit UserSignalProc entry point instead of the 32-bit one.
Don't load USER32.DLL unless requested by the program.
Bugfix: Pass the correct thread ID to UserSignalProc.

diff --git a/loader/task.c b/loader/task.c
index ca93800..35d8a8e 100644
--- a/loader/task.c
+++ b/loader/task.c
@@ -462,9 +462,9 @@
     /* Perform USER cleanup */
 
     TASK_CallTaskSignalProc( USIG16_TERMINATION, hTask );
-    PROCESS_CallUserSignalProc( USIG_PROCESS_EXIT, 0 );
-    PROCESS_CallUserSignalProc( USIG_THREAD_EXIT, 0 );     /* FIXME */
-    PROCESS_CallUserSignalProc( USIG_PROCESS_DESTROY, 0 );
+    PROCESS_CallUserSignalProc( USIG_PROCESS_EXIT, 0, 0 );
+    PROCESS_CallUserSignalProc( USIG_THREAD_EXIT, GetCurrentThreadId(), 0 );
+    PROCESS_CallUserSignalProc( USIG_PROCESS_DESTROY, 0, 0 );
 
     if (nTaskCount <= 1)
     {
@@ -716,6 +716,7 @@
 
     /* Initialize implicitly loaded DLLs */
     NE_InitializeDLLs( pTask->hModule );
+    NE_DllProcessAttach( pTask->hModule );
 
     /* Registers on return are:
      * ax     1 if OK, 0 on error