From efe1af294fd9f6c0c05178be5bcbfb1be565a1f5 Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Thu, 17 Feb 2011 09:13:12 -0800 Subject: [PATCH] Fixed bug 1128 Patrick Baggett 2011-02-16 22:58:33 PST This enhancement is for both x86/x64 Windows. The SDL implementation of mutexes uses the Win32 API interprocess synchronization primitive called a "Mutex". This implementation is subpar because it has a much higher overhead than an intraprocess mutex. The exact technical details are below, but my tests have shown that for reasonably high contention (10 threads on 4 physical cores), it has 13x higher overhead than the Win32 CriticalSection API. If this enhancement is accepted, I will write a patch to implement SDL mutexes using the critical section API, which should dramatically reduce overhead and improve scalability. -- Tech details -- Normally, Win32 Mutexes are used across process boundaries to synchronize separate processes. In order to lock or unlock them, a user->kernel space transition is necessary, even in the uncontended case on a single CPU machine. Win32 CriticalSection objects can only be used within the same process virtual address space and thus locking one does not require a user->kernel space transition for the uncontended case, and additionally may spin a short while before going into kernel wait. This small spin allows a thread to obtain the lock if the mutex is released shortly after the thread starts spinning, in effect bypassing the overhead of user->kernel space transition which has higher overhead than the spinning itself. 
--- src/thread/windows/SDL_sysmutex.c | 45 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/src/thread/windows/SDL_sysmutex.c b/src/thread/windows/SDL_sysmutex.c index 8440ae7dc..fbcb1d3e1 100644 --- a/src/thread/windows/SDL_sysmutex.c +++ b/src/thread/windows/SDL_sysmutex.c @@ -30,7 +30,7 @@ struct SDL_mutex { - HANDLE id; + CRITICAL_SECTION cs; }; /* Create a mutex */ @@ -38,17 +38,29 @@ SDL_mutex * SDL_CreateMutex(void) { SDL_mutex *mutex; + static DWORD (WINAPI*pf_SetCriticalSectionSpinCount)(LPCRITICAL_SECTION, DWORD) = NULL; + static HMODULE kernel32 = NULL; + + /* One time logic - detect WinNT */ + if(kernel32 == NULL) { + kernel32 = GetModuleHandleA("kernel32.dll"); + if(kernel32) { + /* Attempt to resolve symbol -- Win9x gets NULL */ + pf_SetCriticalSectionSpinCount = (DWORD (WINAPI*)(LPCRITICAL_SECTION, DWORD))GetProcAddress(kernel32, "SetCriticalSectionSpinCount"); + } + else + kernel32 = (HMODULE)0x01; /* don't try to init again */ + } + /* Allocate mutex memory */ mutex = (SDL_mutex *) SDL_malloc(sizeof(*mutex)); if (mutex) { - /* Create the mutex, with initial value signaled */ - mutex->id = CreateMutex(NULL, FALSE, NULL); - if (!mutex->id) { - SDL_SetError("Couldn't create mutex"); - SDL_free(mutex); - mutex = NULL; - } + /* Initialize */ + InitializeCriticalSection(&mutex->cs); + + /* On SMP systems, a non-zero spin count generally helps performance */ + if(pf_SetCriticalSectionSpinCount) pf_SetCriticalSectionSpinCount(&mutex->cs, 2000); } else { SDL_OutOfMemory(); } @@ -60,10 +72,7 @@ void SDL_DestroyMutex(SDL_mutex * mutex) { if (mutex) { - if (mutex->id) { - CloseHandle(mutex->id); - mutex->id = 0; - } + DeleteCriticalSection(&mutex->cs); SDL_free(mutex); } } @@ -76,10 +85,8 @@ SDL_mutexP(SDL_mutex * mutex) SDL_SetError("Passed a NULL mutex"); return -1; } - if (WaitForSingleObject(mutex->id, INFINITE) == WAIT_FAILED) { - SDL_SetError("Couldn't wait on mutex"); - return -1; - } + + 
EnterCriticalSection(&mutex->cs); return (0); } @@ -91,10 +98,8 @@ SDL_mutexV(SDL_mutex * mutex) SDL_SetError("Passed a NULL mutex"); return -1; } - if (ReleaseMutex(mutex->id) == FALSE) { - SDL_SetError("Couldn't release mutex"); - return -1; - } + + LeaveCriticalSection(&mutex->cs); return (0); }