Skip to content
This repository has been archived by the owner on Feb 11, 2021. It is now read-only.

Commit

Permalink
Added release/acquire memory barriers to the atomic API
Browse files Browse the repository at this point in the history
* Added a destructor to clean up TLS memory at thread shutdown
* Refactored the TLS code to have platform independent code and a small platform dependent core with a fallback to generic code if platform dependent functions fail.
* Fixed recursion issues with SDL_GetErrBuf()
  • Loading branch information
slouken committed Jul 11, 2013
1 parent bd6696d commit 31d2b67
Show file tree
Hide file tree
Showing 10 changed files with 334 additions and 319 deletions.
27 changes: 27 additions & 0 deletions include/SDL_atomic.h
Expand Up @@ -45,6 +45,7 @@
*
* There's also lots of good information here:
* http://www.1024cores.net/home/lock-free-algorithms
* http://preshing.com/
*
* These operations may or may not actually be implemented using
* processor specific atomic operations. When possible they are
Expand Down Expand Up @@ -135,6 +136,32 @@ void _ReadWriteBarrier(void);
{ SDL_SpinLock _tmp = 0; SDL_AtomicLock(&_tmp); SDL_AtomicUnlock(&_tmp); }
#endif

/**
* Memory barriers are designed to prevent reads and writes from being
* reordered by the compiler and being seen out of order on multi-core CPUs.
*
* A typical pattern would be for thread A to write some data and a flag,
* and for thread B to read the flag and get the data. In this case you
* would insert a release barrier between writing the data and the flag,
* guaranteeing that the data write completes no later than the flag is
* written, and you would insert an acquire barrier between reading the
* flag and reading the data, to ensure that all the reads associated
* with the flag have completed.
*
* In this pattern you should always see a release barrier paired with
* an acquire barrier and you should gate the data reads/writes with a
* single flag variable.
*
* For more information on these semantics, take a look at the blog post:
* http://preshing.com/20120913/acquire-and-release-semantics
*/
/* Release/acquire barrier macros.
 *
 * Both currently expand to SDL_CompilerBarrier() and nothing else.
 *
 * FIXME: This is correct for x86 and x64 but not other CPUs
 * For PPC we need the lwsync instruction, and on ARM some variant of dmb
 */
#define SDL_MemoryBarrierRelease() SDL_CompilerBarrier()
#define SDL_MemoryBarrierAcquire() SDL_CompilerBarrier()


/* Platform specific optimized versions of the atomic functions,
* you can disable these by defining SDL_DISABLE_ATOMIC_INLINE
*/
Expand Down
7 changes: 4 additions & 3 deletions include/SDL_thread.h
Expand Up @@ -48,8 +48,8 @@ typedef struct SDL_Thread SDL_Thread;
/* The SDL thread ID */
typedef unsigned long SDL_threadID;

/* Thread local storage ID */
typedef int SDL_TLSID;
/* Thread local storage ID, 0 is the invalid ID */
typedef unsigned SDL_TLSID;

/* The SDL thread priority
*
Expand Down Expand Up @@ -219,13 +219,14 @@ extern DECLSPEC void * SDLCALL SDL_TLSGet(SDL_TLSID id);
*
* \param id The thread local storage ID
* \param value The value to associate with the ID for the current thread
* \param destructor A function called when the thread exits, to free the value.
*
* \return 0 on success, -1 on error
*
* \sa SDL_TLSCreate()
* \sa SDL_TLSGet()
*/
extern DECLSPEC int SDLCALL SDL_TLSSet(SDL_TLSID id, const void *value);
extern DECLSPEC int SDLCALL SDL_TLSSet(SDL_TLSID id, const void *value, void (*destructor)(void*));


/* Ends C function definitions when using C++ */
Expand Down
6 changes: 6 additions & 0 deletions src/thread/SDL_systhread.h
Expand Up @@ -50,6 +50,12 @@ extern int SDL_SYS_SetThreadPriority(SDL_ThreadPriority priority);
*/
extern void SDL_SYS_WaitThread(SDL_Thread * thread);

/* Get the thread local storage for this thread */
extern SDL_TLSData *SDL_SYS_GetTLSData();

/* Set the thread local storage for this thread */
extern int SDL_SYS_SetTLSData(SDL_TLSData *data);

#endif /* _SDL_systhread_h */

/* vi: set ts=4 sw=4 expandtab: */
202 changes: 192 additions & 10 deletions src/thread/SDL_thread.c
Expand Up @@ -28,38 +28,219 @@
#include "../SDL_error_c.h"


/**
 * Hand out a new thread-local storage ID.
 *
 * IDs come from a function-local atomic counter shared by all callers.
 * The result is the value produced by SDL_AtomicIncRef() plus one.
 *
 * NOTE(review): presumably SDL_AtomicIncRef() yields the pre-increment
 * value, so the first ID is 1 and 0 stays reserved as the invalid ID --
 * confirm against SDL_atomic.h.
 *
 * \return the newly reserved storage ID
 */
SDL_TLSID
SDL_TLSCreate()
{
    static SDL_atomic_t id_counter;

    return 1 + SDL_AtomicIncRef(&id_counter);
}

/**
 * Fetch the value the calling thread has stored under a TLS ID.
 *
 * \param id The thread local storage ID (1-based; 0 is never valid)
 *
 * \return the stored value, or NULL when the thread has no storage block,
 *         when id is 0, or when id lies beyond the block's current limit
 */
void *
SDL_TLSGet(SDL_TLSID id)
{
    SDL_TLSData *tls = SDL_SYS_GetTLSData();

    if (tls && id != 0 && id <= tls->limit) {
        /* IDs are 1-based, array slots are 0-based */
        return tls->array[id - 1].data;
    }
    return NULL;
}

/**
 * Store a value (and optional destructor) under a TLS ID for the calling
 * thread, growing the thread's storage block when the ID does not fit.
 *
 * \param id The thread local storage ID (1-based; 0 is rejected)
 * \param value The value to associate with the ID for the current thread
 * \param destructor Recorded next to the value; SDL_TLSCleanup() calls it
 *                   with the value when it is non-NULL
 *
 * \return 0 on success; the result of SDL_InvalidParamError("id") when id
 *         is 0; the result of SDL_OutOfMemory() when the block cannot be
 *         grown; -1 when SDL_SYS_SetTLSData() reports failure
 */
int
SDL_TLSSet(SDL_TLSID id, const void *value, void (*destructor)(void *))
{
    SDL_TLSData *tls;

    if (id == 0) {
        return SDL_InvalidParamError("id");
    }

    tls = SDL_SYS_GetTLSData();
    if (!tls || id > tls->limit) {
        int slot, first_new, capacity;

        /* Grow past the requested ID so consecutive IDs don't each realloc */
        first_new = tls ? tls->limit : 0;
        capacity = (id + TLS_ALLOC_CHUNKSIZE);

        /* The struct already contains one array element, hence capacity-1 */
        tls = (SDL_TLSData *)SDL_realloc(tls, sizeof(*tls)+(capacity-1)*sizeof(tls->array[0]));
        if (!tls) {
            return SDL_OutOfMemory();
        }
        tls->limit = capacity;

        /* Only the freshly added tail needs clearing */
        slot = first_new;
        while (slot < capacity) {
            tls->array[slot].data = NULL;
            tls->array[slot].destructor = NULL;
            ++slot;
        }

        /* NOTE(review): on failure here the block has already been
           reallocated; nothing in this function releases or re-registers it */
        if (SDL_SYS_SetTLSData(tls) != 0) {
            return -1;
        }
    }

    tls->array[id-1].data = SDL_const_cast(void*, value);
    tls->array[id-1].destructor = destructor;
    return 0;
}

/* Tear down the calling thread's TLS block: run every registered
   destructor on its value, detach the block from the thread, then free it.
   Does nothing when the thread has no block.
*/
static void
SDL_TLSCleanup()
{
    SDL_TLSData *tls = SDL_SYS_GetTLSData();
    int slot;

    if (!tls) {
        return;
    }

    for (slot = 0; slot < tls->limit; ++slot) {
        void (*destroy)(void *) = tls->array[slot].destructor;

        if (destroy) {
            destroy(tls->array[slot].data);
        }
    }

    /* Detach before freeing so the thread never points at freed memory */
    SDL_SYS_SetTLSData(NULL);
    SDL_free(tls);
}


/* This is a generic implementation of thread-local storage which doesn't
require additional OS support.
It is not especially efficient (every lookup walks a mutex-protected list)
and a thread's entry is only removed when its storage is explicitly set to
NULL, as SDL_TLSCleanup() does. If there is a real OS that doesn't support
thread-local storage this implementation should be improved to be
production quality.
*/

/* One node of the generic TLS list: maps a thread ID to that thread's
   storage block. */
typedef struct SDL_TLSEntry {
SDL_threadID thread;        /* ID of the thread owning this entry */
SDL_TLSData *storage;       /* that thread's storage block */
struct SDL_TLSEntry *next;  /* next node, or NULL at the end of the list */
} SDL_TLSEntry;

/* Mutex taken around every walk or change of SDL_generic_TLS; created on
   first use by SDL_Generic_GetTLSData() */
static SDL_mutex *SDL_generic_TLS_mutex;
/* Head of the singly linked list of per-thread entries */
static SDL_TLSEntry *SDL_generic_TLS;


/* Look up the calling thread's storage block in the generic TLS list.
 *
 * Creates SDL_generic_TLS_mutex on first use.
 *
 * Returns the thread's storage block, or NULL when the thread has no entry
 * in the list or when the mutex could not be created.
 */
SDL_TLSData *
SDL_Generic_GetTLSData()
{
SDL_threadID thread = SDL_ThreadID();
SDL_TLSEntry *entry;
SDL_TLSData *storage = NULL;

/* Unlocked fast check; the mutex pointer is re-checked under the spin lock */
if (!SDL_generic_TLS_mutex) {
static SDL_SpinLock tls_lock;
SDL_AtomicLock(&tls_lock);
if (!SDL_generic_TLS_mutex) {
SDL_mutex *mutex = SDL_CreateMutex();
/* Barrier sits between creating the mutex and publishing its pointer */
SDL_MemoryBarrierRelease();
SDL_generic_TLS_mutex = mutex;
if (!SDL_generic_TLS_mutex) {
/* Mutex creation failed; drop the spin lock and report no storage */
SDL_AtomicUnlock(&tls_lock);
return NULL;
}
}
SDL_AtomicUnlock(&tls_lock);
}

/* Pairs with the release barrier above, before the mutex is used */
SDL_MemoryBarrierAcquire();
SDL_LockMutex(SDL_generic_TLS_mutex);
/* Linear search for the entry belonging to this thread */
for (entry = SDL_generic_TLS; entry; entry = entry->next) {
if (entry->thread == thread) {
storage = entry->storage;
break;
}
}
SDL_UnlockMutex(SDL_generic_TLS_mutex);

return storage;
}

/* Record, replace or remove the calling thread's storage block in the
 * generic TLS list.
 *
 * storage: the block to associate with this thread, or NULL to remove the
 *          thread's entry from the list (the block itself is not freed
 *          here; that is left to the caller).
 *
 * Returns 0 on success, or the result of SDL_OutOfMemory() when a new list
 * entry is needed and cannot be allocated.
 *
 * The search is kept separate from the update so the entry pointer is
 * never inspected after it has been freed, and clearing the storage of a
 * thread that has no entry is a no-op instead of allocating an empty node.
 */
int
SDL_Generic_SetTLSData(SDL_TLSData *storage)
{
    SDL_threadID thread = SDL_ThreadID();
    SDL_TLSEntry *prev, *entry;
    int out_of_memory = 0;

    /* SDL_Generic_GetTLSData() is always called first, so we can assume SDL_generic_TLS_mutex */
    SDL_LockMutex(SDL_generic_TLS_mutex);

    /* Find this thread's entry, remembering its predecessor for unlinking */
    prev = NULL;
    for (entry = SDL_generic_TLS; entry; entry = entry->next) {
        if (entry->thread == thread) {
            break;
        }
        prev = entry;
    }

    if (entry) {
        if (storage) {
            entry->storage = storage;
        } else {
            /* Unlink and release the entry; it is not touched afterwards */
            if (prev) {
                prev->next = entry->next;
            } else {
                SDL_generic_TLS = entry->next;
            }
            SDL_free(entry);
        }
    } else if (storage) {
        /* First storage for this thread: push a new entry on the list head */
        entry = (SDL_TLSEntry *)SDL_malloc(sizeof(*entry));
        if (entry) {
            entry->thread = thread;
            entry->storage = storage;
            entry->next = SDL_generic_TLS;
            SDL_generic_TLS = entry;
        } else {
            out_of_memory = 1;
        }
    }
    /* else: nothing stored and nothing to store, so there is nothing to do */

    SDL_UnlockMutex(SDL_generic_TLS_mutex);

    if (out_of_memory) {
        return SDL_OutOfMemory();
    }
    return 0;
}

/* Routine to get the thread-specific error variable
 *
 * Returns the calling thread's SDL_error buffer, allocating and zeroing it
 * on first use and registering it in thread-local storage with SDL_free as
 * its destructor. Falls back to the shared SDL_global_errbuf when no TLS
 * slot ID is available, when this thread's slot holds the
 * ALLOCATION_IN_PROGRESS marker, or when SDL_malloc() fails.
 *
 * NOTE(review): this listing looks like a diff view with removed and added
 * lines interleaved (both `spinlock` and `tls_lock`, two SDL_TLSCreate()
 * calls, two- and three-argument SDL_TLSSet() calls) -- confirm against the
 * real file before relying on the exact statement sequence.
 */
SDL_error *
SDL_GetErrBuf(void)
{
static SDL_SpinLock spinlock;
static SDL_SpinLock tls_lock;
static SDL_bool tls_being_created;
static SDL_TLSID tls_errbuf;
static SDL_error SDL_global_errbuf;
/* Sentinel stored in the TLS slot while the real buffer is being allocated */
const SDL_error *ALLOCATION_IN_PROGRESS = (SDL_error *)-1;
SDL_error *errbuf;

/* tls_being_created is there simply to prevent recursion if SDL_TLSCreate() fails.
It also means it's possible for another thread to also use SDL_global_errbuf,
but that's very unlikely and hopefully won't cause issues.
*/
if (!tls_errbuf && !tls_being_created) {
SDL_AtomicLock(&spinlock);
SDL_AtomicLock(&tls_lock);
if (!tls_errbuf) {
/* SDL_TLSCreate() could fail and call SDL_SetError() */
SDL_TLSID slot;
tls_being_created = SDL_TRUE;
tls_errbuf = SDL_TLSCreate();
slot = SDL_TLSCreate();
tls_being_created = SDL_FALSE;
/* Barrier precedes publishing the slot ID through tls_errbuf */
SDL_MemoryBarrierRelease();
tls_errbuf = slot;
}
SDL_AtomicUnlock(&spinlock);
SDL_AtomicUnlock(&tls_lock);
}
/* No slot ID available: use the shared buffer */
if (!tls_errbuf) {
return &SDL_global_errbuf;
}

errbuf = SDL_TLSGet(tls_errbuf);
SDL_MemoryBarrierAcquire();
errbuf = (SDL_error *)SDL_TLSGet(tls_errbuf);
/* Re-entered while this thread's buffer is still being allocated below */
if (errbuf == ALLOCATION_IN_PROGRESS) {
return &SDL_global_errbuf;
}
if (!errbuf) {
/* Mark that we're in the middle of allocating our buffer */
SDL_TLSSet(tls_errbuf, ALLOCATION_IN_PROGRESS, NULL);
errbuf = (SDL_error *)SDL_malloc(sizeof(*errbuf));
if (!errbuf) {
/* Clear the marker so a later call can retry the allocation */
SDL_TLSSet(tls_errbuf, NULL, NULL);
return &SDL_global_errbuf;
}
SDL_zerop(errbuf);
SDL_TLSSet(tls_errbuf, errbuf);
SDL_TLSSet(tls_errbuf, errbuf, SDL_free);
}
return errbuf;
}
Expand All @@ -82,9 +263,7 @@ SDL_RunThread(void *data)
void *userdata = args->data;
int *statusloc = &args->info->status;

/* Perform any system-dependent setup
- this function cannot fail, and cannot use SDL_SetError()
*/
/* Perform any system-dependent setup - this function may not fail */
SDL_SYS_SetupThread(args->info->name);

/* Get the thread id */
Expand All @@ -95,6 +274,9 @@ SDL_RunThread(void *data)

/* Run the function */
*statusloc = userfunc(userdata);

/* Clean up thread-local storage */
SDL_TLSCleanup();
}

#ifdef SDL_PASSED_BEGINTHREAD_ENDTHREAD
Expand Down
24 changes: 24 additions & 0 deletions src/thread/SDL_thread_c.h
Expand Up @@ -56,6 +56,30 @@ struct SDL_Thread
/* This is the function called to run a thread */
extern void SDL_RunThread(void *data);

/* This is the system-independent thread local storage structure.
 *
 * One block exists per thread. It is allocated larger than
 * sizeof(SDL_TLSData): SDL_TLSSet() sizes it as
 * sizeof(SDL_TLSData) + (limit-1) * sizeof(array[0]), so `array` really
 * holds `limit` elements. Storage ID n lives in array[n-1].
 */
typedef struct {
int limit;                  /* number of usable elements in array */
struct {
void *data;                 /* value stored for this ID, NULL if unset */
void (*destructor)(void*);  /* called on data by SDL_TLSCleanup(), may be NULL */
} array[1];                 /* over-allocated to `limit` elements, see above */
} SDL_TLSData;

/* This is how many TLS entries we allocate at once */
#define TLS_ALLOC_CHUNKSIZE 4

/* Get cross-platform, slow, thread local storage for this thread.
This is only intended as a fallback if getting real thread-local
storage fails or isn't supported on this platform.
*/
extern SDL_TLSData *SDL_Generic_GetTLSData();

/* Set cross-platform, slow, thread local storage for this thread.
This is only intended as a fallback if getting real thread-local
storage fails or isn't supported on this platform.
*/
extern int SDL_Generic_SetTLSData(SDL_TLSData *data);

#endif /* _SDL_thread_c_h */

/* vi: set ts=4 sw=4 expandtab: */

0 comments on commit 31d2b67

Please sign in to comment.