Added release/acquire memory barriers to the atomic API
author Sam Lantinga <slouken@libsdl.org>
Wed, 10 Jul 2013 18:31:17 -0700
changeset 7393 358696c354a8
parent 7392 7e32fcb41b44
child 7394 38dc4961ab15
Added release/acquire memory barriers to the atomic API
* Added a destructor to clean up TLS memory at thread shutdown
* Refactored the TLS code to have platform independent code and a small platform dependent core with a fallback to generic code if platform dependent functions fail.
* Fixed recursion issues with SDL_GetErrBuf()
include/SDL_atomic.h
include/SDL_thread.h
src/thread/SDL_systhread.h
src/thread/SDL_thread.c
src/thread/SDL_thread_c.h
src/thread/beos/SDL_systls.c
src/thread/generic/SDL_systls.c
src/thread/pthread/SDL_systls.c
src/thread/windows/SDL_systls.c
test/testthread.c
     1.1 --- a/include/SDL_atomic.h	Wed Jul 10 02:37:57 2013 -0700
     1.2 +++ b/include/SDL_atomic.h	Wed Jul 10 18:31:17 2013 -0700
     1.3 @@ -45,6 +45,7 @@
     1.4   *
     1.5   * There's also lots of good information here:
     1.6   * http://www.1024cores.net/home/lock-free-algorithms
     1.7 + * http://preshing.com/
     1.8   *
     1.9   * These operations may or may not actually be implemented using
    1.10   * processor specific atomic operations. When possible they are
    1.11 @@ -135,6 +136,32 @@
    1.12  { SDL_SpinLock _tmp = 0; SDL_AtomicLock(&_tmp); SDL_AtomicUnlock(&_tmp); }
    1.13  #endif
    1.14  
    1.15 +/**
    1.16 + * Memory barriers are designed to prevent reads and writes from being
    1.17 + * reordered by the compiler and being seen out of order on multi-core CPUs.
    1.18 + *
    1.19 + * A typical pattern would be for thread A to write some data and a flag,
    1.20 + * and for thread B to read the flag and get the data. In this case you
    1.21 + * would insert a release barrier between writing the data and the flag,
    1.22 + * guaranteeing that the data write completes no later than the flag is
    1.23 + * written, and you would insert an acquire barrier between reading the
    1.24 + * flag and reading the data, to ensure that the data reads are not
    1.25 + * performed until after the flag has been read.
    1.26 + *
    1.27 + * In this pattern you should always see a release barrier paired with
    1.28 + * an acquire barrier and you should gate the data reads/writes with a
    1.29 + * single flag variable.
    1.30 + *
    1.31 + * For more information on these semantics, take a look at the blog post:
    1.32 + * http://preshing.com/20120913/acquire-and-release-semantics
    1.33 + */
    1.34 +/* FIXME: This is correct for x86 and x64 but not other CPUs.
    1.35 +   For PPC we need the lwsync instruction, and on ARM some variant of dmb.
    1.36 + */
    1.37 +#define SDL_MemoryBarrierRelease()  SDL_CompilerBarrier()
    1.38 +#define SDL_MemoryBarrierAcquire()  SDL_CompilerBarrier()
    1.39 +
    1.40 +
    1.41  /* Platform specific optimized versions of the atomic functions,
    1.42   * you can disable these by defining SDL_DISABLE_ATOMIC_INLINE
    1.43   */
     2.1 --- a/include/SDL_thread.h	Wed Jul 10 02:37:57 2013 -0700
     2.2 +++ b/include/SDL_thread.h	Wed Jul 10 18:31:17 2013 -0700
     2.3 @@ -48,8 +48,8 @@
     2.4  /* The SDL thread ID */
     2.5  typedef unsigned long SDL_threadID;
     2.6  
     2.7 -/* Thread local storage ID */
     2.8 -typedef int SDL_TLSID;
     2.9 +/* Thread local storage ID, 0 is the invalid ID */
    2.10 +typedef unsigned SDL_TLSID;
    2.11  
    2.12  /* The SDL thread priority
    2.13   *
    2.14 @@ -219,13 +219,14 @@
    2.15   *
    2.16   *  \param id The thread local storage ID
    2.17   *  \param value The value to associate with the ID for the current thread
    2.18 + *  \param destructor A function called when the thread exits, to free the value.
    2.19   *
    2.20   *  \return 0 on success, -1 on error
    2.21   *
    2.22   *  \sa SDL_TLSCreate()
    2.23   *  \sa SDL_TLSGet()
    2.24   */
    2.25 -extern DECLSPEC int SDLCALL SDL_TLSSet(SDL_TLSID id, const void *value);
    2.26 +extern DECLSPEC int SDLCALL SDL_TLSSet(SDL_TLSID id, const void *value, void (*destructor)(void*));
    2.27  
    2.28  
    2.29  /* Ends C function definitions when using C++ */
     3.1 --- a/src/thread/SDL_systhread.h	Wed Jul 10 02:37:57 2013 -0700
     3.2 +++ b/src/thread/SDL_systhread.h	Wed Jul 10 18:31:17 2013 -0700
     3.3 @@ -50,6 +50,12 @@
     3.4   */
     3.5  extern void SDL_SYS_WaitThread(SDL_Thread * thread);
     3.6  
     3.7 +/* Get the thread local storage for this thread */
     3.8 +extern SDL_TLSData *SDL_SYS_GetTLSData();
     3.9 +
    3.10 +/* Set the thread local storage for this thread */
    3.11 +extern int SDL_SYS_SetTLSData(SDL_TLSData *data);
    3.12 +
    3.13  #endif /* _SDL_systhread_h */
    3.14  
    3.15  /* vi: set ts=4 sw=4 expandtab: */
     4.1 --- a/src/thread/SDL_thread.c	Wed Jul 10 02:37:57 2013 -0700
     4.2 +++ b/src/thread/SDL_thread.c	Wed Jul 10 18:31:17 2013 -0700
     4.3 @@ -28,38 +28,219 @@
     4.4  #include "../SDL_error_c.h"
     4.5  
     4.6  
     4.7 +SDL_TLSID
     4.8 +SDL_TLSCreate()
     4.9 +{
    4.10 +    static SDL_atomic_t SDL_tls_id;
    4.11 +    return SDL_AtomicIncRef(&SDL_tls_id)+1;
    4.12 +}
    4.13 +
    4.14 +void *
    4.15 +SDL_TLSGet(SDL_TLSID id)
    4.16 +{
    4.17 +    SDL_TLSData *storage;
    4.18 +
    4.19 +    storage = SDL_SYS_GetTLSData();
    4.20 +    if (!storage || id == 0 || id > storage->limit) {
    4.21 +        return NULL;
    4.22 +    }
    4.23 +    return storage->array[id-1].data;
    4.24 +}
    4.25 +
    4.26 +int
    4.27 +SDL_TLSSet(SDL_TLSID id, const void *value, void (*destructor)(void *))
    4.28 +{
    4.29 +    SDL_TLSData *storage;
    4.30 +
    4.31 +    if (id == 0) {
    4.32 +        return SDL_InvalidParamError("id");
    4.33 +    }
    4.34 +
    4.35 +    storage = SDL_SYS_GetTLSData();
    4.36 +    if (!storage || id > storage->limit) {
    4.37 +        int i, oldlimit, newlimit;
    4.38 +
    4.39 +        oldlimit = storage ? storage->limit : 0;
    4.40 +        newlimit = (id + TLS_ALLOC_CHUNKSIZE);
    4.41 +        storage = (SDL_TLSData *)SDL_realloc(storage, sizeof(*storage)+(newlimit-1)*sizeof(storage->array[0]));
    4.42 +        if (!storage) {
    4.43 +            return SDL_OutOfMemory();
    4.44 +        }
    4.45 +        storage->limit = newlimit;
    4.46 +        for (i = oldlimit; i < newlimit; ++i) {
    4.47 +            storage->array[i].data = NULL;
    4.48 +            storage->array[i].destructor = NULL;
    4.49 +        }
    4.50 +        if (SDL_SYS_SetTLSData(storage) != 0) {
    4.51 +            return -1;
    4.52 +        }
    4.53 +    }
    4.54 +
    4.55 +    storage->array[id-1].data = SDL_const_cast(void*, value);
    4.56 +    storage->array[id-1].destructor = destructor;
    4.57 +    return 0;
    4.58 +}
    4.59 +
    4.60 +static void
    4.61 +SDL_TLSCleanup()
    4.62 +{
    4.63 +    SDL_TLSData *storage;
    4.64 +
    4.65 +    storage = SDL_SYS_GetTLSData();
    4.66 +    if (storage) {
    4.67 +        int i;
    4.68 +        for (i = 0; i < storage->limit; ++i) {
    4.69 +            if (storage->array[i].destructor) {
    4.70 +                storage->array[i].destructor(storage->array[i].data);
    4.71 +            }
    4.72 +        }
    4.73 +        SDL_SYS_SetTLSData(NULL);
    4.74 +        SDL_free(storage);
    4.75 +    }
    4.76 +}
    4.77 +
    4.78 +
    4.79 +/* This is a generic implementation of thread-local storage which doesn't
    4.80 +   require additional OS support.
    4.81 +
    4.82 +   It is not especially efficient and only releases a thread's entry when
    4.83 +   SDL_Generic_SetTLSData(NULL) is called from that thread.  If a real OS lacks
    4.84 +   thread-local storage this implementation should be improved to be production quality.
    4.85 +*/
    4.86 +
    4.87 +typedef struct SDL_TLSEntry {
    4.88 +    SDL_threadID thread;
    4.89 +    SDL_TLSData *storage;
    4.90 +    struct SDL_TLSEntry *next;
    4.91 +} SDL_TLSEntry;
    4.92 +
    4.93 +static SDL_mutex *SDL_generic_TLS_mutex;
    4.94 +static SDL_TLSEntry *SDL_generic_TLS;
    4.95 +
    4.96 +
    4.97 +SDL_TLSData *
    4.98 +SDL_Generic_GetTLSData()
    4.99 +{
   4.100 +    SDL_threadID thread = SDL_ThreadID();
   4.101 +    SDL_TLSEntry *entry;
   4.102 +    SDL_TLSData *storage = NULL;
   4.103 +
   4.104 +    if (!SDL_generic_TLS_mutex) {
   4.105 +        static SDL_SpinLock tls_lock;
   4.106 +        SDL_AtomicLock(&tls_lock);
   4.107 +        if (!SDL_generic_TLS_mutex) {
   4.108 +            SDL_mutex *mutex = SDL_CreateMutex();
   4.109 +            SDL_MemoryBarrierRelease();
   4.110 +            SDL_generic_TLS_mutex = mutex;
   4.111 +            if (!SDL_generic_TLS_mutex) {
   4.112 +                SDL_AtomicUnlock(&tls_lock);
   4.113 +                return NULL;
   4.114 +            }
   4.115 +        }
   4.116 +        SDL_AtomicUnlock(&tls_lock);
   4.117 +    }
   4.118 +
   4.119 +    SDL_MemoryBarrierAcquire();
   4.120 +    SDL_LockMutex(SDL_generic_TLS_mutex);
   4.121 +    for (entry = SDL_generic_TLS; entry; entry = entry->next) {
   4.122 +        if (entry->thread == thread) {
   4.123 +            storage = entry->storage;
   4.124 +            break;
   4.125 +        }
   4.126 +    }
   4.127 +    SDL_UnlockMutex(SDL_generic_TLS_mutex);
   4.128 +
   4.129 +    return storage;
   4.130 +}
   4.131 +
   4.132 +int
   4.133 +SDL_Generic_SetTLSData(SDL_TLSData *storage)
   4.134 +{
   4.135 +    SDL_threadID thread = SDL_ThreadID();
   4.136 +    SDL_TLSEntry *prev, *entry;
   4.137 +
   4.138 +    /* SDL_Generic_GetTLSData() is always called first, so we can assume SDL_generic_TLS_mutex has been created */
   4.139 +    SDL_LockMutex(SDL_generic_TLS_mutex);
   4.140 +    prev = NULL;
   4.141 +    for (entry = SDL_generic_TLS; entry; entry = entry->next) {
   4.142 +        if (entry->thread == thread) {
   4.143 +            if (storage) {
   4.144 +                entry->storage = storage;
   4.145 +            } else {
   4.146 +                if (prev) {
   4.147 +                    prev->next = entry->next;
   4.148 +                } else {
   4.149 +                    SDL_generic_TLS = entry->next;
   4.150 +                }
   4.151 +                SDL_free(entry);
   4.152 +            }
   4.153 +            break;
   4.154 +        }
   4.155 +        prev = entry;
   4.156 +    }
   4.157 +    if (!entry) {
   4.158 +        entry = (SDL_TLSEntry *)SDL_malloc(sizeof(*entry));
   4.159 +        if (entry) {
   4.160 +            entry->thread = thread;
   4.161 +            entry->storage = storage;
   4.162 +            entry->next = SDL_generic_TLS;
   4.163 +            SDL_generic_TLS = entry;
   4.164 +        }
   4.165 +    }
   4.166 +    SDL_UnlockMutex(SDL_generic_TLS_mutex);
   4.167 +
   4.168 +    if (!entry) {
   4.169 +        return SDL_OutOfMemory();
   4.170 +    }
   4.171 +    return 0;
   4.172 +}
   4.173 +
   4.174  /* Routine to get the thread-specific error variable */
   4.175  SDL_error *
   4.176  SDL_GetErrBuf(void)
   4.177  {
   4.178 -    static SDL_SpinLock spinlock;
   4.179 +    static SDL_SpinLock tls_lock;
   4.180      static SDL_bool tls_being_created;
   4.181      static SDL_TLSID tls_errbuf;
   4.182      static SDL_error SDL_global_errbuf;
   4.183 +    const SDL_error *ALLOCATION_IN_PROGRESS = (SDL_error *)-1;
   4.184      SDL_error *errbuf;
   4.185  
   4.186 +    /* tls_being_created is there simply to prevent recursion if SDL_TLSCreate() fails.
   4.187 +       It also means another thread may use SDL_global_errbuf while the slot is
   4.188 +       being created, but that's very unlikely and hopefully won't cause issues.
   4.189 +     */
   4.190      if (!tls_errbuf && !tls_being_created) {
   4.191 -        SDL_AtomicLock(&spinlock);
   4.192 +        SDL_AtomicLock(&tls_lock);
   4.193          if (!tls_errbuf) {
   4.194 -            /* SDL_TLSCreate() could fail and call SDL_SetError() */
   4.195 +            SDL_TLSID slot;
   4.196              tls_being_created = SDL_TRUE;
   4.197 -            tls_errbuf = SDL_TLSCreate();
   4.198 +            slot = SDL_TLSCreate();
   4.199              tls_being_created = SDL_FALSE;
   4.200 +            SDL_MemoryBarrierRelease();
   4.201 +            tls_errbuf = slot;
   4.202          }
   4.203 -        SDL_AtomicUnlock(&spinlock);
   4.204 +        SDL_AtomicUnlock(&tls_lock);
   4.205      }
   4.206      if (!tls_errbuf) {
   4.207          return &SDL_global_errbuf;
   4.208      }
   4.209  
   4.210 -    errbuf = SDL_TLSGet(tls_errbuf);
   4.211 +    SDL_MemoryBarrierAcquire();
   4.212 +    errbuf = (SDL_error *)SDL_TLSGet(tls_errbuf);
   4.213 +    if (errbuf == ALLOCATION_IN_PROGRESS) {
   4.214 +        return &SDL_global_errbuf;
   4.215 +    }
   4.216      if (!errbuf) {
   4.217 +        /* Mark that we're in the middle of allocating our buffer */
   4.218 +        SDL_TLSSet(tls_errbuf, ALLOCATION_IN_PROGRESS, NULL);
   4.219          errbuf = (SDL_error *)SDL_malloc(sizeof(*errbuf));
   4.220          if (!errbuf) {
   4.221 +            SDL_TLSSet(tls_errbuf, NULL, NULL);
   4.222              return &SDL_global_errbuf;
   4.223          }
   4.224          SDL_zerop(errbuf);
   4.225 -        SDL_TLSSet(tls_errbuf, errbuf);
   4.226 +        SDL_TLSSet(tls_errbuf, errbuf, SDL_free);
   4.227      }
   4.228      return errbuf;
   4.229  }
   4.230 @@ -82,9 +263,7 @@
   4.231      void *userdata = args->data;
   4.232      int *statusloc = &args->info->status;
   4.233  
   4.234 -    /* Perform any system-dependent setup
   4.235 -       - this function cannot fail, and cannot use SDL_SetError()
   4.236 -     */
   4.237 +    /* Perform any system-dependent setup - this function may not fail */
   4.238      SDL_SYS_SetupThread(args->info->name);
   4.239  
   4.240      /* Get the thread id */
   4.241 @@ -95,6 +274,9 @@
   4.242  
   4.243      /* Run the function */
   4.244      *statusloc = userfunc(userdata);
   4.245 +
   4.246 +    /* Clean up thread-local storage */
   4.247 +    SDL_TLSCleanup();
   4.248  }
   4.249  
   4.250  #ifdef SDL_PASSED_BEGINTHREAD_ENDTHREAD
     5.1 --- a/src/thread/SDL_thread_c.h	Wed Jul 10 02:37:57 2013 -0700
     5.2 +++ b/src/thread/SDL_thread_c.h	Wed Jul 10 18:31:17 2013 -0700
     5.3 @@ -56,6 +56,30 @@
     5.4  /* This is the function called to run a thread */
     5.5  extern void SDL_RunThread(void *data);
     5.6  
     5.7 +/* This is the system-independent thread local storage structure */
     5.8 +typedef struct {
     5.9 +    int limit;
    5.10 +    struct {
    5.11 +        void *data;
    5.12 +        void (*destructor)(void*);
    5.13 +    } array[1];
    5.14 +} SDL_TLSData;
    5.15 +
    5.16 +/* This is how many TLS entries we allocate at once */
    5.17 +#define TLS_ALLOC_CHUNKSIZE 4
    5.18 +
    5.19 +/* Get cross-platform, slow, thread local storage for this thread.
    5.20 +   This is only intended as a fallback if getting real thread-local
    5.21 +   storage fails or isn't supported on this platform.
    5.22 + */
    5.23 +extern SDL_TLSData *SDL_Generic_GetTLSData();
    5.24 +
    5.25 +/* Set cross-platform, slow, thread local storage for this thread.
    5.26 +   This is only intended as a fallback if getting real thread-local
    5.27 +   storage fails or isn't supported on this platform.
    5.28 + */
    5.29 +extern int SDL_Generic_SetTLSData(SDL_TLSData *data);
    5.30 +
    5.31  #endif /* _SDL_thread_c_h */
    5.32  
    5.33  /* vi: set ts=4 sw=4 expandtab: */
     6.1 --- a/src/thread/beos/SDL_systls.c	Wed Jul 10 02:37:57 2013 -0700
     6.2 +++ b/src/thread/beos/SDL_systls.c	Wed Jul 10 18:31:17 2013 -0700
     6.3 @@ -21,81 +21,47 @@
     6.4  
     6.5  #include "SDL_config.h"
     6.6  #include "SDL_thread.h"
     6.7 +#include "../SDL_thread_c.h"
     6.8  
     6.9  #if SDL_THREAD_BEOS
    6.10  
    6.11  #include <support/TLS.h>
    6.12  
    6.13  
    6.14 -#define TLS_ALLOC_CHUNKSIZE 8
    6.15 +static int32 thread_local_storage = B_NO_MEMORY;
    6.16 +static SDL_bool generic_local_storage = SDL_FALSE;
    6.17  
    6.18 -typedef struct {
    6.19 -    int limit;
    6.20 -    void *data[1];
    6.21 -} SDL_TLSData;
    6.22 -
    6.23 -static SDL_SpinLock tls_lock;
    6.24 -static int32 thread_local_storage = B_NO_MEMORY;
    6.25 -static SDL_atomic_t tls_id;
    6.26 -
    6.27 -
    6.28 -SDL_TLSID
    6.29 -SDL_TLSCreate()
    6.30 +SDL_TLSData *
    6.31 +SDL_SYS_GetTLSData()
    6.32  {
    6.33 -    if (thread_local_storage == B_NO_MEMORY) {
    6.34 -        SDL_AtomicLock(&tls_lock);
    6.35 -        if (thread_local_storage == B_NO_MEMORY) {
    6.36 -            thread_local_storage = tls_allocate();
    6.37 -            if (thread_local_storage == B_NO_MEMORY) {
    6.38 -                SDL_SetError("tls_allocate() failed");
    6.39 -                SDL_AtomicUnlock(&tls_lock);
    6.40 -                return 0;
    6.41 +    if (thread_local_storage == B_NO_MEMORY && !generic_local_storage) {
    6.42 +        static SDL_SpinLock lock;
    6.43 +        SDL_AtomicLock(&lock);
    6.44 +        if (thread_local_storage == B_NO_MEMORY && !generic_local_storage) {
    6.45 +            int32 storage = tls_allocate();
    6.46 +            if (storage != B_NO_MEMORY) {
    6.47 +                SDL_MemoryBarrierRelease();
    6.48 +                thread_local_storage = storage;
    6.49 +            } else {
    6.50 +                generic_local_storage = SDL_TRUE;
    6.51              }
    6.52          }
    6.53 -        SDL_AtomicUnlock(&tls_lock);
    6.54 +        SDL_AtomicUnlock(&lock);
    6.55      }
    6.56 -    return SDL_AtomicIncRef(&tls_id)+1;
    6.57 -}
    6.58 -
    6.59 -void *
    6.60 -SDL_TLSGet(SDL_TLSID id)
    6.61 -{
    6.62 -    SDL_TLSData *data;
    6.63 -
    6.64 -    data = (SDL_TLSData *)tls_get(thread_local_storage);
    6.65 -    if (!data || id <= 0 || id > data->limit) {
    6.66 -        return NULL;
    6.67 +    if (generic_local_storage) {
    6.68 +        return SDL_Generic_GetTLSData();
    6.69      }
    6.70 -    return data->data[id-1];
    6.71 +    SDL_MemoryBarrierAcquire();
    6.72 +    return (SDL_TLSData *)tls_get(thread_local_storage);
    6.73  }
    6.74  
    6.75  int
    6.76 -SDL_TLSSet(SDL_TLSID id, const void *value)
    6.77 +SDL_SYS_SetTLSData(SDL_TLSData *data)
    6.78  {
    6.79 -    SDL_TLSData *data;
    6.80 -
    6.81 -    if (thread_local_storage == B_NO_MEMORY || id <= 0) {
    6.82 -        return SDL_InvalidParamError(id);
    6.83 +    if (generic_local_storage) {
    6.84 +        return SDL_Generic_SetTLSData(data);
    6.85      }
    6.86 -
    6.87 -    data = (SDL_TLSData *)tls_get(thread_local_storage);
    6.88 -    if (!data || id > data->limit) {
    6.89 -        int i, oldlimit, newlimit;
    6.90 -
    6.91 -        oldlimit = data ? data->limit : 0;
    6.92 -        newlimit = (id + TLS_ALLOC_CHUNKSIZE);
    6.93 -        data = (SDL_TLSData *)SDL_realloc(data, sizeof(*data)+(newlimit-1)*sizeof(void*));
    6.94 -        if (!data) {
    6.95 -            return SDL_OutOfMemory();
    6.96 -        }
    6.97 -        data->limit = newlimit;
    6.98 -        for (i = oldlimit; i < newlimit; ++i) {
    6.99 -            data->data[i] = NULL;
   6.100 -        }
   6.101 -        tls_set(thread_local_storage, data);
   6.102 -    }
   6.103 -
   6.104 -    data->data[id-1] = SDL_const_cast(void*, value);
   6.105 +    tls_set(thread_local_storage, data);
   6.106      return 0;
   6.107  }
   6.108  
     7.1 --- a/src/thread/generic/SDL_systls.c	Wed Jul 10 02:37:57 2013 -0700
     7.2 +++ b/src/thread/generic/SDL_systls.c	Wed Jul 10 18:31:17 2013 -0700
     7.3 @@ -20,144 +20,19 @@
     7.4  */
     7.5  
     7.6  #include "SDL_config.h"
     7.7 -#include "SDL_thread.h"
     7.8 +#include "../SDL_thread_c.h"
     7.9  
    7.10 -/* This is a generic implementation of thread-local storage which doesn't
    7.11 -   require additional OS support.
    7.12  
    7.13 -   It is not especially efficient and doesn't clean up thread-local storage
    7.14 -   as threads exit.  If there is a real OS that doesn't support thread-local
    7.15 -   storage this implementation should be improved to be production quality.
    7.16 -*/
    7.17 -
    7.18 -#define TLS_ALLOC_CHUNKSIZE 8
    7.19 -
    7.20 -typedef struct {
    7.21 -    int limit;
    7.22 -    void *data[1];
    7.23 -} SDL_TLSData;
    7.24 -
    7.25 -typedef struct SDL_TLSEntry {
    7.26 -    SDL_threadID thread;
    7.27 -    SDL_TLSData *data;
    7.28 -    struct SDL_TLSEntry *next;
    7.29 -} SDL_TLSEntry;
    7.30 -
    7.31 -static SDL_SpinLock tls_lock;
    7.32 -static SDL_mutex *tls_mutex;
    7.33 -static SDL_TLSEntry *thread_local_storage;
    7.34 -static SDL_atomic_t tls_id;
    7.35 -
    7.36 -
    7.37 -static SDL_TLSData *GetTLSData()
    7.38 +SDL_TLSData *
    7.39 +SDL_SYS_GetTLSData()
    7.40  {
    7.41 -    SDL_threadID thread = SDL_ThreadID();
    7.42 -    SDL_TLSEntry *entry;
    7.43 -    SDL_TLSData *data = NULL;
    7.44 -
    7.45 -    if (!tls_mutex) {
    7.46 -        SDL_AtomicLock(&tls_lock);
    7.47 -        if (!tls_mutex) {
    7.48 -            tls_mutex = SDL_CreateMutex();
    7.49 -            if (!tls_mutex) {
    7.50 -                SDL_AtomicUnlock(&tls_lock);
    7.51 -                return NULL;
    7.52 -            }
    7.53 -        }
    7.54 -        SDL_AtomicUnlock(&tls_lock);
    7.55 -    }
    7.56 -
    7.57 -    SDL_LockMutex(tls_mutex);
    7.58 -    for (entry = thread_local_storage; entry; entry = entry->next) {
    7.59 -        if (entry->thread == thread) {
    7.60 -            data = entry->data;
    7.61 -            break;
    7.62 -        }
    7.63 -    }
    7.64 -    SDL_UnlockMutex(tls_mutex);
    7.65 -
    7.66 -    return data;
    7.67 -}
    7.68 -
    7.69 -static int SetTLSData(SDL_TLSData *data)
    7.70 -{
    7.71 -    SDL_threadID thread = SDL_ThreadID();
    7.72 -    SDL_TLSEntry *entry;
    7.73 -
    7.74 -    /* GetTLSData() is always called first, so we can assume tls_mutex */
    7.75 -    SDL_LockMutex(tls_mutex);
    7.76 -    for (entry = thread_local_storage; entry; entry = entry->next) {
    7.77 -        if (entry->thread == thread) {
    7.78 -            entry->data = data;
    7.79 -            break;
    7.80 -        }
    7.81 -    }
    7.82 -    if (!entry) {
    7.83 -        entry = (SDL_TLSEntry *)SDL_malloc(sizeof(*entry));
    7.84 -        if (entry) {
    7.85 -            entry->thread = thread;
    7.86 -            entry->data = data;
    7.87 -            entry->next = thread_local_storage;
    7.88 -            thread_local_storage = entry;
    7.89 -        }
    7.90 -    }
    7.91 -    SDL_UnlockMutex(tls_mutex);
    7.92 -
    7.93 -    if (!entry) {
    7.94 -        return SDL_OutOfMemory();
    7.95 -    }
    7.96 -    return 0;
    7.97 -}
    7.98 -
    7.99 -
   7.100 -SDL_TLSID
   7.101 -SDL_TLSCreate()
   7.102 -{
   7.103 -    return SDL_AtomicIncRef(&tls_id)+1;
   7.104 -}
   7.105 -
   7.106 -void *
   7.107 -SDL_TLSGet(SDL_TLSID id)
   7.108 -{
   7.109 -    SDL_TLSData *data;
   7.110 -
   7.111 -    data = GetTLSData();
   7.112 -    if (!data || id <= 0 || id > data->limit) {
   7.113 -        return NULL;
   7.114 -    }
   7.115 -    return data->data[id-1];
   7.116 +    return SDL_Generic_GetTLSData();
   7.117  }
   7.118  
   7.119  int
   7.120 -SDL_TLSSet(SDL_TLSID id, const void *value)
   7.121 +SDL_SYS_SetTLSData(SDL_TLSData *data)
   7.122  {
   7.123 -    SDL_TLSData *data;
   7.124 -
   7.125 -    if (id <= 0) {
   7.126 -        return SDL_InvalidParamError(id);
   7.127 -    }
   7.128 -
   7.129 -    data = GetTLSData();
   7.130 -    if (!data || id > data->limit) {
   7.131 -        int i, oldlimit, newlimit;
   7.132 -
   7.133 -        oldlimit = data ? data->limit : 0;
   7.134 -        newlimit = (id + TLS_ALLOC_CHUNKSIZE);
   7.135 -        data = (SDL_TLSData *)SDL_realloc(data, sizeof(*data)+(newlimit-1)*sizeof(void*));
   7.136 -        if (!data) {
   7.137 -            return SDL_OutOfMemory();
   7.138 -        }
   7.139 -        data->limit = newlimit;
   7.140 -        for (i = oldlimit; i < newlimit; ++i) {
   7.141 -            data->data[i] = NULL;
   7.142 -        }
   7.143 -        if (SetTLSData(data) != 0) {
   7.144 -            return -1;
   7.145 -        }
   7.146 -    }
   7.147 -
   7.148 -    data->data[id-1] = SDL_const_cast(void*, value);
   7.149 -    return 0;
   7.150 +    return SDL_Generic_SetTLSData(data);
   7.151  }
   7.152  
   7.153  /* vi: set ts=4 sw=4 expandtab: */
     8.1 --- a/src/thread/pthread/SDL_systls.c	Wed Jul 10 02:37:57 2013 -0700
     8.2 +++ b/src/thread/pthread/SDL_systls.c	Wed Jul 10 18:31:17 2013 -0700
     8.3 @@ -18,83 +18,51 @@
     8.4       misrepresented as being the original software.
     8.5    3. This notice may not be removed or altered from any source distribution.
     8.6  */
     8.7 -
     8.8  #include "SDL_config.h"
     8.9  #include "SDL_thread.h"
    8.10 +#include "../SDL_thread_c.h"
    8.11  
    8.12  #include <pthread.h>
    8.13  
    8.14  
    8.15 -#define TLS_ALLOC_CHUNKSIZE 8
    8.16 +#define INVALID_PTHREAD_KEY ((pthread_key_t)-1)
    8.17  
    8.18 -typedef struct {
    8.19 -    int limit;
    8.20 -    void *data[1];
    8.21 -} SDL_TLSData;
    8.22 +static pthread_key_t thread_local_storage = INVALID_PTHREAD_KEY;
    8.23 +static SDL_bool generic_local_storage = SDL_FALSE;
    8.24  
    8.25 -static SDL_SpinLock tls_lock;
    8.26 -static pthread_key_t thread_local_storage;
    8.27 -static SDL_atomic_t tls_id;
    8.28 -
    8.29 -
    8.30 -SDL_TLSID
    8.31 -SDL_TLSCreate()
    8.32 +SDL_TLSData *
    8.33 +SDL_SYS_GetTLSData()
    8.34  {
    8.35 -    if (!thread_local_storage) {
    8.36 -        SDL_AtomicLock(&tls_lock);
    8.37 -        if (!thread_local_storage) {
    8.38 -            if (pthread_key_create(&thread_local_storage, NULL) != 0) {
    8.39 -                SDL_SetError("pthread_key_create() failed");
    8.40 -                SDL_AtomicUnlock(&tls_lock);
    8.41 -                return 0;
    8.42 +    if (thread_local_storage == INVALID_PTHREAD_KEY && !generic_local_storage) {
    8.43 +        static SDL_SpinLock lock;
    8.44 +        SDL_AtomicLock(&lock);
    8.45 +        if (thread_local_storage == INVALID_PTHREAD_KEY && !generic_local_storage) {
    8.46 +            pthread_key_t storage;
    8.47 +            if (pthread_key_create(&storage, NULL) == 0) {
    8.48 +                SDL_MemoryBarrierRelease();
    8.49 +                thread_local_storage = storage;
    8.50 +            } else {
    8.51 +                generic_local_storage = SDL_TRUE;
    8.52              }
    8.53          }
    8.54 -        SDL_AtomicUnlock(&tls_lock);
    8.55 +        SDL_AtomicUnlock(&lock);
    8.56      }
    8.57 -    return SDL_AtomicIncRef(&tls_id)+1;
    8.58 -}
    8.59 -
    8.60 -void *
    8.61 -SDL_TLSGet(SDL_TLSID id)
    8.62 -{
    8.63 -    SDL_TLSData *data;
    8.64 -
    8.65 -    data = (SDL_TLSData *)pthread_getspecific(thread_local_storage);
    8.66 -    if (!data || id <= 0 || id > data->limit) {
    8.67 -        return NULL;
    8.68 +    if (generic_local_storage) {
    8.69 +        return SDL_Generic_GetTLSData();
    8.70      }
    8.71 -    return data->data[id-1];
    8.72 +    SDL_MemoryBarrierAcquire();
    8.73 +    return (SDL_TLSData *)pthread_getspecific(thread_local_storage);
    8.74  }
    8.75  
    8.76  int
    8.77 -SDL_TLSSet(SDL_TLSID id, const void *value)
    8.78 +SDL_SYS_SetTLSData(SDL_TLSData *data)
    8.79  {
    8.80 -    SDL_TLSData *data;
    8.81 -
    8.82 -    if (!thread_local_storage || id <= 0) {
    8.83 -        return SDL_InvalidParamError(id);
    8.84 +    if (generic_local_storage) {
    8.85 +        return SDL_Generic_SetTLSData(data);
    8.86      }
    8.87 -
    8.88 -    data = (SDL_TLSData *)pthread_getspecific(thread_local_storage);
    8.89 -    if (!data || id > data->limit) {
    8.90 -        int i, oldlimit, newlimit;
    8.91 -
    8.92 -        oldlimit = data ? data->limit : 0;
    8.93 -        newlimit = (id + TLS_ALLOC_CHUNKSIZE);
    8.94 -        data = (SDL_TLSData *)SDL_realloc(data, sizeof(*data)+(newlimit-1)*sizeof(void*));
    8.95 -        if (!data) {
    8.96 -            return SDL_OutOfMemory();
    8.97 -        }
    8.98 -        data->limit = newlimit;
    8.99 -        for (i = oldlimit; i < newlimit; ++i) {
   8.100 -            data->data[i] = NULL;
   8.101 -        }
   8.102 -        if (pthread_setspecific(thread_local_storage, data) != 0) {
   8.103 -            return SDL_SetError("pthread_setspecific() failed");
   8.104 -        }
   8.105 +    if (pthread_setspecific(thread_local_storage, data) != 0) {
   8.106 +        return SDL_SetError("pthread_setspecific() failed");
   8.107      }
   8.108 -
   8.109 -    data->data[id-1] = SDL_const_cast(void*, value);
   8.110      return 0;
   8.111  }
   8.112  
     9.1 --- a/src/thread/windows/SDL_systls.c	Wed Jul 10 02:37:57 2013 -0700
     9.2 +++ b/src/thread/windows/SDL_systls.c	Wed Jul 10 18:31:17 2013 -0700
     9.3 @@ -21,83 +21,49 @@
     9.4  
     9.5  #include "SDL_config.h"
     9.6  #include "SDL_thread.h"
     9.7 +#include "../SDL_thread_c.h"
     9.8  
     9.9  #if SDL_THREAD_WINDOWS
    9.10  
    9.11  #include "../../core/windows/SDL_windows.h"
    9.12  
    9.13  
    9.14 -#define TLS_ALLOC_CHUNKSIZE 8
    9.15 +static DWORD thread_local_storage = TLS_OUT_OF_INDEXES;
    9.16 +static SDL_bool generic_local_storage = SDL_FALSE;
    9.17  
    9.18 -typedef struct {
    9.19 -    int limit;
    9.20 -    void *data[1];
    9.21 -} SDL_TLSData;
    9.22 -
    9.23 -static SDL_SpinLock tls_lock;
    9.24 -static DWORD thread_local_storage = TLS_OUT_OF_INDEXES;
    9.25 -static SDL_atomic_t tls_id;
    9.26 -
    9.27 -
    9.28 -SDL_TLSID
    9.29 -SDL_TLSCreate()
    9.30 +SDL_TLSData *
    9.31 +SDL_SYS_GetTLSData()
    9.32  {
    9.33 -    if (thread_local_storage == TLS_OUT_OF_INDEXES) {
    9.34 -        SDL_AtomicLock(&tls_lock);
    9.35 -        if (thread_local_storage == TLS_OUT_OF_INDEXES) {
    9.36 -            thread_local_storage = TlsAlloc();
    9.37 -            if (thread_local_storage == TLS_OUT_OF_INDEXES) {
    9.38 -                SDL_SetError("TlsAlloc() failed");
    9.39 -                SDL_AtomicUnlock(&tls_lock);
    9.40 -                return 0;
    9.41 +    if (thread_local_storage == TLS_OUT_OF_INDEXES && !generic_local_storage) {
    9.42 +        static SDL_SpinLock lock;
    9.43 +        SDL_AtomicLock(&lock);
    9.44 +        if (thread_local_storage == TLS_OUT_OF_INDEXES && !generic_local_storage) {
    9.45 +            DWORD storage = TlsAlloc();
    9.46 +            if (storage != TLS_OUT_OF_INDEXES) {
    9.47 +                SDL_MemoryBarrierRelease();
    9.48 +                thread_local_storage = storage;
    9.49 +            } else {
    9.50 +                generic_local_storage = SDL_TRUE;
    9.51              }
    9.52          }
    9.53 -        SDL_AtomicUnlock(&tls_lock);
    9.54 +        SDL_AtomicUnlock(&lock);
    9.55      }
    9.56 -    return SDL_AtomicIncRef(&tls_id)+1;
    9.57 -}
    9.58 -
    9.59 -void *
    9.60 -SDL_TLSGet(SDL_TLSID id)
    9.61 -{
    9.62 -    SDL_TLSData *data;
    9.63 -
    9.64 -    data = (SDL_TLSData *)TlsGetValue(thread_local_storage);
    9.65 -    if (!data || id <= 0 || id > data->limit) {
    9.66 -        return NULL;
    9.67 +    if (generic_local_storage) {
    9.68 +        return SDL_Generic_GetTLSData();
    9.69      }
    9.70 -    return data->data[id-1];
    9.71 +    SDL_MemoryBarrierAcquire();
    9.72 +    return (SDL_TLSData *)TlsGetValue(thread_local_storage);
    9.73  }
    9.74  
    9.75  int
    9.76 -SDL_TLSSet(SDL_TLSID id, const void *value)
    9.77 +SDL_SYS_SetTLSData(SDL_TLSData *data)
    9.78  {
    9.79 -    SDL_TLSData *data;
    9.80 -
    9.81 -    if (thread_local_storage == TLS_OUT_OF_INDEXES || id <= 0) {
    9.82 -        return SDL_InvalidParamError(id);
    9.83 +    if (generic_local_storage) {
    9.84 +        return SDL_Generic_SetTLSData(data);
    9.85      }
    9.86 -
    9.87 -    data = (SDL_TLSData *)TlsGetValue(thread_local_storage);
    9.88 -    if (!data || id > data->limit) {
    9.89 -        int i, oldlimit, newlimit;
    9.90 -
    9.91 -        oldlimit = data ? data->limit : 0;
    9.92 -        newlimit = (id + TLS_ALLOC_CHUNKSIZE);
    9.93 -        data = (SDL_TLSData *)SDL_realloc(data, sizeof(*data)+(newlimit-1)*sizeof(void*));
    9.94 -        if (!data) {
    9.95 -            return SDL_OutOfMemory();
    9.96 -        }
    9.97 -        data->limit = newlimit;
    9.98 -        for (i = oldlimit; i < newlimit; ++i) {
    9.99 -            data->data[i] = NULL;
   9.100 -        }
   9.101 -        if (!TlsSetValue(thread_local_storage, data)) {
   9.102 -            return SDL_SetError("TlsSetValue() failed");
   9.103 -        }
   9.104 +    if (!TlsSetValue(thread_local_storage, data)) {
   9.105 +        return SDL_SetError("TlsSetValue() failed");
   9.106      }
   9.107 -
   9.108 -    data->data[id-1] = SDL_const_cast(void*, value);
   9.109      return 0;
   9.110  }
   9.111  
    10.1 --- a/test/testthread.c	Wed Jul 10 02:37:57 2013 -0700
    10.2 +++ b/test/testthread.c	Wed Jul 10 18:31:17 2013 -0700
    10.3 @@ -33,7 +33,7 @@
    10.4  int SDLCALL
    10.5  ThreadFunc(void *data)
    10.6  {
    10.7 -    SDL_TLSSet(tls, "baby thread");
    10.8 +    SDL_TLSSet(tls, "baby thread", NULL);
    10.9      printf("Started thread %s: My thread id is %lu, thread data = %s\n",
   10.10             (char *) data, SDL_ThreadID(), (const char *)SDL_TLSGet(tls));
   10.11      while (alive) {
   10.12 @@ -66,7 +66,7 @@
   10.13  
   10.14      tls = SDL_TLSCreate();
   10.15      SDL_assert(tls);
   10.16 -    SDL_TLSSet(tls, "main thread");
   10.17 +    SDL_TLSSet(tls, "main thread", NULL);
   10.18      printf("Main thread data initially: %s\n", (const char *)SDL_TLSGet(tls));
   10.19  
   10.20      alive = 1;