/* * Copyright (c) 2008, 2009, Bjoern B. Brandenburg * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* Starvation-free, real-time-friendly reader-writer locks. * * This is an implementation of phase-fair reader-writer locks for i386. Readers are * only blocked by at most one reader and one writer phase. Writers gain access * in FIFO order. Both reader and writer paths require two atomic instructions * each (lock/xadd and lock/add). * * For details, see: * * B. Brandenburg and J. Anderson, "Reader-Writer Synchronization for * Shared-Memory Multiprocessor Real-Time Systems", Proceedings of the 21th * Euromicro Conference on Real-Time Systems (ECRTS 2009), * pp. 184-193. IEEE, July 2009. * * Tested on Linux with gcc 4.3.3. */ #ifndef PFRW_H #define PFRW_H typedef struct pf_rwlock{ uint32_t rin, rout, wout, win; } pf_rwlock_t; #define DEFINE_PF_RWLOCK(x) pf_rwlock_t x = \ { 0, 0, 0, 0 } #define PF_RW_LOCK_BIAS 0x100 #define PF_WRITER_BITS 0x0ff static inline void pf_read_lock(pf_rwlock_t *l) { unsigned int tmp = PF_RW_LOCK_BIAS, tmp2; asm volatile(" lock/xaddl %0, %2 \n" " testb %b0, %b0 \n" " jnz 1f \n" ".subsection 2 \n" "1:" " movb %2, %b1 \n" " cmpb %b1, %b0 \n" " jne 2f \n" " rep; nop \n" " jmp 1b \n" ".previous \n" "2:" : "+&q" (tmp), "=&q" (tmp2) : "m" (l->rin) : "memory", "cc"); } static inline int pf_read_trylock(pf_rwlock_t *l) { unsigned int tmp, was_locked; asm volatile(" movl %2, %1 \n" "1:" " testb %b1,%b1 \n" " jnz 2f \n" " movl %1, %0 \n" " addl %3, %0 \n" " lock/cmpxchgl %0,%2\n" /* implicit movl %2, %1 on failure */ " jnz 1b \n" "2:" : "=&q" (tmp), "=&a" (was_locked) : "m" (l->rin), "i" (PF_RW_LOCK_BIAS) : "memory", "cc"); return !(was_locked & PF_WRITER_BITS); } static inline void pf_read_unlock(pf_rwlock_t *l) { asm volatile("lock/addl %1, %0" : /* no output */ : "m" (l->rout), "i" (PF_RW_LOCK_BIAS) : "memory", "cc"); } static inline void pf_write_lock(pf_rwlock_t *l) { int tmp = 1; asm volatile(" lock/xaddl %0, %1 \n" " cmpl %0, %2 \n" " jne 1f \n" ".subsection 2 \n" "1:" " rep; nop \n" " cmpl %0, %2 \n" " je 2f \n" " jmp 1b \n" ".previous \n" "2:" " and $0xff, %0\n" " or $0x80, %0\n" " lock/xaddl %0, %3 \n" " movb $0, %b0 \n" " cmpl %0, %4 \n" " jne 3f \n" ".subsection 2 \n" "3:" " rep; nop \n" " cmpl %0, %4 \n" " je 4f \n" " jmp 3b \n" ".previous \n" "4:" : "+q"(tmp) : "m" (l->win), "m" (l->wout), "m" (l->rin), "m" (l->rout) : "memory", "cc"); } static inline int pf_write_trylock(pf_rwlock_t *l) { unsigned int tmp, tmp2, was_locked; /* Once a writer has incremented the writer ticket, it can't safely * back out in all situations. This limits the write_trylock() * operation. This approximation only works for CPU counts up to * 127. If more than 127 "fixup" operations are executed consecutively, * a slow reader may fail to discern consecutive writer phases (PHID is * 7 bits in this implementation). If a truly safe trylock() operation * is required, then a compact phase-fair RW lock should be used * instead or readers must execute a while (!read_trylock()); loop. The * following workaround is sufficient for our purposes. */ /* pre-check for reader first */ if (l->rin != l->rout) return 0; asm volatile(" movl %4, %0 \n" " movl %3, %1 \n" " cmpl %0, %1 \n" /* writer active? */ " jne 3f \n" " incl %0 \n" " lock/cmpxchgl %0,%3\n" /* writer arrived? */ " jnz 3f \n" " movl %1, %2 \n" " movl %6, %0 \n" " andl $0xff, %2\n" " movl %5, %1 \n" " or $0x80, %2\n" " cmpl %0, %1 \n" /* reader active? */ " jne 2f \n" " addl %2, %0 \n" " lock/cmpxchgl %0,%5\n" /* reader arrived? */ " jnz 2f \n" /* success */ " incl %1 \n" "1: \n" /* cleanup */ ".subsection 2 \n" "2:" /* finish write */ " incl %4 \n" "3:" /* return failure */ " xorl %1, %1 \n" " jmp 1b \n" ".previous" : "=&r" (tmp), /* 0 */ "=&a" (was_locked), /* 1 */ "=&r" (tmp2) /* 2 */ : "m" (l->win), /* 3 */ "m" (l->wout), /* 4 */ "m" (l->rin), /* 5 */ "m" (l->rout) /* 6 */ : "memory", "cc"); return was_locked; } static inline void pf_write_unlock(pf_rwlock_t *l) { unsigned int tmp; asm volatile("movl %2, %0 \n" "movb $0, %1 \n" "incl %0 \n" "movl %0, %2 " : "=&r" (tmp) : "m" (l->rin), "m" (l->wout) : "memory", "cc"); } #ifdef CONFIG_TRACK_LOCK_STATS typedef struct { struct pf_rwlock lock; unsigned long wcontended; unsigned long wuncontended; unsigned long rcontended; unsigned long runcontended; } rwlock_t; static inline void rw_init(rwlock_t *lock) { lock->lock.rin = 0; lock->lock.rout = 0; lock->lock.win = 0; lock->lock.wout = 0; lock->rcontended = 0; lock->runcontended = 0; lock->wcontended = 0; lock->wuncontended = 0; } static inline void rw_writelock(rwlock_t *lock) { if (pf_write_trylock(&lock->lock)) { /* got it without contention */ lock->wuncontended++; } else { /* nope, must wait */ pf_write_lock(&lock->lock); lock->wcontended++; } } static inline void rw_readlock(rwlock_t *lock) { if (pf_read_trylock(&lock->lock)) { /* got it without contention */ atomic_inc(&lock->runcontended); } else { /* nope, must wait */ atomic_inc(&lock->rcontended); pf_read_lock(&lock->lock); } } static inline void rw_writeunlock(rwlock_t *lock) { pf_write_unlock(&lock->lock); } static inline void rw_readunlock(rwlock_t *lock) { pf_read_unlock(&lock->lock); } #else typedef struct pf_rwlock rwlock_t; #define rw_init(x) do {(x)->rin = 0; (x)->rout = 0; (x)->wout = 0; (x)->win = 0;} while (0) #define rw_readlock(x) pf_read_lock(x) #define rw_readunlock(x) pf_read_unlock(x) #define rw_writelock(x) pf_write_lock(x) #define rw_writeunlock(x) pf_write_unlock(x) #endif #endif