/* include/linux/queuelock.h */
#ifndef _UNC_QUEUELOCK_H_
#define _UNC_QUEUELOCK_H_
/**
* Queue lock
*
* This is an implementation of T. Anderson's queue lock.
* It strives to follow the normal Linux locking conventions
* as much as possible. The rules for acquiring a lock are:
*
 *  1) The caller must ensure interrupts and preemption are disabled.
 *
 *  2) The caller _cannot_ recursively acquire the lock.
 *
 *  3) The caller may not sleep while holding the lock. This is currently
 *     not enforced, but the lock will not work correctly if the holder
 *     sleeps.
*/
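
/*
 * Example usage (a minimal sketch; my_lock, my_data, and flags are
 * illustrative names, not part of this header):
 *
 *	static queuelock_t my_lock;
 *	unsigned long flags;
 *
 *	queue_lock_init(&my_lock);         (once, before the lock is first used)
 *
 *	queue_lock_irqsave(&my_lock, flags);
 *	... critical section: touch my_data, no sleeping, no nesting ...
 *	queue_unlock_irqrestore(&my_lock, flags);
 *
 * The queue_lock_irqsave()/queue_unlock_irqrestore() helpers defined at the
 * end of this file disable local interrupts around the critical section,
 * which covers the interrupt-disabling part of rule 1 above.
 */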

#include <linux/cache.h>
#include <asm/atomic.h>
#include <linux/smp.h>

typedef struct {
	/* Pad the values being spun on to make sure that each CPU's
	 * slot sits in its own cache line (no false sharing).
	 */
	union {
		volatile enum {
			MUST_WAIT,
			HAS_LOCK
		} val;
		char 	padding[SMP_CACHE_BYTES];
	} slots[NR_CPUS];

	/* Since spin_slot is not being spun on, it can live in a shared
	 * cache line. next_slot will be evicted anyway on every attempt
	 * to acquire the lock.
	 */
	int		spin_slot[NR_CPUS];

	/* The next slot that will be available.
	 */
	atomic_t 	next_slot;
} queuelock_t;


static inline void queue_lock_init(queuelock_t *lock)
{
	int i;
	for (i = 0; i < NR_CPUS; i++) {
		lock->slots[i].val 	= MUST_WAIT;
		lock->spin_slot[i]	= i;
	}
	lock->slots[0].val 	= HAS_LOCK;
	atomic_set(&lock->next_slot, 0);
}


static inline void queue_lock(queuelock_t *lock)
{
	int me = smp_processor_id();
	volatile int *spin_var;
	/* Get the slot to spin on. atomic_inc_return() returns the
	 * incremented value, so subtract one to get the slot we were
	 * assigned.
	 */
	lock->spin_slot[me] = atomic_inc_return(&lock->next_slot) - 1;
	/* Check for wrap-around. This could probably be optimized away if
	 * we ensured that the counter always wraps to a multiple of
	 * NR_CPUS (e.g. by making NR_CPUS a power of two).
	 */
	if (unlikely(lock->spin_slot[me] == NR_CPUS - 1))
		atomic_add(-NR_CPUS, &lock->next_slot);
	/* range limit */
	lock->spin_slot[me] %= NR_CPUS;
	/* spin until you acquire the lock */
	spin_var = (volatile int *) &lock->slots[lock->spin_slot[me]].val;
	while (*spin_var == MUST_WAIT)
		cpu_relax();

	/* We now hold the lock. Reset our slot to MUST_WAIT so that it
	 * can be reused once the counter wraps around to it again.
	 */
	lock->slots[lock->spin_slot[me]].val = MUST_WAIT;
	barrier();
}


static inline void queue_unlock(queuelock_t *lock)
{
	int me = smp_processor_id();
	barrier();
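	/* hand the lock to the next CPU in line by marking its slot */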
	lock->slots[(lock->spin_slot[me] + 1) % NR_CPUS].val = HAS_LOCK;
}

#define queue_lock_irqsave(lock, flags) \
	do { local_irq_save(flags); queue_lock(lock); } while (0)

#define queue_unlock_irqrestore(lock, flags) \
	do { queue_unlock(lock); local_irq_restore(flags); } while (0)

#endif /*	_UNC_QUEUELOCK_H_	*/