diff options
Diffstat (limited to 'lib/percpu-refcount.c')
-rw-r--r-- | lib/percpu-refcount.c | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c new file mode 100644 index 000000000000..6f0ffd702a09 --- /dev/null +++ b/lib/percpu-refcount.c | |||
@@ -0,0 +1,128 @@ | |||
1 | #define pr_fmt(fmt) "%s: " fmt "\n", __func__ | ||
2 | |||
3 | #include <linux/kernel.h> | ||
4 | #include <linux/percpu-refcount.h> | ||
5 | |||
6 | /* | ||
7 | * Initially, a percpu refcount is just a set of percpu counters. In this mode we | ||
8 | * don't try to detect the ref hitting 0 - which means that get/put can just | ||
9 | * increment or decrement the local counter. Note that the counter on a | ||
10 | * particular cpu can (and will) wrap - this is fine: when we go to shut down, the | ||
11 | * percpu counters will all sum to the correct value. | ||
12 | * | ||
13 | * (More precisely: because modular arithmetic is commutative the sum of all the | ||
14 | * pcpu_count vars will be equal to what it would have been if all the gets and | ||
15 | * puts were done to a single integer, even if some of the percpu integers | ||
16 | * overflow or underflow). | ||
17 | * | ||
18 | * The real trick to implementing percpu refcounts is shutdown. We can't detect | ||
19 | * the ref hitting 0 on every put - this would require global synchronization | ||
20 | * and defeat the whole purpose of using percpu refs. | ||
21 | * | ||
22 | * What we do is require the user to keep track of the initial refcount; we know | ||
23 | * the ref can't hit 0 before the user drops the initial ref, so as long as we | ||
24 | * convert to non percpu mode before the initial ref is dropped everything | ||
25 | * works. | ||
26 | * | ||
27 | * Converting to non percpu mode is done with some RCUish stuff in | ||
28 | * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t | ||
29 | * can't hit 0 before we've added up all the percpu refs. | ||
30 | */ | ||
31 | |||
32 | #define PCPU_COUNT_BIAS (1U << 31) | ||
33 | |||
34 | /** | ||
35 | * percpu_ref_init - initialize a percpu refcount | ||
36 | * @ref: ref to initialize | ||
37 | * @release: function which will be called when refcount hits 0 | ||
38 | * | ||
39 | * Initializes the refcount in single atomic counter mode with a refcount of 1; | ||
40 | * analagous to atomic_set(ref, 1). | ||
41 | * | ||
42 | * Note that @release must not sleep - it may potentially be called from RCU | ||
43 | * callback context by percpu_ref_kill(). | ||
44 | */ | ||
45 | int percpu_ref_init(struct percpu_ref *ref, percpu_ref_release *release) | ||
46 | { | ||
47 | atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); | ||
48 | |||
49 | ref->pcpu_count = alloc_percpu(unsigned); | ||
50 | if (!ref->pcpu_count) | ||
51 | return -ENOMEM; | ||
52 | |||
53 | ref->release = release; | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static void percpu_ref_kill_rcu(struct rcu_head *rcu) | ||
58 | { | ||
59 | struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); | ||
60 | unsigned __percpu *pcpu_count; | ||
61 | unsigned count = 0; | ||
62 | int cpu; | ||
63 | |||
64 | pcpu_count = ACCESS_ONCE(ref->pcpu_count); | ||
65 | |||
66 | /* Mask out PCPU_REF_DEAD */ | ||
67 | pcpu_count = (unsigned __percpu *) | ||
68 | (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK); | ||
69 | |||
70 | for_each_possible_cpu(cpu) | ||
71 | count += *per_cpu_ptr(pcpu_count, cpu); | ||
72 | |||
73 | free_percpu(pcpu_count); | ||
74 | |||
75 | pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); | ||
76 | |||
77 | /* | ||
78 | * It's crucial that we sum the percpu counters _before_ adding the sum | ||
79 | * to &ref->count; since gets could be happening on one cpu while puts | ||
80 | * happen on another, adding a single cpu's count could cause | ||
81 | * @ref->count to hit 0 before we've got a consistent value - but the | ||
82 | * sum of all the counts will be consistent and correct. | ||
83 | * | ||
84 | * Subtracting the bias value then has to happen _after_ adding count to | ||
85 | * &ref->count; we need the bias value to prevent &ref->count from | ||
86 | * reaching 0 before we add the percpu counts. But doing it at the same | ||
87 | * time is equivalent and saves us atomic operations: | ||
88 | */ | ||
89 | |||
90 | atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); | ||
91 | |||
92 | /* | ||
93 | * Now we're in single atomic_t mode with a consistent refcount, so it's | ||
94 | * safe to drop our initial ref: | ||
95 | */ | ||
96 | percpu_ref_put(ref); | ||
97 | } | ||
98 | |||
99 | /** | ||
100 | * percpu_ref_kill - safely drop initial ref | ||
101 | * | ||
102 | * Must be used to drop the initial ref on a percpu refcount; must be called | ||
103 | * precisely once before shutdown. | ||
104 | * | ||
105 | * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the | ||
106 | * percpu counters and dropping the initial ref. | ||
107 | */ | ||
108 | void percpu_ref_kill(struct percpu_ref *ref) | ||
109 | { | ||
110 | unsigned __percpu *pcpu_count, *old, *new; | ||
111 | |||
112 | pcpu_count = ACCESS_ONCE(ref->pcpu_count); | ||
113 | |||
114 | do { | ||
115 | if (REF_STATUS(pcpu_count) == PCPU_REF_DEAD) { | ||
116 | WARN(1, "percpu_ref_kill() called more than once!\n"); | ||
117 | return; | ||
118 | } | ||
119 | |||
120 | old = pcpu_count; | ||
121 | new = (unsigned __percpu *) | ||
122 | (((unsigned long) pcpu_count)|PCPU_REF_DEAD); | ||
123 | |||
124 | pcpu_count = cmpxchg(&ref->pcpu_count, old, new); | ||
125 | } while (pcpu_count != old); | ||
126 | |||
127 | call_rcu(&ref->rcu, percpu_ref_kill_rcu); | ||
128 | } | ||