diff options
Diffstat (limited to 'lib/percpu-refcount.c')
-rw-r--r-- | lib/percpu-refcount.c | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c new file mode 100644 index 000000000000..7deeb6297a48 --- /dev/null +++ b/lib/percpu-refcount.c | |||
@@ -0,0 +1,158 @@ | |||
/*
 * Format wrapper for this file's pr_*() messages: prepends the calling
 * function's name ("%s: " with __func__) and appends a trailing newline, so
 * callers in this file pass format strings without their own "\n".
 * NOTE(review): relies on the usual kernel convention that pr_fmt is defined
 * before the printk headers are included - confirm if the includes move.
 */
1 | #define pr_fmt(fmt) "%s: " fmt "\n", __func__ | ||
2 | |||
3 | #include <linux/kernel.h> | ||
4 | #include <linux/percpu-refcount.h> | ||
5 | |||
6 | /* | ||
7 | * Initially, a percpu refcount is just a set of percpu counters. Initially, we | ||
8 | * don't try to detect the ref hitting 0 - which means that get/put can just | ||
9 | * increment or decrement the local counter. Note that the counter on a | ||
10 | * particular cpu can (and will) wrap - this is fine, when we go to shutdown the | ||
11 | * percpu counters will all sum to the correct value | ||
12 | * | ||
13 | * (More precisely: because modular arithmetic is commutative the sum of all the | ||
14 | * pcpu_count vars will be equal to what it would have been if all the gets and | ||
15 | * puts were done to a single integer, even if some of the percpu integers | ||
16 | * overflow or underflow). | ||
17 | * | ||
18 | * The real trick to implementing percpu refcounts is shutdown. We can't detect | ||
19 | * the ref hitting 0 on every put - this would require global synchronization | ||
20 | * and defeat the whole purpose of using percpu refs. | ||
21 | * | ||
22 | * What we do is require the user to keep track of the initial refcount; we know | ||
23 | * the ref can't hit 0 before the user drops the initial ref, so as long as we | ||
24 | * convert to non percpu mode before the initial ref is dropped everything | ||
25 | * works. | ||
26 | * | ||
27 | * Converting to non percpu mode is done with some RCUish stuff in | ||
28 | * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t | ||
29 | * can't hit 0 before we've added up all the percpu refs. | ||
30 | */ | ||
31 | |||
/*
 * Bias added to the atomic counter by percpu_ref_init() and subtracted again
 * in percpu_ref_kill_rcu() together with the summed percpu counts; it keeps
 * the atomic counter from reaching 0 before the percpu counts are folded in.
 */
32 | #define PCPU_COUNT_BIAS (1U << 31) | ||
33 | |||
34 | /** | ||
35 | * percpu_ref_init - initialize a percpu refcount | ||
36 | * @ref: percpu_ref to initialize | ||
37 | * @release: function which will be called when refcount hits 0 | ||
38 | * | ||
39 | * Initializes the refcount in single atomic counter mode with a refcount of 1; | ||
40 | * analagous to atomic_set(ref, 1). | ||
41 | * | ||
42 | * Note that @release must not sleep - it may potentially be called from RCU | ||
43 | * callback context by percpu_ref_kill(). | ||
44 | */ | ||
45 | int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) | ||
46 | { | ||
47 | atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); | ||
48 | |||
49 | ref->pcpu_count = alloc_percpu(unsigned); | ||
50 | if (!ref->pcpu_count) | ||
51 | return -ENOMEM; | ||
52 | |||
53 | ref->release = release; | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | /** | ||
58 | * percpu_ref_cancel_init - cancel percpu_ref_init() | ||
59 | * @ref: percpu_ref to cancel init for | ||
60 | * | ||
61 | * Once a percpu_ref is initialized, its destruction is initiated by | ||
62 | * percpu_ref_kill() and completes asynchronously, which can be painful to | ||
63 | * do when destroying a half-constructed object in init failure path. | ||
64 | * | ||
65 | * This function destroys @ref without invoking @ref->release and the | ||
66 | * memory area containing it can be freed immediately on return. To | ||
67 | * prevent accidental misuse, it's required that @ref has finished | ||
68 | * percpu_ref_init(), whether successful or not, but never used. | ||
69 | * | ||
70 | * The weird name and usage restriction are to prevent people from using | ||
71 | * this function by mistake for normal shutdown instead of | ||
72 | * percpu_ref_kill(). | ||
73 | */ | ||
74 | void percpu_ref_cancel_init(struct percpu_ref *ref) | ||
75 | { | ||
76 | unsigned __percpu *pcpu_count = ref->pcpu_count; | ||
77 | int cpu; | ||
78 | |||
79 | WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); | ||
80 | |||
81 | if (pcpu_count) { | ||
82 | for_each_possible_cpu(cpu) | ||
83 | WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); | ||
84 | free_percpu(ref->pcpu_count); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | static void percpu_ref_kill_rcu(struct rcu_head *rcu) | ||
89 | { | ||
90 | struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); | ||
91 | unsigned __percpu *pcpu_count = ref->pcpu_count; | ||
92 | unsigned count = 0; | ||
93 | int cpu; | ||
94 | |||
95 | /* Mask out PCPU_REF_DEAD */ | ||
96 | pcpu_count = (unsigned __percpu *) | ||
97 | (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK); | ||
98 | |||
99 | for_each_possible_cpu(cpu) | ||
100 | count += *per_cpu_ptr(pcpu_count, cpu); | ||
101 | |||
102 | free_percpu(pcpu_count); | ||
103 | |||
104 | pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); | ||
105 | |||
106 | /* | ||
107 | * It's crucial that we sum the percpu counters _before_ adding the sum | ||
108 | * to &ref->count; since gets could be happening on one cpu while puts | ||
109 | * happen on another, adding a single cpu's count could cause | ||
110 | * @ref->count to hit 0 before we've got a consistent value - but the | ||
111 | * sum of all the counts will be consistent and correct. | ||
112 | * | ||
113 | * Subtracting the bias value then has to happen _after_ adding count to | ||
114 | * &ref->count; we need the bias value to prevent &ref->count from | ||
115 | * reaching 0 before we add the percpu counts. But doing it at the same | ||
116 | * time is equivalent and saves us atomic operations: | ||
117 | */ | ||
118 | |||
119 | atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); | ||
120 | |||
121 | /* @ref is viewed as dead on all CPUs, send out kill confirmation */ | ||
122 | if (ref->confirm_kill) | ||
123 | ref->confirm_kill(ref); | ||
124 | |||
125 | /* | ||
126 | * Now we're in single atomic_t mode with a consistent refcount, so it's | ||
127 | * safe to drop our initial ref: | ||
128 | */ | ||
129 | percpu_ref_put(ref); | ||
130 | } | ||
131 | |||
132 | /** | ||
133 | * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation | ||
134 | * @ref: percpu_ref to kill | ||
135 | * @confirm_kill: optional confirmation callback | ||
136 | * | ||
137 | * Equivalent to percpu_ref_kill() but also schedules kill confirmation if | ||
138 | * @confirm_kill is not NULL. @confirm_kill, which may not block, will be | ||
139 | * called after @ref is seen as dead from all CPUs - all further | ||
140 | * invocations of percpu_ref_tryget() will fail. See percpu_ref_tryget() | ||
141 | * for more details. | ||
142 | * | ||
143 | * Due to the way percpu_ref is implemented, @confirm_kill will be called | ||
144 | * after at least one full RCU grace period has passed but this is an | ||
145 | * implementation detail and callers must not depend on it. | ||
146 | */ | ||
147 | void percpu_ref_kill_and_confirm(struct percpu_ref *ref, | ||
148 | percpu_ref_func_t *confirm_kill) | ||
149 | { | ||
150 | WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD, | ||
151 | "percpu_ref_kill() called more than once!\n"); | ||
152 | |||
153 | ref->pcpu_count = (unsigned __percpu *) | ||
154 | (((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD); | ||
155 | ref->confirm_kill = confirm_kill; | ||
156 | |||
157 | call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); | ||
158 | } | ||