diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 22:52:14 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 22:52:14 -0400 |
commit | 13cc56013842a847a0f6ff805d9ed9181e753ef8 (patch) | |
tree | 76b55717efc36f83c934ee894a8522e8a28eb57f /lib | |
parent | 7c6809ff2bd63d4c97ce9e0b94d39d5180842c48 (diff) | |
parent | a4244454df1296e90cc961c1b636b1176ef0d9a0 (diff) |
Merge branch 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull per-cpu changes from Tejun Heo:
"This pull request contains Kent's per-cpu reference counter. It has
gone through several iterations since the last time and the dynamic
allocation is gone.
The usual usage is relatively straight-forward although the async kill
confirm interface, which is not used in most cases, is somewhat icky.
There also are some interface concerns - e.g. I'm not sure about
passing in the @release callback during init as that becomes funny when we
later implement synchronous kill_and_drain - but nothing too serious
and it's quite useable now.
cgroup_subsys_state refcnting has already been converted and we should
convert module refcnt (Kent?)"
* 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
percpu-refcount: use RCU-sched insted of normal RCU
percpu-refcount: implement percpu_tryget() along with percpu_ref_kill_and_confirm()
percpu-refcount: implement percpu_ref_cancel_init()
percpu-refcount: add __must_check to percpu_ref_init() and don't use ACCESS_ONCE() in percpu_ref_kill_rcu()
percpu-refcount: cosmetic updates
percpu-refcount: consistently use plain (non-sched) RCU
percpu-refcount: Don't use silly cmpxchg()
percpu: implement generic percpu refcounting
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/percpu-refcount.c | 158 |
2 files changed, 159 insertions, 1 deletions
diff --git a/lib/Makefile b/lib/Makefile index 22f0f4e8a9e1..8f8d385187f2 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ | |||
13 | sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ | 13 | sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ |
14 | proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ | 14 | proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ |
15 | is_single_threaded.o plist.o decompress.o kobject_uevent.o \ | 15 | is_single_threaded.o plist.o decompress.o kobject_uevent.o \ |
16 | earlycpio.o | 16 | earlycpio.o percpu-refcount.o |
17 | 17 | ||
18 | obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o | 18 | obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o |
19 | lib-$(CONFIG_MMU) += ioremap.o | 19 | lib-$(CONFIG_MMU) += ioremap.o |
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c new file mode 100644 index 000000000000..7deeb6297a48 --- /dev/null +++ b/lib/percpu-refcount.c | |||
@@ -0,0 +1,158 @@ | |||
1 | #define pr_fmt(fmt) "%s: " fmt "\n", __func__ | ||
2 | |||
3 | #include <linux/kernel.h> | ||
4 | #include <linux/percpu-refcount.h> | ||
5 | |||
6 | /* | ||
7 | * Initially, a percpu refcount is just a set of percpu counters. Initially, we | ||
8 | * don't try to detect the ref hitting 0 - which means that get/put can just | ||
9 | * increment or decrement the local counter. Note that the counter on a | ||
10 | * particular cpu can (and will) wrap - this is fine, when we go to shutdown the | ||
11 | * percpu counters will all sum to the correct value | ||
12 | * | ||
13 | * (More precisely: because modular arithmetic is commutative the sum of all the | ||
14 | * pcpu_count vars will be equal to what it would have been if all the gets and | ||
15 | * puts were done to a single integer, even if some of the percpu integers | ||
16 | * overflow or underflow). | ||
17 | * | ||
18 | * The real trick to implementing percpu refcounts is shutdown. We can't detect | ||
19 | * the ref hitting 0 on every put - this would require global synchronization | ||
20 | * and defeat the whole purpose of using percpu refs. | ||
21 | * | ||
22 | * What we do is require the user to keep track of the initial refcount; we know | ||
23 | * the ref can't hit 0 before the user drops the initial ref, so as long as we | ||
24 | * convert to non percpu mode before the initial ref is dropped everything | ||
25 | * works. | ||
26 | * | ||
27 | * Converting to non percpu mode is done with some RCUish stuff in | ||
28 | * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t | ||
29 | * can't hit 0 before we've added up all the percpu refs. | ||
30 | */ | ||
31 | |||
32 | #define PCPU_COUNT_BIAS (1U << 31) | ||
33 | |||
34 | /** | ||
35 | * percpu_ref_init - initialize a percpu refcount | ||
36 | * @ref: percpu_ref to initialize | ||
37 | * @release: function which will be called when refcount hits 0 | ||
38 | * | ||
39 | * Initializes the refcount in percpu mode with a refcount of 1; analogous to | ||
40 | * atomic_set(ref, 1). | ||
41 | * | ||
42 | * Note that @release must not sleep - it may potentially be called from RCU | ||
43 | * callback context by percpu_ref_kill(). | ||
44 | */ | ||
45 | int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) | ||
46 | { | ||
47 | atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); | ||
48 | |||
49 | ref->pcpu_count = alloc_percpu(unsigned); | ||
50 | if (!ref->pcpu_count) | ||
51 | return -ENOMEM; | ||
52 | |||
53 | ref->release = release; | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | /** | ||
58 | * percpu_ref_cancel_init - cancel percpu_ref_init() | ||
59 | * @ref: percpu_ref to cancel init for | ||
60 | * | ||
61 | * Once a percpu_ref is initialized, its destruction is initiated by | ||
62 | * percpu_ref_kill() and completes asynchronously, which can be painful to | ||
63 | * do when destroying a half-constructed object in init failure path. | ||
64 | * | ||
65 | * This function destroys @ref without invoking @ref->release and the | ||
66 | * memory area containing it can be freed immediately on return. To | ||
67 | * prevent accidental misuse, it's required that @ref has finished | ||
68 | * percpu_ref_init(), whether successful or not, but never used. | ||
69 | * | ||
70 | * The weird name and usage restriction are to prevent people from using | ||
71 | * this function by mistake for normal shutdown instead of | ||
72 | * percpu_ref_kill(). | ||
73 | */ | ||
74 | void percpu_ref_cancel_init(struct percpu_ref *ref) | ||
75 | { | ||
76 | unsigned __percpu *pcpu_count = ref->pcpu_count; | ||
77 | int cpu; | ||
78 | |||
79 | WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); | ||
80 | |||
81 | if (pcpu_count) { | ||
82 | for_each_possible_cpu(cpu) | ||
83 | WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); | ||
84 | free_percpu(ref->pcpu_count); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | static void percpu_ref_kill_rcu(struct rcu_head *rcu) | ||
89 | { | ||
90 | struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); | ||
91 | unsigned __percpu *pcpu_count = ref->pcpu_count; | ||
92 | unsigned count = 0; | ||
93 | int cpu; | ||
94 | |||
95 | /* Mask out PCPU_REF_DEAD */ | ||
96 | pcpu_count = (unsigned __percpu *) | ||
97 | (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK); | ||
98 | |||
99 | for_each_possible_cpu(cpu) | ||
100 | count += *per_cpu_ptr(pcpu_count, cpu); | ||
101 | |||
102 | free_percpu(pcpu_count); | ||
103 | |||
104 | pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); | ||
105 | |||
106 | /* | ||
107 | * It's crucial that we sum the percpu counters _before_ adding the sum | ||
108 | * to &ref->count; since gets could be happening on one cpu while puts | ||
109 | * happen on another, adding a single cpu's count could cause | ||
110 | * @ref->count to hit 0 before we've got a consistent value - but the | ||
111 | * sum of all the counts will be consistent and correct. | ||
112 | * | ||
113 | * Subtracting the bias value then has to happen _after_ adding count to | ||
114 | * &ref->count; we need the bias value to prevent &ref->count from | ||
115 | * reaching 0 before we add the percpu counts. But doing it at the same | ||
116 | * time is equivalent and saves us atomic operations: | ||
117 | */ | ||
118 | |||
119 | atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); | ||
120 | |||
121 | /* @ref is viewed as dead on all CPUs, send out kill confirmation */ | ||
122 | if (ref->confirm_kill) | ||
123 | ref->confirm_kill(ref); | ||
124 | |||
125 | /* | ||
126 | * Now we're in single atomic_t mode with a consistent refcount, so it's | ||
127 | * safe to drop our initial ref: | ||
128 | */ | ||
129 | percpu_ref_put(ref); | ||
130 | } | ||
131 | |||
132 | /** | ||
133 | * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation | ||
134 | * @ref: percpu_ref to kill | ||
135 | * @confirm_kill: optional confirmation callback | ||
136 | * | ||
137 | * Equivalent to percpu_ref_kill() but also schedules kill confirmation if | ||
138 | * @confirm_kill is not NULL. @confirm_kill, which may not block, will be | ||
139 | * called after @ref is seen as dead from all CPUs - all further | ||
140 | * invocations of percpu_ref_tryget() will fail. See percpu_ref_tryget() | ||
141 | * for more details. | ||
142 | * | ||
143 | * Due to the way percpu_ref is implemented, @confirm_kill will be called | ||
144 | * after at least one full RCU grace period has passed but this is an | ||
145 | * implementation detail and callers must not depend on it. | ||
146 | */ | ||
147 | void percpu_ref_kill_and_confirm(struct percpu_ref *ref, | ||
148 | percpu_ref_func_t *confirm_kill) | ||
149 | { | ||
150 | WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD, | ||
151 | "percpu_ref_kill() called more than once!\n"); | ||
152 | |||
153 | ref->pcpu_count = (unsigned __percpu *) | ||
154 | (((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD); | ||
155 | ref->confirm_kill = confirm_kill; | ||
156 | |||
157 | call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); | ||
158 | } | ||