aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2010-06-29 04:07:11 -0400
committerTejun Heo <tj@kernel.org>2010-06-29 04:07:11 -0400
commit0f900049cbe2767d47c2a62b54f0e822e1d66840 (patch)
treeb7d2ce3e00ef49c038d0641a860b98b4c24fb203 /kernel
parent1537663f5763892cacf1409ac0efef1b4f332d1e (diff)
workqueue: update cwq alignment
work->data field is used for two purposes. It points to cwq it's queued on and the lower bits are used for flags. Currently, two bits are reserved which is always safe as 4 byte alignment is guaranteed on every architecture. However, future changes will need more flag bits. On SMP, the percpu allocator is capable of honoring larger alignment (there are other users which depend on it) and larger alignment works just fine. On UP, percpu allocator is a thin wrapper around kzalloc/kfree() and doesn't honor alignment requests. This patch introduces WORK_STRUCT_FLAG_BITS and implements alloc/free_cwqs() which guarantees max(1 << WORK_STRUCT_FLAG_BITS, __alignof__(unsigned long long)) alignment both on SMP and UP. On SMP, simply wrapping percpu allocator is enough. On UP, extra space is allocated so that cwq can be aligned and the original pointer can be stored after it which is used in the free path. * Alignment problem on UP is reported by Michal Simek. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Frederic Weisbecker <fweisbec@gmail.com> Reported-by: Michal Simek <michal.simek@petalogix.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/workqueue.c60
1 file changed, 55 insertions, 5 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dc78956ccf03..74a38499b19a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -46,7 +46,9 @@
46 46
47/* 47/*
48 * The per-CPU workqueue (if single thread, we always use the first 48 * The per-CPU workqueue (if single thread, we always use the first
49 * possible cpu). 49 * possible cpu). The lower WORK_STRUCT_FLAG_BITS of
50 * work_struct->data are used for flags and thus cwqs need to be
51 * aligned at two's power of the number of flag bits.
50 */ 52 */
51struct cpu_workqueue_struct { 53struct cpu_workqueue_struct {
52 54
@@ -59,7 +61,7 @@ struct cpu_workqueue_struct {
59 61
60 struct workqueue_struct *wq; /* I: the owning workqueue */ 62 struct workqueue_struct *wq; /* I: the owning workqueue */
61 struct task_struct *thread; 63 struct task_struct *thread;
62} ____cacheline_aligned; 64};
63 65
64/* 66/*
65 * The externally visible workqueue abstraction is an array of 67 * The externally visible workqueue abstraction is an array of
@@ -967,6 +969,53 @@ int current_is_keventd(void)
967 969
968} 970}
969 971
972static struct cpu_workqueue_struct *alloc_cwqs(void)
973{
974 /*
975 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
976 * Make sure that the alignment isn't lower than that of
977 * unsigned long long.
978 */
979 const size_t size = sizeof(struct cpu_workqueue_struct);
980 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
981 __alignof__(unsigned long long));
982 struct cpu_workqueue_struct *cwqs;
983#ifndef CONFIG_SMP
984 void *ptr;
985
986 /*
987 * On UP, percpu allocator doesn't honor alignment parameter
988 * and simply uses arch-dependent default. Allocate enough
989 * room to align cwq and put an extra pointer at the end
990 * pointing back to the originally allocated pointer which
991 * will be used for free.
992 *
993 * FIXME: This really belongs to UP percpu code. Update UP
994 * percpu code to honor alignment and remove this ugliness.
995 */
996 ptr = __alloc_percpu(size + align + sizeof(void *), 1);
997 cwqs = PTR_ALIGN(ptr, align);
998 *(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
999#else
1000 /* On SMP, percpu allocator can do it itself */
1001 cwqs = __alloc_percpu(size, align);
1002#endif
1003 /* just in case, make sure it's actually aligned */
1004 BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
1005 return cwqs;
1006}
1007
1008static void free_cwqs(struct cpu_workqueue_struct *cwqs)
1009{
1010#ifndef CONFIG_SMP
1011 /* on UP, the pointer to free is stored right after the cwq */
1012 if (cwqs)
1013 free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
1014#else
1015 free_percpu(cwqs);
1016#endif
1017}
1018
970static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 1019static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
971{ 1020{
972 struct workqueue_struct *wq = cwq->wq; 1021 struct workqueue_struct *wq = cwq->wq;
@@ -1012,7 +1061,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
1012 if (!wq) 1061 if (!wq)
1013 goto err; 1062 goto err;
1014 1063
1015 wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); 1064 wq->cpu_wq = alloc_cwqs();
1016 if (!wq->cpu_wq) 1065 if (!wq->cpu_wq)
1017 goto err; 1066 goto err;
1018 1067
@@ -1031,6 +1080,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
1031 for_each_possible_cpu(cpu) { 1080 for_each_possible_cpu(cpu) {
1032 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 1081 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1033 1082
1083 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
1034 cwq->wq = wq; 1084 cwq->wq = wq;
1035 cwq->cpu = cpu; 1085 cwq->cpu = cpu;
1036 spin_lock_init(&cwq->lock); 1086 spin_lock_init(&cwq->lock);
@@ -1059,7 +1109,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
1059 return wq; 1109 return wq;
1060err: 1110err:
1061 if (wq) { 1111 if (wq) {
1062 free_percpu(wq->cpu_wq); 1112 free_cwqs(wq->cpu_wq);
1063 kfree(wq); 1113 kfree(wq);
1064 } 1114 }
1065 return NULL; 1115 return NULL;
@@ -1112,7 +1162,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
1112 for_each_possible_cpu(cpu) 1162 for_each_possible_cpu(cpu)
1113 cleanup_workqueue_thread(get_cwq(cpu, wq)); 1163 cleanup_workqueue_thread(get_cwq(cpu, wq));
1114 1164
1115 free_percpu(wq->cpu_wq); 1165 free_cwqs(wq->cpu_wq);
1116 kfree(wq); 1166 kfree(wq);
1117} 1167}
1118EXPORT_SYMBOL_GPL(destroy_workqueue); 1168EXPORT_SYMBOL_GPL(destroy_workqueue);