Diffstat (limited to 'kernel/workqueue.c')

-rw-r--r--   kernel/workqueue.c | 60
1 file changed, 55 insertions, 5 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dc78956ccf03..74a38499b19a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -46,7 +46,9 @@
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
- * possible cpu).
+ * possible cpu). The lower WORK_STRUCT_FLAG_BITS of
+ * work_struct->data are used for flags and thus cwqs need to be
+ * aligned at two's power of the number of flag bits.
  */
 struct cpu_workqueue_struct {
 
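The comment added above is the rationale for the whole patch: work_struct->data carries flag bits in its low WORK_STRUCT_FLAG_BITS and, in the remaining bits, a pointer to the cwq the work item is queued on, which only works if every cwq address has those low bits clear. Below is a minimal userspace sketch of that packing scheme, not kernel code; FLAG_BITS, struct cwq_like, pack() and unpack() are illustrative stand-ins for WORK_STRUCT_FLAG_BITS, WORK_STRUCT_FLAG_MASK, struct cpu_workqueue_struct and the work_struct->data helpers.

/*
 * Illustrative only: because the object is aligned to 1 << FLAG_BITS,
 * the low FLAG_BITS of its address are always zero, so they can carry
 * flags and a simple mask recovers the pointer.
 */
#include <assert.h>
#include <stdint.h>

#define FLAG_BITS	2
#define FLAG_MASK	((1UL << FLAG_BITS) - 1)

struct cwq_like {
	int dummy;
} __attribute__((aligned(1 << FLAG_BITS)));

static unsigned long pack(struct cwq_like *cwq, unsigned long flags)
{
	/* guaranteed by the alignment attribute above */
	assert(((uintptr_t)cwq & FLAG_MASK) == 0);
	return (uintptr_t)cwq | (flags & FLAG_MASK);
}

static struct cwq_like *unpack(unsigned long data)
{
	/* mask off the flag bits to get the object back */
	return (struct cwq_like *)(data & ~FLAG_MASK);
}

int main(void)
{
	static struct cwq_like cwq;
	unsigned long data = pack(&cwq, 0x3);

	assert(unpack(data) == &cwq);
	return 0;
}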
@@ -59,7 +61,7 @@ struct cpu_workqueue_struct {
 
 	struct workqueue_struct *wq;	/* I: the owning workqueue */
 	struct task_struct *thread;
-} ____cacheline_aligned;
+};
 
 /*
  * The externally visible workqueue abstraction is an array of
@@ -967,6 +969,53 @@ int current_is_keventd(void)
 
 }
 
+static struct cpu_workqueue_struct *alloc_cwqs(void)
+{
+	/*
+	 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
+	 * Make sure that the alignment isn't lower than that of
+	 * unsigned long long.
+	 */
+	const size_t size = sizeof(struct cpu_workqueue_struct);
+	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
+				   __alignof__(unsigned long long));
+	struct cpu_workqueue_struct *cwqs;
+#ifndef CONFIG_SMP
+	void *ptr;
+
+	/*
+	 * On UP, percpu allocator doesn't honor alignment parameter
+	 * and simply uses arch-dependent default. Allocate enough
+	 * room to align cwq and put an extra pointer at the end
+	 * pointing back to the originally allocated pointer which
+	 * will be used for free.
+	 *
+	 * FIXME: This really belongs to UP percpu code. Update UP
+	 * percpu code to honor alignment and remove this ugliness.
+	 */
+	ptr = __alloc_percpu(size + align + sizeof(void *), 1);
+	cwqs = PTR_ALIGN(ptr, align);
+	*(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
+#else
+	/* On SMP, percpu allocator can do it itself */
+	cwqs = __alloc_percpu(size, align);
+#endif
+	/* just in case, make sure it's actually aligned */
+	BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
+	return cwqs;
+}
+
+static void free_cwqs(struct cpu_workqueue_struct *cwqs)
+{
+#ifndef CONFIG_SMP
+	/* on UP, the pointer to free is stored right after the cwq */
+	if (cwqs)
+		free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
+#else
+	free_percpu(cwqs);
+#endif
+}
+
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	struct workqueue_struct *wq = cwq->wq;
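alloc_cwqs() and free_cwqs() above work around the fact that the UP percpu allocator ignores the alignment argument: over-allocate by align plus one pointer, align the returned address by hand, and stash the original allocation right behind the per-cpu object so it can be recovered at free time. Below is a userspace sketch of the same over-allocate-and-stash-a-cookie technique, using plain malloc()/free() in place of __alloc_percpu()/free_percpu(); the function names are made up for the example.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/*
 * Over-allocate by align + sizeof(void *), hand back an aligned pointer,
 * and store the original malloc() result right after the object -- the
 * same trick alloc_cwqs() plays with "cwqs + 1" on UP.
 */
static void *alloc_aligned_with_cookie(size_t size, size_t align)
{
	void *ptr = malloc(size + align + sizeof(void *));
	uintptr_t aligned;

	if (!ptr)
		return NULL;
	/* round up to the next multiple of align (a power of two) */
	aligned = ((uintptr_t)ptr + align - 1) & ~((uintptr_t)align - 1);
	/*
	 * Cookie for the later free lives just past the aligned object;
	 * assumes size keeps that slot pointer-aligned, as the size of a
	 * properly aligned struct does.
	 */
	*(void **)(aligned + size) = ptr;
	return (void *)aligned;
}

static void free_aligned_with_cookie(void *obj, size_t size)
{
	if (obj)
		free(*(void **)((uintptr_t)obj + size));
}

int main(void)
{
	size_t size = 40, align = 64;
	void *obj = alloc_aligned_with_cookie(size, align);

	assert(obj && ((uintptr_t)obj & (align - 1)) == 0);
	free_aligned_with_cookie(obj, size);
	return 0;
}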
@@ -1012,7 +1061,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	if (!wq)
 		goto err;
 
-	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
+	wq->cpu_wq = alloc_cwqs();
 	if (!wq->cpu_wq)
 		goto err;
 
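wq->cpu_wq is still a percpu pointer after this change, so the per-CPU lookups below work exactly as before; get_cwq() is presumably just a thin per_cpu_ptr() wrapper along the lines of the sketch here (it is not part of this diff and is shown only for context).

static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
					    struct workqueue_struct *wq)
{
	/* assumed shape: look up this workqueue's cwq for @cpu */
	return per_cpu_ptr(wq->cpu_wq, cpu);
}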
@@ -1031,6 +1080,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	for_each_possible_cpu(cpu) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 
+		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
 		cwq->wq = wq;
 		cwq->cpu = cpu;
 		spin_lock_init(&cwq->lock);
@@ -1059,7 +1109,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	return wq;
 err:
 	if (wq) {
-		free_percpu(wq->cpu_wq);
+		free_cwqs(wq->cpu_wq);
 		kfree(wq);
 	}
 	return NULL;
@@ -1112,7 +1162,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	for_each_possible_cpu(cpu)
 		cleanup_workqueue_thread(get_cwq(cpu, wq));
 
-	free_percpu(wq->cpu_wq);
+	free_cwqs(wq->cpu_wq);
 	kfree(wq);
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);