From 89ada67917f516212452443a56b9fd3b65b74dc7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 30 Oct 2005 15:01:59 -0800 Subject: [PATCH] Use alloc_percpu to allocate workqueues locally This patch makes the workqueus use alloc_percpu instead of an array. The workqueues are placed on nodes local to each processor. The workqueue structure can grow to a significant size on a system with lots of processors if this patch is not applied. 64 bit architectures with all debugging features enabled and configured for 512 processors will not be able to boot without this patch. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/workqueue.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 91bacb13a7e2..7cee222231bc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -12,6 +12,8 @@ * Andrew Morton * Kai Petzke * Theodore Ts'o + * + * Made to use alloc_percpu by Christoph Lameter . */ #include @@ -57,7 +59,7 @@ struct cpu_workqueue_struct { * per-CPU workqueues: */ struct workqueue_struct { - struct cpu_workqueue_struct cpu_wq[NR_CPUS]; + struct cpu_workqueue_struct *cpu_wq; const char *name; struct list_head list; /* Empty if single thread */ }; @@ -102,7 +104,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) if (unlikely(is_single_threaded(wq))) cpu = 0; BUG_ON(!list_empty(&work->entry)); - __queue_work(wq->cpu_wq + cpu, work); + __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); ret = 1; } put_cpu(); @@ -118,7 +120,7 @@ static void delayed_work_timer_fn(unsigned long __data) if (unlikely(is_single_threaded(wq))) cpu = 0; - __queue_work(wq->cpu_wq + cpu, work); + __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); } int fastcall queue_delayed_work(struct workqueue_struct *wq, @@ -265,13 +267,13 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) if (is_single_threaded(wq)) { /* Always use cpu 0's area. */ - flush_cpu_workqueue(wq->cpu_wq + 0); + flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, 0)); } else { int cpu; lock_cpu_hotplug(); for_each_online_cpu(cpu) - flush_cpu_workqueue(wq->cpu_wq + cpu); + flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); unlock_cpu_hotplug(); } } @@ -279,7 +281,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, int cpu) { - struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu; + struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); struct task_struct *p; spin_lock_init(&cwq->lock); @@ -312,6 +314,7 @@ struct workqueue_struct *__create_workqueue(const char *name, if (!wq) return NULL; + wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); wq->name = name; /* We don't need the distraction of CPUs appearing and vanishing. */ lock_cpu_hotplug(); @@ -353,7 +356,7 @@ static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu) unsigned long flags; struct task_struct *p; - cwq = wq->cpu_wq + cpu; + cwq = per_cpu_ptr(wq->cpu_wq, cpu); spin_lock_irqsave(&cwq->lock, flags); p = cwq->thread; cwq->thread = NULL; @@ -380,6 +383,7 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock(&workqueue_lock); } unlock_cpu_hotplug(); + free_percpu(wq->cpu_wq); kfree(wq); } @@ -458,7 +462,7 @@ int current_is_keventd(void) BUG_ON(!keventd_wq); - cwq = keventd_wq->cpu_wq + cpu; + cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu); if (current == cwq->thread) ret = 1; @@ -470,7 +474,7 @@ int current_is_keventd(void) /* Take the work from this (downed) CPU. */ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) { - struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu; + struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); LIST_HEAD(list); struct work_struct *work; @@ -481,7 +485,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) printk("Taking work for %s\n", wq->name); work = list_entry(list.next,struct work_struct,entry); list_del(&work->entry); - __queue_work(wq->cpu_wq + smp_processor_id(), work); + __queue_work(per_cpu_ptr(wq->cpu_wq, smp_processor_id()), work); } spin_unlock_irq(&cwq->lock); } @@ -508,15 +512,18 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, case CPU_ONLINE: /* Kick off worker threads. */ list_for_each_entry(wq, &workqueues, list) { - kthread_bind(wq->cpu_wq[hotcpu].thread, hotcpu); - wake_up_process(wq->cpu_wq[hotcpu].thread); + struct cpu_workqueue_struct *cwq; + + cwq = per_cpu_ptr(wq->cpu_wq, hotcpu); + kthread_bind(cwq->thread, hotcpu); + wake_up_process(cwq->thread); } break; case CPU_UP_CANCELED: list_for_each_entry(wq, &workqueues, list) { /* Unbind so it can run. */ - kthread_bind(wq->cpu_wq[hotcpu].thread, + kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, smp_processor_id()); cleanup_workqueue_thread(wq, hotcpu); } -- cgit v1.2.2 From a4c4af7c8dc1eccdfb8c57e1684f08179b4407e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Nov 2005 00:58:38 -0800 Subject: [PATCH] cpu hoptlug: avoid usage of smp_processor_id() in preemptible code Replace smp_processor_id() with any_online_cpu(cpu_online_map) in order to avoid lots of "BUG: using smp_processor_id() in preemptible [00000001] code:..." messages in case taking a cpu online fails. All the traces start at the last notifier_call_chain(...) in kernel/cpu.c. Since we hold the cpu_control semaphore it shouldn't be any problem to access cpu_online_map. The reason why cpu_up failed is simply that the cpu that was supposed to be taken online wasn't even there. That is because on s390 we never know when a new cpu comes and therefore cpu_possible_map consists of only ones and doesn't reflect reality. Signed-off-by: Heiko Carstens Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cee222231bc..42df83d7fad2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -524,7 +524,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, list_for_each_entry(wq, &workqueues, list) { /* Unbind so it can run. */ kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, - smp_processor_id()); + any_online_cpu(cpu_online_map)); cleanup_workqueue_thread(wq, hotcpu); } break; -- cgit v1.2.2 From bce61dd49d6ba7799be2de17c772e4c701558f14 Mon Sep 17 00:00:00 2001 From: Ben Collins Date: Mon, 28 Nov 2005 13:43:56 -0800 Subject: [PATCH] Fix hardcoded cpu=0 in workqueue for per_cpu_ptr() calls Tracked this down on an Ultra Enterprise 3000. It's a 6-way machine. Odd thing about this machine (and it's good for finding bugs like this) is that the CPU id's are not 0 based. For instance, on my machine the CPU's are 6/7/10/11/14/15. This caused some NULL pointer dereference in kernel/workqueue.c because for single_threaded workqueue's, it hardcoded the cpu to 0. I changed the 0's to any_online_cpu(cpu_online_mask), which cpumask.h claims is "First cpu in mask". So this fits the same usage. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/workqueue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 42df83d7fad2..2bd5aee1c736 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -102,7 +102,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) if (!test_and_set_bit(0, &work->pending)) { if (unlikely(is_single_threaded(wq))) - cpu = 0; + cpu = any_online_cpu(cpu_online_map); BUG_ON(!list_empty(&work->entry)); __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); ret = 1; @@ -118,7 +118,7 @@ static void delayed_work_timer_fn(unsigned long __data) int cpu = smp_processor_id(); if (unlikely(is_single_threaded(wq))) - cpu = 0; + cpu = any_online_cpu(cpu_online_map); __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); } @@ -266,8 +266,8 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) might_sleep(); if (is_single_threaded(wq)) { - /* Always use cpu 0's area. */ - flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, 0)); + /* Always use first cpu's area. */ + flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, any_online_cpu(cpu_online_map))); } else { int cpu; @@ -320,7 +320,7 @@ struct workqueue_struct *__create_workqueue(const char *name, lock_cpu_hotplug(); if (singlethread) { INIT_LIST_HEAD(&wq->list); - p = create_workqueue_thread(wq, 0); + p = create_workqueue_thread(wq, any_online_cpu(cpu_online_map)); if (!p) destroy = 1; else @@ -374,7 +374,7 @@ void destroy_workqueue(struct workqueue_struct *wq) /* We don't need the distraction of CPUs appearing and vanishing. */ lock_cpu_hotplug(); if (is_single_threaded(wq)) - cleanup_workqueue_thread(wq, 0); + cleanup_workqueue_thread(wq, any_online_cpu(cpu_online_map)); else { for_each_online_cpu(cpu) cleanup_workqueue_thread(wq, cpu); -- cgit v1.2.2