diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 33 | ||||
-rw-r--r-- | kernel/power/main.c | 2 | ||||
-rw-r--r-- | kernel/power/power.h | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 83 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 210 | ||||
-rw-r--r-- | kernel/ptrace.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 160 | ||||
-rw-r--r-- | kernel/softlockup.c | 3 | ||||
-rw-r--r-- | kernel/sysctl.c | 136 |
9 files changed, 400 insertions, 235 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 3619e939182e..d61ba88f34e5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -21,6 +21,24 @@ EXPORT_SYMBOL_GPL(cpucontrol); | |||
21 | 21 | ||
22 | static struct notifier_block *cpu_chain; | 22 | static struct notifier_block *cpu_chain; |
23 | 23 | ||
24 | /* | ||
25 | * Used by callers to check whether they need to acquire cpucontrol | ||
26 | * to protect a cpu from being removed. It's sometimes required to | ||
27 | * call these functions both for normal operations, and in response to | ||
28 | * a cpu being added/removed. If the context of the call is in the same | ||
29 | * thread context as a CPU hotplug thread, we don't need to take the lock | ||
30 | * since it's already protected. | ||
31 | * Check drivers/cpufreq/cpufreq.c for its usage - Ashok Raj | ||
32 | */ | ||
33 | |||
34 | int current_in_cpu_hotplug(void) | ||
35 | { | ||
36 | return (current->flags & PF_HOTPLUG_CPU); | ||
37 | } | ||
38 | |||
39 | EXPORT_SYMBOL_GPL(current_in_cpu_hotplug); | ||
40 | |||
41 | |||
24 | /* Need to know about CPUs going up/down? */ | 42 | /* Need to know about CPUs going up/down? */ |
25 | int register_cpu_notifier(struct notifier_block *nb) | 43 | int register_cpu_notifier(struct notifier_block *nb) |
26 | { | 44 | { |
@@ -94,6 +112,13 @@ int cpu_down(unsigned int cpu) | |||
94 | goto out; | 112 | goto out; |
95 | } | 113 | } |
96 | 114 | ||
115 | /* | ||
116 | * Leave a trace in current->flags indicating we are already in the | ||
117 | * process of performing CPU hotplug. Callers can check whether cpucontrol | ||
118 | * is already held by the current thread and, if so, avoid | ||
119 | * a deadlock by not acquiring the lock again | ||
120 | */ | ||
121 | current->flags |= PF_HOTPLUG_CPU; | ||
97 | err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, | 122 | err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, |
98 | (void *)(long)cpu); | 123 | (void *)(long)cpu); |
99 | if (err == NOTIFY_BAD) { | 124 | if (err == NOTIFY_BAD) { |
@@ -146,6 +171,7 @@ out_thread: | |||
146 | out_allowed: | 171 | out_allowed: |
147 | set_cpus_allowed(current, old_allowed); | 172 | set_cpus_allowed(current, old_allowed); |
148 | out: | 173 | out: |
174 | current->flags &= ~PF_HOTPLUG_CPU; | ||
149 | unlock_cpu_hotplug(); | 175 | unlock_cpu_hotplug(); |
150 | return err; | 176 | return err; |
151 | } | 177 | } |
@@ -163,6 +189,12 @@ int __devinit cpu_up(unsigned int cpu) | |||
163 | ret = -EINVAL; | 189 | ret = -EINVAL; |
164 | goto out; | 190 | goto out; |
165 | } | 191 | } |
192 | |||
193 | /* | ||
194 | * Leave a trace in current->flags indicating we are already in the | ||
195 | * process of performing CPU hotplug. | ||
196 | */ | ||
197 | current->flags |= PF_HOTPLUG_CPU; | ||
166 | ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); | 198 | ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); |
167 | if (ret == NOTIFY_BAD) { | 199 | if (ret == NOTIFY_BAD) { |
168 | printk("%s: attempt to bring up CPU %u failed\n", | 200 | printk("%s: attempt to bring up CPU %u failed\n", |
@@ -185,6 +217,7 @@ out_notify: | |||
185 | if (ret != 0) | 217 | if (ret != 0) |
186 | notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); | 218 | notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); |
187 | out: | 219 | out: |
220 | current->flags &= ~PF_HOTPLUG_CPU; | ||
188 | up(&cpucontrol); | 221 | up(&cpucontrol); |
189 | return ret; | 222 | return ret; |
190 | } | 223 | } |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 18d7d693fbba..6ee2cad530e8 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -167,7 +167,7 @@ static int enter_state(suspend_state_t state) | |||
167 | { | 167 | { |
168 | int error; | 168 | int error; |
169 | 169 | ||
170 | if (pm_ops->valid && !pm_ops->valid(state)) | 170 | if (pm_ops && pm_ops->valid && !pm_ops->valid(state)) |
171 | return -ENODEV; | 171 | return -ENODEV; |
172 | if (down_trylock(&pm_sem)) | 172 | if (down_trylock(&pm_sem)) |
173 | return -EBUSY; | 173 | return -EBUSY; |
diff --git a/kernel/power/power.h b/kernel/power/power.h index d4fd96a135ab..6c042b5ee14b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -65,8 +65,8 @@ extern suspend_pagedir_t *pagedir_save; | |||
65 | extern asmlinkage int swsusp_arch_suspend(void); | 65 | extern asmlinkage int swsusp_arch_suspend(void); |
66 | extern asmlinkage int swsusp_arch_resume(void); | 66 | extern asmlinkage int swsusp_arch_resume(void); |
67 | 67 | ||
68 | extern int restore_highmem(void); | 68 | extern void free_pagedir(struct pbe *pblist); |
69 | extern struct pbe * alloc_pagedir(unsigned nr_pages); | 69 | extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); |
70 | extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); | 70 | extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); |
71 | extern void swsusp_free(void); | 71 | extern void swsusp_free(void); |
72 | extern int enough_swap(unsigned nr_pages); | 72 | extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 723f5179883e..4a6dbcefd378 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -88,8 +88,7 @@ static int save_highmem_zone(struct zone *zone) | |||
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
90 | 90 | ||
91 | 91 | int save_highmem(void) | |
92 | static int save_highmem(void) | ||
93 | { | 92 | { |
94 | struct zone *zone; | 93 | struct zone *zone; |
95 | int res = 0; | 94 | int res = 0; |
@@ -120,11 +119,7 @@ int restore_highmem(void) | |||
120 | } | 119 | } |
121 | return 0; | 120 | return 0; |
122 | } | 121 | } |
123 | #else | 122 | #endif |
124 | static int save_highmem(void) { return 0; } | ||
125 | int restore_highmem(void) { return 0; } | ||
126 | #endif /* CONFIG_HIGHMEM */ | ||
127 | |||
128 | 123 | ||
129 | static int pfn_is_nosave(unsigned long pfn) | 124 | static int pfn_is_nosave(unsigned long pfn) |
130 | { | 125 | { |
@@ -216,7 +211,7 @@ static void copy_data_pages(struct pbe *pblist) | |||
216 | * free_pagedir - free pages allocated with alloc_pagedir() | 211 | * free_pagedir - free pages allocated with alloc_pagedir() |
217 | */ | 212 | */ |
218 | 213 | ||
219 | static void free_pagedir(struct pbe *pblist) | 214 | void free_pagedir(struct pbe *pblist) |
220 | { | 215 | { |
221 | struct pbe *pbe; | 216 | struct pbe *pbe; |
222 | 217 | ||
@@ -269,9 +264,30 @@ void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) | |||
269 | pr_debug("create_pbe_list(): initialized %d PBEs\n", num); | 264 | pr_debug("create_pbe_list(): initialized %d PBEs\n", num); |
270 | } | 265 | } |
271 | 266 | ||
272 | static void *alloc_image_page(void) | 267 | /** |
268 | * @safe_needed - on resume, for storing the PBE list and the image, | ||
269 | * we can only use memory pages that do not conflict with the pages | ||
270 | * which had been used before suspend. | ||
271 | * | ||
272 | * The unsafe pages are marked with the PG_nosave_free flag | ||
273 | * | ||
274 | * Allocated but unusable (ie eaten) memory pages should be marked | ||
275 | * so that swsusp_free() can release them | ||
276 | */ | ||
277 | |||
278 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | ||
273 | { | 279 | { |
274 | void *res = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); | 280 | void *res; |
281 | |||
282 | if (safe_needed) | ||
283 | do { | ||
284 | res = (void *)get_zeroed_page(gfp_mask); | ||
285 | if (res && PageNosaveFree(virt_to_page(res))) | ||
286 | /* This is for swsusp_free() */ | ||
287 | SetPageNosave(virt_to_page(res)); | ||
288 | } while (res && PageNosaveFree(virt_to_page(res))); | ||
289 | else | ||
290 | res = (void *)get_zeroed_page(gfp_mask); | ||
275 | if (res) { | 291 | if (res) { |
276 | SetPageNosave(virt_to_page(res)); | 292 | SetPageNosave(virt_to_page(res)); |
277 | SetPageNosaveFree(virt_to_page(res)); | 293 | SetPageNosaveFree(virt_to_page(res)); |
@@ -279,6 +295,11 @@ static void *alloc_image_page(void) | |||
279 | return res; | 295 | return res; |
280 | } | 296 | } |
281 | 297 | ||
298 | unsigned long get_safe_page(gfp_t gfp_mask) | ||
299 | { | ||
300 | return (unsigned long)alloc_image_page(gfp_mask, 1); | ||
301 | } | ||
302 | |||
282 | /** | 303 | /** |
283 | * alloc_pagedir - Allocate the page directory. | 304 | * alloc_pagedir - Allocate the page directory. |
284 | * | 305 | * |
@@ -292,7 +313,7 @@ static void *alloc_image_page(void) | |||
292 | * On each page we set up a list of struct_pbe elements. | 313 | * On each page we set up a list of struct_pbe elements. |
293 | */ | 314 | */ |
294 | 315 | ||
295 | struct pbe *alloc_pagedir(unsigned int nr_pages) | 316 | struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) |
296 | { | 317 | { |
297 | unsigned int num; | 318 | unsigned int num; |
298 | struct pbe *pblist, *pbe; | 319 | struct pbe *pblist, *pbe; |
@@ -301,12 +322,12 @@ struct pbe *alloc_pagedir(unsigned int nr_pages) | |||
301 | return NULL; | 322 | return NULL; |
302 | 323 | ||
303 | pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); | 324 | pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); |
304 | pblist = alloc_image_page(); | 325 | pblist = alloc_image_page(gfp_mask, safe_needed); |
305 | /* FIXME: rewrite this ugly loop */ | 326 | /* FIXME: rewrite this ugly loop */ |
306 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; | 327 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; |
307 | pbe = pbe->next, num += PBES_PER_PAGE) { | 328 | pbe = pbe->next, num += PBES_PER_PAGE) { |
308 | pbe += PB_PAGE_SKIP; | 329 | pbe += PB_PAGE_SKIP; |
309 | pbe->next = alloc_image_page(); | 330 | pbe->next = alloc_image_page(gfp_mask, safe_needed); |
310 | } | 331 | } |
311 | if (!pbe) { /* get_zeroed_page() failed */ | 332 | if (!pbe) { /* get_zeroed_page() failed */ |
312 | free_pagedir(pblist); | 333 | free_pagedir(pblist); |
@@ -354,24 +375,32 @@ static int enough_free_mem(unsigned int nr_pages) | |||
354 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | 375 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); |
355 | } | 376 | } |
356 | 377 | ||
378 | int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) | ||
379 | { | ||
380 | struct pbe *p; | ||
381 | |||
382 | for_each_pbe (p, pblist) { | ||
383 | p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); | ||
384 | if (!p->address) | ||
385 | return -ENOMEM; | ||
386 | } | ||
387 | return 0; | ||
388 | } | ||
357 | 389 | ||
358 | static struct pbe *swsusp_alloc(unsigned int nr_pages) | 390 | static struct pbe *swsusp_alloc(unsigned int nr_pages) |
359 | { | 391 | { |
360 | struct pbe *pblist, *p; | 392 | struct pbe *pblist; |
361 | 393 | ||
362 | if (!(pblist = alloc_pagedir(nr_pages))) { | 394 | if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { |
363 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); | 395 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); |
364 | return NULL; | 396 | return NULL; |
365 | } | 397 | } |
366 | create_pbe_list(pblist, nr_pages); | 398 | create_pbe_list(pblist, nr_pages); |
367 | 399 | ||
368 | for_each_pbe (p, pblist) { | 400 | if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { |
369 | p->address = (unsigned long)alloc_image_page(); | 401 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); |
370 | if (!p->address) { | 402 | swsusp_free(); |
371 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); | 403 | return NULL; |
372 | swsusp_free(); | ||
373 | return NULL; | ||
374 | } | ||
375 | } | 404 | } |
376 | 405 | ||
377 | return pblist; | 406 | return pblist; |
@@ -382,11 +411,6 @@ asmlinkage int swsusp_save(void) | |||
382 | unsigned int nr_pages; | 411 | unsigned int nr_pages; |
383 | 412 | ||
384 | pr_debug("swsusp: critical section: \n"); | 413 | pr_debug("swsusp: critical section: \n"); |
385 | if (save_highmem()) { | ||
386 | printk(KERN_CRIT "swsusp: Not enough free pages for highmem\n"); | ||
387 | restore_highmem(); | ||
388 | return -ENOMEM; | ||
389 | } | ||
390 | 414 | ||
391 | drain_local_pages(); | 415 | drain_local_pages(); |
392 | nr_pages = count_data_pages(); | 416 | nr_pages = count_data_pages(); |
@@ -406,11 +430,6 @@ asmlinkage int swsusp_save(void) | |||
406 | return -ENOMEM; | 430 | return -ENOMEM; |
407 | } | 431 | } |
408 | 432 | ||
409 | if (!enough_swap(nr_pages)) { | ||
410 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | ||
411 | return -ENOSPC; | ||
412 | } | ||
413 | |||
414 | pagedir_nosave = swsusp_alloc(nr_pages); | 433 | pagedir_nosave = swsusp_alloc(nr_pages); |
415 | if (!pagedir_nosave) | 434 | if (!pagedir_nosave) |
416 | return -ENOMEM; | 435 | return -ENOMEM; |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index e1ab28b9b217..c05f46e7348f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -73,6 +73,14 @@ | |||
73 | 73 | ||
74 | #include "power.h" | 74 | #include "power.h" |
75 | 75 | ||
76 | #ifdef CONFIG_HIGHMEM | ||
77 | int save_highmem(void); | ||
78 | int restore_highmem(void); | ||
79 | #else | ||
80 | static int save_highmem(void) { return 0; } | ||
81 | static int restore_highmem(void) { return 0; } | ||
82 | #endif | ||
83 | |||
76 | #define CIPHER "aes" | 84 | #define CIPHER "aes" |
77 | #define MAXKEY 32 | 85 | #define MAXKEY 32 |
78 | #define MAXIV 32 | 86 | #define MAXIV 32 |
@@ -500,6 +508,26 @@ static int write_pagedir(void) | |||
500 | } | 508 | } |
501 | 509 | ||
502 | /** | 510 | /** |
511 | * enough_swap - Make sure we have enough swap to save the image. | ||
512 | * | ||
513 | * Returns TRUE or FALSE after checking the total amount of swap | ||
513 | * space available. | ||
515 | * | ||
516 | * FIXME: si_swapinfo(&i) returns all swap devices information. | ||
517 | * We should only consider resume_device. | ||
518 | */ | ||
519 | |||
520 | static int enough_swap(unsigned int nr_pages) | ||
521 | { | ||
522 | struct sysinfo i; | ||
523 | |||
524 | si_swapinfo(&i); | ||
525 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | ||
526 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | ||
527 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
528 | } | ||
529 | |||
530 | /** | ||
503 | * write_suspend_image - Write entire image and metadata. | 531 | * write_suspend_image - Write entire image and metadata. |
504 | * | 532 | * |
505 | */ | 533 | */ |
@@ -507,6 +535,11 @@ static int write_suspend_image(void) | |||
507 | { | 535 | { |
508 | int error; | 536 | int error; |
509 | 537 | ||
538 | if (!enough_swap(nr_copy_pages)) { | ||
539 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | ||
540 | return -ENOSPC; | ||
541 | } | ||
542 | |||
510 | init_header(); | 543 | init_header(); |
511 | if ((error = data_write())) | 544 | if ((error = data_write())) |
512 | goto FreeData; | 545 | goto FreeData; |
@@ -526,27 +559,6 @@ static int write_suspend_image(void) | |||
526 | goto Done; | 559 | goto Done; |
527 | } | 560 | } |
528 | 561 | ||
529 | /** | ||
530 | * enough_swap - Make sure we have enough swap to save the image. | ||
531 | * | ||
532 | * Returns TRUE or FALSE after checking the total amount of swap | ||
533 | * space available. | |||
534 | * | ||
535 | * FIXME: si_swapinfo(&i) returns all swap devices information. | ||
536 | * We should only consider resume_device. | ||
537 | */ | ||
538 | |||
539 | int enough_swap(unsigned int nr_pages) | ||
540 | { | ||
541 | struct sysinfo i; | ||
542 | |||
543 | si_swapinfo(&i); | ||
544 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | ||
545 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | ||
546 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
547 | } | ||
548 | |||
549 | |||
550 | /* It is important _NOT_ to umount filesystems at this point. We want | 562 | /* It is important _NOT_ to umount filesystems at this point. We want |
551 | * them synced (in case something goes wrong) but we DO not want to mark | 563 | * them synced (in case something goes wrong) but we DO not want to mark |
552 | * filesystem clean: it is not. (And it does not matter, if we resume | 564 | * filesystem clean: it is not. (And it does not matter, if we resume |
@@ -556,12 +568,15 @@ int swsusp_write(void) | |||
556 | { | 568 | { |
557 | int error; | 569 | int error; |
558 | 570 | ||
571 | if ((error = swsusp_swap_check())) { | ||
572 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | ||
573 | return error; | ||
574 | } | ||
559 | lock_swapdevices(); | 575 | lock_swapdevices(); |
560 | error = write_suspend_image(); | 576 | error = write_suspend_image(); |
561 | /* This will unlock ignored swap devices since writing is finished */ | 577 | /* This will unlock ignored swap devices since writing is finished */ |
562 | lock_swapdevices(); | 578 | lock_swapdevices(); |
563 | return error; | 579 | return error; |
564 | |||
565 | } | 580 | } |
566 | 581 | ||
567 | 582 | ||
@@ -569,6 +584,7 @@ int swsusp_write(void) | |||
569 | int swsusp_suspend(void) | 584 | int swsusp_suspend(void) |
570 | { | 585 | { |
571 | int error; | 586 | int error; |
587 | |||
572 | if ((error = arch_prepare_suspend())) | 588 | if ((error = arch_prepare_suspend())) |
573 | return error; | 589 | return error; |
574 | local_irq_disable(); | 590 | local_irq_disable(); |
@@ -580,15 +596,12 @@ int swsusp_suspend(void) | |||
580 | */ | 596 | */ |
581 | if ((error = device_power_down(PMSG_FREEZE))) { | 597 | if ((error = device_power_down(PMSG_FREEZE))) { |
582 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); | 598 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); |
583 | local_irq_enable(); | 599 | goto Enable_irqs; |
584 | return error; | ||
585 | } | 600 | } |
586 | 601 | ||
587 | if ((error = swsusp_swap_check())) { | 602 | if ((error = save_highmem())) { |
588 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | 603 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); |
589 | device_power_up(); | 604 | goto Restore_highmem; |
590 | local_irq_enable(); | ||
591 | return error; | ||
592 | } | 605 | } |
593 | 606 | ||
594 | save_processor_state(); | 607 | save_processor_state(); |
@@ -596,8 +609,10 @@ int swsusp_suspend(void) | |||
596 | printk(KERN_ERR "Error %d suspending\n", error); | 609 | printk(KERN_ERR "Error %d suspending\n", error); |
597 | /* Restore control flow magically appears here */ | 610 | /* Restore control flow magically appears here */ |
598 | restore_processor_state(); | 611 | restore_processor_state(); |
612 | Restore_highmem: | ||
599 | restore_highmem(); | 613 | restore_highmem(); |
600 | device_power_up(); | 614 | device_power_up(); |
615 | Enable_irqs: | ||
601 | local_irq_enable(); | 616 | local_irq_enable(); |
602 | return error; | 617 | return error; |
603 | } | 618 | } |
@@ -629,127 +644,43 @@ int swsusp_resume(void) | |||
629 | } | 644 | } |
630 | 645 | ||
631 | /** | 646 | /** |
632 | * On resume, for storing the PBE list and the image, | 647 | * mark_unsafe_pages - mark the pages that cannot be used for storing |
633 | * we can only use memory pages that do not conflict with the pages | 648 | * the image during resume, because they conflict with the pages that |
634 | * which had been used before suspend. | 649 | * had been used before suspend |
635 | * | ||
636 | * We don't know which pages are usable until we allocate them. | ||
637 | * | ||
638 | * Allocated but unusable (ie eaten) memory pages are marked so that | ||
639 | * swsusp_free() can release them | ||
640 | */ | ||
641 | |||
642 | unsigned long get_safe_page(gfp_t gfp_mask) | ||
643 | { | ||
644 | unsigned long m; | ||
645 | |||
646 | do { | ||
647 | m = get_zeroed_page(gfp_mask); | ||
648 | if (m && PageNosaveFree(virt_to_page(m))) | ||
649 | /* This is for swsusp_free() */ | ||
650 | SetPageNosave(virt_to_page(m)); | ||
651 | } while (m && PageNosaveFree(virt_to_page(m))); | ||
652 | if (m) { | ||
653 | /* This is for swsusp_free() */ | ||
654 | SetPageNosave(virt_to_page(m)); | ||
655 | SetPageNosaveFree(virt_to_page(m)); | ||
656 | } | ||
657 | return m; | ||
658 | } | ||
659 | |||
660 | /** | ||
661 | * check_pagedir - We ensure here that pages that the PBEs point to | ||
662 | * won't collide with pages where we're going to restore from the loaded | ||
663 | * pages later | ||
664 | */ | ||
665 | |||
666 | static int check_pagedir(struct pbe *pblist) | ||
667 | { | ||
668 | struct pbe *p; | ||
669 | |||
670 | /* This is necessary, so that we can free allocated pages | ||
671 | * in case of failure | ||
672 | */ | ||
673 | for_each_pbe (p, pblist) | ||
674 | p->address = 0UL; | ||
675 | |||
676 | for_each_pbe (p, pblist) { | ||
677 | p->address = get_safe_page(GFP_ATOMIC); | ||
678 | if (!p->address) | ||
679 | return -ENOMEM; | ||
680 | } | ||
681 | return 0; | ||
682 | } | ||
683 | |||
684 | /** | ||
685 | * swsusp_pagedir_relocate - It is possible, that some memory pages | ||
686 | * occupied by the list of PBEs collide with pages where we're going to | ||
687 | * restore from the loaded pages later. We relocate them here. | ||
688 | */ | 650 | */ |
689 | 651 | ||
690 | static struct pbe *swsusp_pagedir_relocate(struct pbe *pblist) | 652 | static void mark_unsafe_pages(struct pbe *pblist) |
691 | { | 653 | { |
692 | struct zone *zone; | 654 | struct zone *zone; |
693 | unsigned long zone_pfn; | 655 | unsigned long zone_pfn; |
694 | struct pbe *pbpage, *tail, *p; | 656 | struct pbe *p; |
695 | void *m; | ||
696 | int rel = 0; | ||
697 | 657 | ||
698 | if (!pblist) /* a sanity check */ | 658 | if (!pblist) /* a sanity check */ |
699 | return NULL; | 659 | return; |
700 | |||
701 | pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n", | ||
702 | swsusp_info.pagedir_pages); | ||
703 | 660 | ||
704 | /* Clear page flags */ | 661 | /* Clear page flags */ |
705 | |||
706 | for_each_zone (zone) { | 662 | for_each_zone (zone) { |
707 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 663 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) |
708 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) | 664 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) |
709 | ClearPageNosaveFree(pfn_to_page(zone_pfn + | 665 | ClearPageNosaveFree(pfn_to_page(zone_pfn + |
710 | zone->zone_start_pfn)); | 666 | zone->zone_start_pfn)); |
711 | } | 667 | } |
712 | 668 | ||
713 | /* Mark orig addresses */ | 669 | /* Mark orig addresses */ |
714 | |||
715 | for_each_pbe (p, pblist) | 670 | for_each_pbe (p, pblist) |
716 | SetPageNosaveFree(virt_to_page(p->orig_address)); | 671 | SetPageNosaveFree(virt_to_page(p->orig_address)); |
717 | 672 | ||
718 | tail = pblist + PB_PAGE_SKIP; | 673 | } |
719 | |||
720 | /* Relocate colliding pages */ | ||
721 | |||
722 | for_each_pb_page (pbpage, pblist) { | ||
723 | if (PageNosaveFree(virt_to_page((unsigned long)pbpage))) { | ||
724 | m = (void *)get_safe_page(GFP_ATOMIC | __GFP_COLD); | ||
725 | if (!m) | ||
726 | return NULL; | ||
727 | memcpy(m, (void *)pbpage, PAGE_SIZE); | ||
728 | if (pbpage == pblist) | ||
729 | pblist = (struct pbe *)m; | ||
730 | else | ||
731 | tail->next = (struct pbe *)m; | ||
732 | pbpage = (struct pbe *)m; | ||
733 | |||
734 | /* We have to link the PBEs again */ | ||
735 | for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++) | ||
736 | if (p->next) /* needed to save the end */ | ||
737 | p->next = p + 1; | ||
738 | |||
739 | rel++; | ||
740 | } | ||
741 | tail = pbpage + PB_PAGE_SKIP; | ||
742 | } | ||
743 | 674 | ||
744 | /* This is for swsusp_free() */ | 675 | static void copy_page_backup_list(struct pbe *dst, struct pbe *src) |
745 | for_each_pb_page (pbpage, pblist) { | 676 | { |
746 | SetPageNosave(virt_to_page(pbpage)); | 677 | /* We assume both lists contain the same number of elements */ |
747 | SetPageNosaveFree(virt_to_page(pbpage)); | 678 | while (src) { |
679 | dst->orig_address = src->orig_address; | ||
680 | dst->swap_address = src->swap_address; | ||
681 | dst = dst->next; | ||
682 | src = src->next; | ||
748 | } | 683 | } |
749 | |||
750 | printk("swsusp: Relocated %d pages\n", rel); | ||
751 | |||
752 | return pblist; | ||
753 | } | 684 | } |
754 | 685 | ||
755 | /* | 686 | /* |
@@ -888,7 +819,7 @@ static int check_sig(void) | |||
888 | * Reset swap signature now. | 819 | * Reset swap signature now. |
889 | */ | 820 | */ |
890 | error = bio_write_page(0, &swsusp_header); | 821 | error = bio_write_page(0, &swsusp_header); |
891 | } else { | 822 | } else { |
892 | return -EINVAL; | 823 | return -EINVAL; |
893 | } | 824 | } |
894 | if (!error) | 825 | if (!error) |
@@ -990,20 +921,25 @@ static int read_suspend_image(void) | |||
990 | int error = 0; | 921 | int error = 0; |
991 | struct pbe *p; | 922 | struct pbe *p; |
992 | 923 | ||
993 | if (!(p = alloc_pagedir(nr_copy_pages))) | 924 | if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0))) |
994 | return -ENOMEM; | 925 | return -ENOMEM; |
995 | 926 | ||
996 | if ((error = read_pagedir(p))) | 927 | if ((error = read_pagedir(p))) |
997 | return error; | 928 | return error; |
998 | |||
999 | create_pbe_list(p, nr_copy_pages); | 929 | create_pbe_list(p, nr_copy_pages); |
1000 | 930 | mark_unsafe_pages(p); | |
1001 | if (!(pagedir_nosave = swsusp_pagedir_relocate(p))) | 931 | pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); |
932 | if (pagedir_nosave) { | ||
933 | create_pbe_list(pagedir_nosave, nr_copy_pages); | ||
934 | copy_page_backup_list(pagedir_nosave, p); | ||
935 | } | ||
936 | free_pagedir(p); | ||
937 | if (!pagedir_nosave) | ||
1002 | return -ENOMEM; | 938 | return -ENOMEM; |
1003 | 939 | ||
1004 | /* Allocate memory for the image and read the data from swap */ | 940 | /* Allocate memory for the image and read the data from swap */ |
1005 | 941 | ||
1006 | error = check_pagedir(pagedir_nosave); | 942 | error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1); |
1007 | 943 | ||
1008 | if (!error) | 944 | if (!error) |
1009 | error = data_read(pagedir_nosave); | 945 | error = data_read(pagedir_nosave); |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 5b8dd98a230e..b88d4186cd7a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -155,7 +155,7 @@ int ptrace_attach(struct task_struct *task) | |||
155 | retval = -EPERM; | 155 | retval = -EPERM; |
156 | if (task->pid <= 1) | 156 | if (task->pid <= 1) |
157 | goto bad; | 157 | goto bad; |
158 | if (task == current) | 158 | if (task->tgid == current->tgid) |
159 | goto bad; | 159 | goto bad; |
160 | /* the same process cannot be attached many times */ | 160 | /* the same process cannot be attached many times */ |
161 | if (task->ptrace & PT_PTRACED) | 161 | if (task->ptrace & PT_PTRACED) |
diff --git a/kernel/sched.c b/kernel/sched.c index 3ce26954be12..b6506671b2be 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -206,6 +206,7 @@ struct runqueue { | |||
206 | */ | 206 | */ |
207 | unsigned long nr_running; | 207 | unsigned long nr_running; |
208 | #ifdef CONFIG_SMP | 208 | #ifdef CONFIG_SMP |
209 | unsigned long prio_bias; | ||
209 | unsigned long cpu_load[3]; | 210 | unsigned long cpu_load[3]; |
210 | #endif | 211 | #endif |
211 | unsigned long long nr_switches; | 212 | unsigned long long nr_switches; |
@@ -659,13 +660,68 @@ static int effective_prio(task_t *p) | |||
659 | return prio; | 660 | return prio; |
660 | } | 661 | } |
661 | 662 | ||
663 | #ifdef CONFIG_SMP | ||
664 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
665 | { | ||
666 | rq->prio_bias += MAX_PRIO - prio; | ||
667 | } | ||
668 | |||
669 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
670 | { | ||
671 | rq->prio_bias -= MAX_PRIO - prio; | ||
672 | } | ||
673 | |||
674 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
675 | { | ||
676 | rq->nr_running++; | ||
677 | if (rt_task(p)) { | ||
678 | if (p != rq->migration_thread) | ||
679 | /* | ||
680 | * The migration thread does the actual balancing. Do | ||
681 | * not bias by its priority as the ultra high priority | ||
682 | * will skew balancing adversely. | ||
683 | */ | ||
684 | inc_prio_bias(rq, p->prio); | ||
685 | } else | ||
686 | inc_prio_bias(rq, p->static_prio); | ||
687 | } | ||
688 | |||
689 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
690 | { | ||
691 | rq->nr_running--; | ||
692 | if (rt_task(p)) { | ||
693 | if (p != rq->migration_thread) | ||
694 | dec_prio_bias(rq, p->prio); | ||
695 | } else | ||
696 | dec_prio_bias(rq, p->static_prio); | ||
697 | } | ||
698 | #else | ||
699 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
700 | { | ||
701 | } | ||
702 | |||
703 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
704 | { | ||
705 | } | ||
706 | |||
707 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
708 | { | ||
709 | rq->nr_running++; | ||
710 | } | ||
711 | |||
712 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
713 | { | ||
714 | rq->nr_running--; | ||
715 | } | ||
716 | #endif | ||
717 | |||
662 | /* | 718 | /* |
663 | * __activate_task - move a task to the runqueue. | 719 | * __activate_task - move a task to the runqueue. |
664 | */ | 720 | */ |
665 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 721 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
666 | { | 722 | { |
667 | enqueue_task(p, rq->active); | 723 | enqueue_task(p, rq->active); |
668 | rq->nr_running++; | 724 | inc_nr_running(p, rq); |
669 | } | 725 | } |
670 | 726 | ||
671 | /* | 727 | /* |
@@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) | |||
674 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 730 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
675 | { | 731 | { |
676 | enqueue_task_head(p, rq->active); | 732 | enqueue_task_head(p, rq->active); |
677 | rq->nr_running++; | 733 | inc_nr_running(p, rq); |
678 | } | 734 | } |
679 | 735 | ||
680 | static int recalc_task_prio(task_t *p, unsigned long long now) | 736 | static int recalc_task_prio(task_t *p, unsigned long long now) |
@@ -759,7 +815,8 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
759 | } | 815 | } |
760 | #endif | 816 | #endif |
761 | 817 | ||
762 | p->prio = recalc_task_prio(p, now); | 818 | if (!rt_task(p)) |
819 | p->prio = recalc_task_prio(p, now); | ||
763 | 820 | ||
764 | /* | 821 | /* |
765 | * This checks to make sure it's not an uninterruptible task | 822 | * This checks to make sure it's not an uninterruptible task |
@@ -793,7 +850,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
793 | */ | 850 | */ |
794 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 851 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
795 | { | 852 | { |
796 | rq->nr_running--; | 853 | dec_nr_running(p, rq); |
797 | dequeue_task(p, p->array); | 854 | dequeue_task(p, p->array); |
798 | p->array = NULL; | 855 | p->array = NULL; |
799 | } | 856 | } |
@@ -808,21 +865,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) | |||
808 | #ifdef CONFIG_SMP | 865 | #ifdef CONFIG_SMP |
809 | static void resched_task(task_t *p) | 866 | static void resched_task(task_t *p) |
810 | { | 867 | { |
811 | int need_resched, nrpolling; | 868 | int cpu; |
812 | 869 | ||
813 | assert_spin_locked(&task_rq(p)->lock); | 870 | assert_spin_locked(&task_rq(p)->lock); |
814 | 871 | ||
815 | /* minimise the chance of sending an interrupt to poll_idle() */ | 872 | if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) |
816 | nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 873 | return; |
817 | need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); | 874 | |
818 | nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 875 | set_tsk_thread_flag(p, TIF_NEED_RESCHED); |
876 | |||
877 | cpu = task_cpu(p); | ||
878 | if (cpu == smp_processor_id()) | ||
879 | return; | ||
819 | 880 | ||
820 | if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) | 881 | /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ |
821 | smp_send_reschedule(task_cpu(p)); | 882 | smp_mb(); |
883 | if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) | ||
884 | smp_send_reschedule(cpu); | ||
822 | } | 885 | } |
823 | #else | 886 | #else |
824 | static inline void resched_task(task_t *p) | 887 | static inline void resched_task(task_t *p) |
825 | { | 888 | { |
889 | assert_spin_locked(&task_rq(p)->lock); | ||
826 | set_tsk_need_resched(p); | 890 | set_tsk_need_resched(p); |
827 | } | 891 | } |
828 | #endif | 892 | #endif |
@@ -930,27 +994,61 @@ void kick_process(task_t *p) | |||
930 | * We want to under-estimate the load of migration sources, to | 994 | * We want to under-estimate the load of migration sources, to |
931 | * balance conservatively. | 995 | * balance conservatively. |
932 | */ | 996 | */ |
933 | static inline unsigned long source_load(int cpu, int type) | 997 | static inline unsigned long __source_load(int cpu, int type, enum idle_type idle) |
934 | { | 998 | { |
935 | runqueue_t *rq = cpu_rq(cpu); | 999 | runqueue_t *rq = cpu_rq(cpu); |
936 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1000 | unsigned long running = rq->nr_running; |
1001 | unsigned long source_load, cpu_load = rq->cpu_load[type-1], | ||
1002 | load_now = running * SCHED_LOAD_SCALE; | ||
1003 | |||
937 | if (type == 0) | 1004 | if (type == 0) |
938 | return load_now; | 1005 | source_load = load_now; |
1006 | else | ||
1007 | source_load = min(cpu_load, load_now); | ||
939 | 1008 | ||
940 | return min(rq->cpu_load[type-1], load_now); | 1009 | if (running > 1 || (idle == NOT_IDLE && running)) |
1010 | /* | ||
1011 | * If we are busy rebalancing the load is biased by | ||
1012 | * priority to create 'nice' support across cpus. When | ||
1013 | * idle rebalancing we should only bias the source_load if | ||
1014 | * there is more than one task running on that queue to | ||
1015 | * prevent idle rebalance from trying to pull tasks from a | ||
1016 | * queue with only one running task. | ||
1017 | */ | ||
1018 | source_load = source_load * rq->prio_bias / running; | ||
1019 | |||
1020 | return source_load; | ||
1021 | } | ||
1022 | |||
1023 | static inline unsigned long source_load(int cpu, int type) | ||
1024 | { | ||
1025 | return __source_load(cpu, type, NOT_IDLE); | ||
941 | } | 1026 | } |
942 | 1027 | ||
943 | /* | 1028 | /* |
944 | * Return a high guess at the load of a migration-target cpu | 1029 | * Return a high guess at the load of a migration-target cpu |
945 | */ | 1030 | */ |
946 | static inline unsigned long target_load(int cpu, int type) | 1031 | static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) |
947 | { | 1032 | { |
948 | runqueue_t *rq = cpu_rq(cpu); | 1033 | runqueue_t *rq = cpu_rq(cpu); |
949 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1034 | unsigned long running = rq->nr_running; |
1035 | unsigned long target_load, cpu_load = rq->cpu_load[type-1], | ||
1036 | load_now = running * SCHED_LOAD_SCALE; | ||
1037 | |||
950 | if (type == 0) | 1038 | if (type == 0) |
951 | return load_now; | 1039 | target_load = load_now; |
1040 | else | ||
1041 | target_load = max(cpu_load, load_now); | ||
1042 | |||
1043 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1044 | target_load = target_load * rq->prio_bias / running; | ||
952 | 1045 | ||
953 | return max(rq->cpu_load[type-1], load_now); | 1046 | return target_load; |
1047 | } | ||
1048 | |||
1049 | static inline unsigned long target_load(int cpu, int type) | ||
1050 | { | ||
1051 | return __target_load(cpu, type, NOT_IDLE); | ||
954 | } | 1052 | } |
955 | 1053 | ||
956 | /* | 1054 | /* |
@@ -1411,7 +1509,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) | |||
1411 | list_add_tail(&p->run_list, &current->run_list); | 1509 | list_add_tail(&p->run_list, &current->run_list); |
1412 | p->array = current->array; | 1510 | p->array = current->array; |
1413 | p->array->nr_active++; | 1511 | p->array->nr_active++; |
1414 | rq->nr_running++; | 1512 | inc_nr_running(p, rq); |
1415 | } | 1513 | } |
1416 | set_need_resched(); | 1514 | set_need_resched(); |
1417 | } else | 1515 | } else |
@@ -1756,9 +1854,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
1756 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1854 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
1757 | { | 1855 | { |
1758 | dequeue_task(p, src_array); | 1856 | dequeue_task(p, src_array); |
1759 | src_rq->nr_running--; | 1857 | dec_nr_running(p, src_rq); |
1760 | set_task_cpu(p, this_cpu); | 1858 | set_task_cpu(p, this_cpu); |
1761 | this_rq->nr_running++; | 1859 | inc_nr_running(p, this_rq); |
1762 | enqueue_task(p, this_array); | 1860 | enqueue_task(p, this_array); |
1763 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1861 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
1764 | + this_rq->timestamp_last_tick; | 1862 | + this_rq->timestamp_last_tick; |
@@ -1937,9 +2035,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
1937 | 2035 | ||
1938 | /* Bias balancing toward cpus of our domain */ | 2036 | /* Bias balancing toward cpus of our domain */ |
1939 | if (local_group) | 2037 | if (local_group) |
1940 | load = target_load(i, load_idx); | 2038 | load = __target_load(i, load_idx, idle); |
1941 | else | 2039 | else |
1942 | load = source_load(i, load_idx); | 2040 | load = __source_load(i, load_idx, idle); |
1943 | 2041 | ||
1944 | avg_load += load; | 2042 | avg_load += load; |
1945 | } | 2043 | } |
@@ -2044,14 +2142,15 @@ out_balanced: | |||
2044 | /* | 2142 | /* |
2045 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 2143 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
2046 | */ | 2144 | */ |
2047 | static runqueue_t *find_busiest_queue(struct sched_group *group) | 2145 | static runqueue_t *find_busiest_queue(struct sched_group *group, |
2146 | enum idle_type idle) | ||
2048 | { | 2147 | { |
2049 | unsigned long load, max_load = 0; | 2148 | unsigned long load, max_load = 0; |
2050 | runqueue_t *busiest = NULL; | 2149 | runqueue_t *busiest = NULL; |
2051 | int i; | 2150 | int i; |
2052 | 2151 | ||
2053 | for_each_cpu_mask(i, group->cpumask) { | 2152 | for_each_cpu_mask(i, group->cpumask) { |
2054 | load = source_load(i, 0); | 2153 | load = __source_load(i, 0, idle); |
2055 | 2154 | ||
2056 | if (load > max_load) { | 2155 | if (load > max_load) { |
2057 | max_load = load; | 2156 | max_load = load; |
@@ -2095,7 +2194,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
2095 | goto out_balanced; | 2194 | goto out_balanced; |
2096 | } | 2195 | } |
2097 | 2196 | ||
2098 | busiest = find_busiest_queue(group); | 2197 | busiest = find_busiest_queue(group, idle); |
2099 | if (!busiest) { | 2198 | if (!busiest) { |
2100 | schedstat_inc(sd, lb_nobusyq[idle]); | 2199 | schedstat_inc(sd, lb_nobusyq[idle]); |
2101 | goto out_balanced; | 2200 | goto out_balanced; |
@@ -2218,7 +2317,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
2218 | goto out_balanced; | 2317 | goto out_balanced; |
2219 | } | 2318 | } |
2220 | 2319 | ||
2221 | busiest = find_busiest_queue(group); | 2320 | busiest = find_busiest_queue(group, NEWLY_IDLE); |
2222 | if (!busiest) { | 2321 | if (!busiest) { |
2223 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2322 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); |
2224 | goto out_balanced; | 2323 | goto out_balanced; |
@@ -3451,8 +3550,10 @@ void set_user_nice(task_t *p, long nice) | |||
3451 | goto out_unlock; | 3550 | goto out_unlock; |
3452 | } | 3551 | } |
3453 | array = p->array; | 3552 | array = p->array; |
3454 | if (array) | 3553 | if (array) { |
3455 | dequeue_task(p, array); | 3554 | dequeue_task(p, array); |
3555 | dec_prio_bias(rq, p->static_prio); | ||
3556 | } | ||
3456 | 3557 | ||
3457 | old_prio = p->prio; | 3558 | old_prio = p->prio; |
3458 | new_prio = NICE_TO_PRIO(nice); | 3559 | new_prio = NICE_TO_PRIO(nice); |
@@ -3462,6 +3563,7 @@ void set_user_nice(task_t *p, long nice) | |||
3462 | 3563 | ||
3463 | if (array) { | 3564 | if (array) { |
3464 | enqueue_task(p, array); | 3565 | enqueue_task(p, array); |
3566 | inc_prio_bias(rq, p->static_prio); | ||
3465 | /* | 3567 | /* |
3466 | * If the task increased its priority or is running and | 3568 | * If the task increased its priority or is running and |
3467 | * lowered its priority, then reschedule its CPU: | 3569 | * lowered its priority, then reschedule its CPU: |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index a2dcceb9437d..c67189a25d52 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -73,9 +73,6 @@ void softlockup_tick(struct pt_regs *regs) | |||
73 | static int watchdog(void * __bind_cpu) | 73 | static int watchdog(void * __bind_cpu) |
74 | { | 74 | { |
75 | struct sched_param param = { .sched_priority = 99 }; | 75 | struct sched_param param = { .sched_priority = 99 }; |
76 | int this_cpu = (long) __bind_cpu; | ||
77 | |||
78 | printk("softlockup thread %d started up.\n", this_cpu); | ||
79 | 76 | ||
80 | sched_setscheduler(current, SCHED_FIFO, &param); | 77 | sched_setscheduler(current, SCHED_FIFO, &param); |
81 | current->flags |= PF_NOFREEZE; | 78 | current->flags |= PF_NOFREEZE; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c4f35f96884d..9990e10192e8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -169,7 +169,7 @@ struct file_operations proc_sys_file_operations = { | |||
169 | 169 | ||
170 | extern struct proc_dir_entry *proc_sys_root; | 170 | extern struct proc_dir_entry *proc_sys_root; |
171 | 171 | ||
172 | static void register_proc_table(ctl_table *, struct proc_dir_entry *); | 172 | static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); |
173 | static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); | 173 | static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); |
174 | #endif | 174 | #endif |
175 | 175 | ||
@@ -992,10 +992,51 @@ static ctl_table dev_table[] = { | |||
992 | 992 | ||
993 | extern void init_irq_proc (void); | 993 | extern void init_irq_proc (void); |
994 | 994 | ||
995 | static DEFINE_SPINLOCK(sysctl_lock); | ||
996 | |||
997 | /* called under sysctl_lock */ | ||
998 | static int use_table(struct ctl_table_header *p) | ||
999 | { | ||
1000 | if (unlikely(p->unregistering)) | ||
1001 | return 0; | ||
1002 | p->used++; | ||
1003 | return 1; | ||
1004 | } | ||
1005 | |||
1006 | /* called under sysctl_lock */ | ||
1007 | static void unuse_table(struct ctl_table_header *p) | ||
1008 | { | ||
1009 | if (!--p->used) | ||
1010 | if (unlikely(p->unregistering)) | ||
1011 | complete(p->unregistering); | ||
1012 | } | ||
1013 | |||
1014 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
1015 | static void start_unregistering(struct ctl_table_header *p) | ||
1016 | { | ||
1017 | /* | ||
1018 | * if p->used is 0, nobody will ever touch that entry again; | ||
1019 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
1020 | */ | ||
1021 | if (unlikely(p->used)) { | ||
1022 | struct completion wait; | ||
1023 | init_completion(&wait); | ||
1024 | p->unregistering = &wait; | ||
1025 | spin_unlock(&sysctl_lock); | ||
1026 | wait_for_completion(&wait); | ||
1027 | spin_lock(&sysctl_lock); | ||
1028 | } | ||
1029 | /* | ||
1030 | * do not remove from the list until nobody holds it; walking the | ||
1031 | * list in do_sysctl() relies on that. | ||
1032 | */ | ||
1033 | list_del_init(&p->ctl_entry); | ||
1034 | } | ||
1035 | |||
995 | void __init sysctl_init(void) | 1036 | void __init sysctl_init(void) |
996 | { | 1037 | { |
997 | #ifdef CONFIG_PROC_FS | 1038 | #ifdef CONFIG_PROC_FS |
998 | register_proc_table(root_table, proc_sys_root); | 1039 | register_proc_table(root_table, proc_sys_root, &root_table_header); |
999 | init_irq_proc(); | 1040 | init_irq_proc(); |
1000 | #endif | 1041 | #endif |
1001 | } | 1042 | } |
@@ -1004,6 +1045,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
1004 | void __user *newval, size_t newlen) | 1045 | void __user *newval, size_t newlen) |
1005 | { | 1046 | { |
1006 | struct list_head *tmp; | 1047 | struct list_head *tmp; |
1048 | int error = -ENOTDIR; | ||
1007 | 1049 | ||
1008 | if (nlen <= 0 || nlen >= CTL_MAXNAME) | 1050 | if (nlen <= 0 || nlen >= CTL_MAXNAME) |
1009 | return -ENOTDIR; | 1051 | return -ENOTDIR; |
@@ -1012,20 +1054,30 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
1012 | if (!oldlenp || get_user(old_len, oldlenp)) | 1054 | if (!oldlenp || get_user(old_len, oldlenp)) |
1013 | return -EFAULT; | 1055 | return -EFAULT; |
1014 | } | 1056 | } |
1057 | spin_lock(&sysctl_lock); | ||
1015 | tmp = &root_table_header.ctl_entry; | 1058 | tmp = &root_table_header.ctl_entry; |
1016 | do { | 1059 | do { |
1017 | struct ctl_table_header *head = | 1060 | struct ctl_table_header *head = |
1018 | list_entry(tmp, struct ctl_table_header, ctl_entry); | 1061 | list_entry(tmp, struct ctl_table_header, ctl_entry); |
1019 | void *context = NULL; | 1062 | void *context = NULL; |
1020 | int error = parse_table(name, nlen, oldval, oldlenp, | 1063 | |
1064 | if (!use_table(head)) | ||
1065 | continue; | ||
1066 | |||
1067 | spin_unlock(&sysctl_lock); | ||
1068 | |||
1069 | error = parse_table(name, nlen, oldval, oldlenp, | ||
1021 | newval, newlen, head->ctl_table, | 1070 | newval, newlen, head->ctl_table, |
1022 | &context); | 1071 | &context); |
1023 | kfree(context); | 1072 | kfree(context); |
1073 | |||
1074 | spin_lock(&sysctl_lock); | ||
1075 | unuse_table(head); | ||
1024 | if (error != -ENOTDIR) | 1076 | if (error != -ENOTDIR) |
1025 | return error; | 1077 | break; |
1026 | tmp = tmp->next; | 1078 | } while ((tmp = tmp->next) != &root_table_header.ctl_entry); |
1027 | } while (tmp != &root_table_header.ctl_entry); | 1079 | spin_unlock(&sysctl_lock); |
1028 | return -ENOTDIR; | 1080 | return error; |
1029 | } | 1081 | } |
1030 | 1082 | ||
1031 | asmlinkage long sys_sysctl(struct __sysctl_args __user *args) | 1083 | asmlinkage long sys_sysctl(struct __sysctl_args __user *args) |
@@ -1236,12 +1288,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, | |||
1236 | return NULL; | 1288 | return NULL; |
1237 | tmp->ctl_table = table; | 1289 | tmp->ctl_table = table; |
1238 | INIT_LIST_HEAD(&tmp->ctl_entry); | 1290 | INIT_LIST_HEAD(&tmp->ctl_entry); |
1291 | tmp->used = 0; | ||
1292 | tmp->unregistering = NULL; | ||
1293 | spin_lock(&sysctl_lock); | ||
1239 | if (insert_at_head) | 1294 | if (insert_at_head) |
1240 | list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); | 1295 | list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); |
1241 | else | 1296 | else |
1242 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); | 1297 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); |
1298 | spin_unlock(&sysctl_lock); | ||
1243 | #ifdef CONFIG_PROC_FS | 1299 | #ifdef CONFIG_PROC_FS |
1244 | register_proc_table(table, proc_sys_root); | 1300 | register_proc_table(table, proc_sys_root, tmp); |
1245 | #endif | 1301 | #endif |
1246 | return tmp; | 1302 | return tmp; |
1247 | } | 1303 | } |
@@ -1255,10 +1311,13 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, | |||
1255 | */ | 1311 | */ |
1256 | void unregister_sysctl_table(struct ctl_table_header * header) | 1312 | void unregister_sysctl_table(struct ctl_table_header * header) |
1257 | { | 1313 | { |
1258 | list_del(&header->ctl_entry); | 1314 | might_sleep(); |
1315 | spin_lock(&sysctl_lock); | ||
1316 | start_unregistering(header); | ||
1259 | #ifdef CONFIG_PROC_FS | 1317 | #ifdef CONFIG_PROC_FS |
1260 | unregister_proc_table(header->ctl_table, proc_sys_root); | 1318 | unregister_proc_table(header->ctl_table, proc_sys_root); |
1261 | #endif | 1319 | #endif |
1320 | spin_unlock(&sysctl_lock); | ||
1262 | kfree(header); | 1321 | kfree(header); |
1263 | } | 1322 | } |
1264 | 1323 | ||
@@ -1269,7 +1328,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) | |||
1269 | #ifdef CONFIG_PROC_FS | 1328 | #ifdef CONFIG_PROC_FS |
1270 | 1329 | ||
1271 | /* Scan the sysctl entries in table and add them all into /proc */ | 1330 | /* Scan the sysctl entries in table and add them all into /proc */ |
1272 | static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) | 1331 | static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) |
1273 | { | 1332 | { |
1274 | struct proc_dir_entry *de; | 1333 | struct proc_dir_entry *de; |
1275 | int len; | 1334 | int len; |
@@ -1305,13 +1364,14 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) | |||
1305 | de = create_proc_entry(table->procname, mode, root); | 1364 | de = create_proc_entry(table->procname, mode, root); |
1306 | if (!de) | 1365 | if (!de) |
1307 | continue; | 1366 | continue; |
1367 | de->set = set; | ||
1308 | de->data = (void *) table; | 1368 | de->data = (void *) table; |
1309 | if (table->proc_handler) | 1369 | if (table->proc_handler) |
1310 | de->proc_fops = &proc_sys_file_operations; | 1370 | de->proc_fops = &proc_sys_file_operations; |
1311 | } | 1371 | } |
1312 | table->de = de; | 1372 | table->de = de; |
1313 | if (de->mode & S_IFDIR) | 1373 | if (de->mode & S_IFDIR) |
1314 | register_proc_table(table->child, de); | 1374 | register_proc_table(table->child, de, set); |
1315 | } | 1375 | } |
1316 | } | 1376 | } |
1317 | 1377 | ||
@@ -1336,6 +1396,13 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root | |||
1336 | continue; | 1396 | continue; |
1337 | } | 1397 | } |
1338 | 1398 | ||
1399 | /* | ||
1400 | * In any case, mark the entry as goner; we'll keep it | ||
1401 | * around if it's busy, but we'll know to do nothing with | ||
1402 | * its fields. We are under sysctl_lock here. | ||
1403 | */ | ||
1404 | de->data = NULL; | ||
1405 | |||
1339 | /* Don't unregister proc entries that are still being used.. */ | 1406 | /* Don't unregister proc entries that are still being used.. */ |
1340 | if (atomic_read(&de->count)) | 1407 | if (atomic_read(&de->count)) |
1341 | continue; | 1408 | continue; |
@@ -1349,27 +1416,38 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, | |||
1349 | size_t count, loff_t *ppos) | 1416 | size_t count, loff_t *ppos) |
1350 | { | 1417 | { |
1351 | int op; | 1418 | int op; |
1352 | struct proc_dir_entry *de; | 1419 | struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); |
1353 | struct ctl_table *table; | 1420 | struct ctl_table *table; |
1354 | size_t res; | 1421 | size_t res; |
1355 | ssize_t error; | 1422 | ssize_t error = -ENOTDIR; |
1356 | |||
1357 | de = PDE(file->f_dentry->d_inode); | ||
1358 | if (!de || !de->data) | ||
1359 | return -ENOTDIR; | ||
1360 | table = (struct ctl_table *) de->data; | ||
1361 | if (!table || !table->proc_handler) | ||
1362 | return -ENOTDIR; | ||
1363 | op = (write ? 002 : 004); | ||
1364 | if (ctl_perm(table, op)) | ||
1365 | return -EPERM; | ||
1366 | 1423 | ||
1367 | res = count; | 1424 | spin_lock(&sysctl_lock); |
1368 | 1425 | if (de && de->data && use_table(de->set)) { | |
1369 | error = (*table->proc_handler) (table, write, file, buf, &res, ppos); | 1426 | /* |
1370 | if (error) | 1427 | * at that point we know that sysctl was not unregistered |
1371 | return error; | 1428 | * and won't be until we finish |
1372 | return res; | 1429 | */ |
1430 | spin_unlock(&sysctl_lock); | ||
1431 | table = (struct ctl_table *) de->data; | ||
1432 | if (!table || !table->proc_handler) | ||
1433 | goto out; | ||
1434 | error = -EPERM; | ||
1435 | op = (write ? 002 : 004); | ||
1436 | if (ctl_perm(table, op)) | ||
1437 | goto out; | ||
1438 | |||
1439 | /* careful: calling conventions are nasty here */ | ||
1440 | res = count; | ||
1441 | error = (*table->proc_handler)(table, write, file, | ||
1442 | buf, &res, ppos); | ||
1443 | if (!error) | ||
1444 | error = res; | ||
1445 | out: | ||
1446 | spin_lock(&sysctl_lock); | ||
1447 | unuse_table(de->set); | ||
1448 | } | ||
1449 | spin_unlock(&sysctl_lock); | ||
1450 | return error; | ||
1373 | } | 1451 | } |
1374 | 1452 | ||
1375 | static int proc_opensys(struct inode *inode, struct file *file) | 1453 | static int proc_opensys(struct inode *inode, struct file *file) |