Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpuset.c   | 25
-rw-r--r--  kernel/extable.c  |  2
-rw-r--r--  kernel/module.c   | 12
-rw-r--r--  kernel/ptrace.c   | 57
-rw-r--r--  kernel/rcupdate.c | 19
-rw-r--r--  kernel/sched.c    | 62
-rw-r--r--  kernel/timer.c    | 16
7 files changed, 115 insertions, 78 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1b9e3f..ab81fdd4572b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * So only GFP_KERNEL allocations, if all nodes in the cpuset are
  * short of memory, might require taking the callback_mutex mutex.
  *
- * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
- * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
- * hardwall cpusets - no allocation on a node outside the cpuset is
- * allowed (unless in interrupt, of course).
+ * The first call here from mm/page_alloc:get_page_from_freelist()
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
+ * no allocation on a node outside the cpuset is allowed (unless in
+ * interrupt, of course).
  *
- * The second loop doesn't even call here for GFP_ATOMIC requests
- * (if the __alloc_pages() local variable 'wait' is set). That check
- * and the checks below have the combined affect in the second loop of
- * the __alloc_pages() routine that:
+ * The second pass through get_page_from_freelist() doesn't even call
+ * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
+ * in alloc_flags. That logic and the checks below have the combined
+ * affect that:
  *    in_interrupt - any node ok (current task context irrelevant)
  *    GFP_ATOMIC   - any node ok
  *    GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
  *    GFP_USER     - only nodes in current tasks mems allowed ok.
+ *
+ * Rule:
+ *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
+ *    the code that might scan up ancestor cpusets and sleep.
  **/
 
 int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
         if (in_interrupt())
                 return 1;
         node = z->zone_pgdat->node_id;
+        might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
         if (node_isset(node, current->mems_allowed))
                 return 1;
         if (gfp_mask & __GFP_HARDWALL)  /* If hardwall request, stop here */
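The rule spelled out in the new comment is the reason for the might_sleep_if() annotation: a caller that cannot sleep has to opt out of the ancestor-cpuset scan explicitly. A minimal caller sketch under that rule, assuming only the cpuset_zone_allowed() wrapper from include/linux/cpuset.h; the helper name below is hypothetical and not part of this patch:

        /* Sketch only: a non-sleeping caller must pass __GFP_HARDWALL. */
        static int zone_usable_nosleep(struct zone *z)
        {
                /*
                 * __GFP_HARDWALL keeps __cpuset_zone_allowed() on its fast
                 * path: no scan up ancestor cpusets, no callback_mutex, and
                 * the new might_sleep_if() check above stays quiet.
                 */
                return cpuset_zone_allowed(z, GFP_ATOMIC | __GFP_HARDWALL);
        }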
diff --git a/kernel/extable.c b/kernel/extable.c
index 7501b531ceed..7fe262855317 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
         return e;
 }
 
-static int core_kernel_text(unsigned long addr)
+int core_kernel_text(unsigned long addr)
 {
         if (addr >= (unsigned long)_stext &&
             addr <= (unsigned long)_etext)
diff --git a/kernel/module.c b/kernel/module.c
index d24deb0dbbc9..bbe04862e1b0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put);
 
 void symbol_put_addr(void *addr)
 {
-        unsigned long flags;
+        struct module *modaddr;
 
-        spin_lock_irqsave(&modlist_lock, flags);
-        if (!kernel_text_address((unsigned long)addr))
-                BUG();
+        if (core_kernel_text((unsigned long)addr))
+                return;
 
-        module_put(module_text_address((unsigned long)addr));
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        if (!(modaddr = module_text_address((unsigned long)addr)))
+                BUG();
+        module_put(modaddr);
 }
 EXPORT_SYMBOL_GPL(symbol_put_addr);
 
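For context, symbol_put_addr() is the address-based counterpart of symbol_put(); the rewrite drops the modlist_lock round trip, returns early for core-kernel addresses via the newly exported core_kernel_text(), and hits BUG() only for an address that is in neither. A hedged usage sketch; some_exported_func is a made-up placeholder for whatever exported symbol the caller looks up:

        /* Sketch only: the usual pairing with symbol_get(); the symbol name is a placeholder. */
        static void use_optional_symbol(void)
        {
                int (*fn)(void) = symbol_get(some_exported_func);

                if (!fn)
                        return;                 /* providing module not loaded */
                fn();                           /* safe: symbol_get() pinned its module */
                symbol_put_addr(fn);            /* drop that reference by address */
        }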
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4e0f0ec003f7..921c22ad16e4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -148,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task)
 int ptrace_attach(struct task_struct *task)
 {
         int retval;
-        task_lock(task);
+
         retval = -EPERM;
         if (task->pid <= 1)
-                goto bad;
+                goto out;
         if (task->tgid == current->tgid)
-                goto bad;
+                goto out;
+
+repeat:
+        /*
+         * Nasty, nasty.
+         *
+         * We want to hold both the task-lock and the
+         * tasklist_lock for writing at the same time.
+         * But that's against the rules (tasklist_lock
+         * is taken for reading by interrupts on other
+         * cpu's that may have task_lock).
+         */
+        task_lock(task);
+        local_irq_disable();
+        if (!write_trylock(&tasklist_lock)) {
+                local_irq_enable();
+                task_unlock(task);
+                do {
+                        cpu_relax();
+                } while (!write_can_lock(&tasklist_lock));
+                goto repeat;
+        }
+
         /* the same process cannot be attached many times */
         if (task->ptrace & PT_PTRACED)
                 goto bad;
@@ -166,17 +188,15 @@ int ptrace_attach(struct task_struct *task)
                      ? PT_ATTACHED : 0);
         if (capable(CAP_SYS_PTRACE))
                 task->ptrace |= PT_PTRACE_CAP;
-        task_unlock(task);
 
-        write_lock_irq(&tasklist_lock);
         __ptrace_link(task, current);
-        write_unlock_irq(&tasklist_lock);
 
         force_sig_specific(SIGSTOP, task);
-        return 0;
 
 bad:
+        write_unlock_irq(&tasklist_lock);
         task_unlock(task);
+out:
         return retval;
 }
 
@@ -417,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request,
  */
 int ptrace_traceme(void)
 {
-        int ret;
+        int ret = -EPERM;
 
         /*
          * Are we already being traced?
          */
-        if (current->ptrace & PT_PTRACED)
-                return -EPERM;
-        ret = security_ptrace(current->parent, current);
-        if (ret)
-                return -EPERM;
-        /*
-         * Set the ptrace bit in the process ptrace flags.
-         */
-        current->ptrace |= PT_PTRACED;
-        return 0;
+        task_lock(current);
+        if (!(current->ptrace & PT_PTRACED)) {
+                ret = security_ptrace(current->parent, current);
+                /*
+                 * Set the ptrace bit in the process ptrace flags.
+                 */
+                if (!ret)
+                        current->ptrace |= PT_PTRACED;
+        }
+        task_unlock(current);
+        return ret;
 }
 
 /**
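The trylock-and-retry dance added to ptrace_attach() is a general way to take two locks whose straightforward ordering would deadlock against interrupt-context readers of the second lock. A generic sketch of the same pattern with placeholder lock names (an illustration under those assumptions, not code from this patch):

        /*
         * Lock A is a plain spinlock; lock B is an rwlock that interrupt
         * handlers may take for reading while A is held elsewhere.  So B may
         * only be trylocked once A is held, and on failure everything is
         * dropped and retried so those readers can make progress.
         * On return, both locks are held and local interrupts are disabled.
         */
        static void lock_a_then_b(spinlock_t *a, rwlock_t *b)
        {
        repeat:
                spin_lock(a);
                local_irq_disable();
                if (!write_trylock(b)) {
                        local_irq_enable();
                        spin_unlock(a);
                        do {
                                cpu_relax();    /* spin politely until B looks free */
                        } while (!write_can_lock(b));
                        goto repeat;
                }
        }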
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 6d32ff26f948..2058f88c7bbb 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
         return 0;
 }
 
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, returning 1 if so. This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
 int rcu_pending(int cpu)
 {
         return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
                 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
 
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ */
+int rcu_needs_cpu(int cpu)
+{
+        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+        struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
+
+        return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
+}
+
 void rcu_check_callbacks(int cpu, int user)
 {
         if (user ||
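rcu_needs_cpu() answers a different question from rcu_pending(): not "is there work to do right now" but "will this CPU still be needed for RCU at all". A sketch of the kind of caller this is aimed at, assuming a dynticks-style idle path; the helper below is hypothetical and not part of this patch:

        /* Sketch only: keep the periodic tick while RCU still needs this CPU. */
        static int cpu_may_stop_tick(int cpu)
        {
                if (rcu_needs_cpu(cpu))
                        return 0;       /* callbacks queued or work pending: keep ticking */
                return 1;               /* quiet: safe to stop the local tick */
        }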
diff --git a/kernel/sched.c b/kernel/sched.c
index 4c64f85698ae..c13f1bd2df7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,55 +665,13 @@ static int effective_prio(task_t *p)
 }
 
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired, and switch periodically
- * regardless, to ensure that highly interactive tasks do not starve
- * the less fortunate for unreasonably long periods.
- */
-static inline int expired_starving(runqueue_t *rq)
-{
-        int limit;
-
-        /*
-         * Arrays were recently switched, all is well
-         */
-        if (!rq->expired_timestamp)
-                return 0;
-
-        limit = STARVATION_LIMIT * rq->nr_running;
-
-        /*
-         * It's time to switch arrays
-         */
-        if (jiffies - rq->expired_timestamp >= limit)
-                return 1;
-
-        /*
-         * There's a better selection in the expired array
-         */
-        if (rq->curr->static_prio > rq->best_expired_prio)
-                return 1;
-
-        /*
-         * All is well
-         */
-        return 0;
-}
-
-/*
  * __activate_task - move a task to the runqueue.
  */
 static void __activate_task(task_t *p, runqueue_t *rq)
 {
         prio_array_t *target = rq->active;
 
-        if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+        if (batch_task(p))
                 target = rq->expired;
         enqueue_task(p, target);
         rq->nr_running++;
@@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk)
 }
 
 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired:
+ */
+#define EXPIRED_STARVING(rq) \
+        ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
+                (jiffies - (rq)->expired_timestamp >= \
+                        STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+                        ((rq)->curr->static_prio > (rq)->best_expired_prio))
+
+/*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2666,7 +2640,7 @@ void scheduler_tick(void)
 
         if (!rq->expired_timestamp)
                 rq->expired_timestamp = jiffies;
-        if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
+        if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
                 enqueue_task(p, rq->expired);
                 if (p->static_prio < rq->best_expired_prio)
                         rq->best_expired_prio = p->static_prio;
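The restored EXPIRED_STARVING() macro packs two independent conditions into one expression. Purely as a readability aid (my expansion, not code from the patch), it is equivalent to this inline helper:

        static inline int expired_starving_equiv(runqueue_t *rq)
        {
                /* oldest expired task has waited past the load-scaled deadline */
                if (STARVATION_LIMIT && rq->expired_timestamp &&
                    jiffies - rq->expired_timestamp >=
                                STARVATION_LIMIT * rq->nr_running + 1)
                        return 1;
                /* a task with better static_prio is sitting in the expired array */
                if (rq->curr->static_prio > rq->best_expired_prio)
                        return 1;
                return 0;
        }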
diff --git a/kernel/timer.c b/kernel/timer.c
index 67eaf0f54096..9e49deed468c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -541,6 +541,22 @@ found:
         }
         spin_unlock(&base->lock);
 
+        /*
+         * It can happen that other CPUs service timer IRQs and increment
+         * jiffies, but we have not yet got a local timer tick to process
+         * the timer wheels. In that case, the expiry time can be before
+         * jiffies, but since the high-resolution timer here is relative to
+         * jiffies, the default expression when high-resolution timers are
+         * not active,
+         *
+         *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
+         *
+         * would falsely evaluate to true. If that is the case, just
+         * return jiffies so that we can immediately fire the local timer
+         */
+        if (time_before(expires, jiffies))
+                return jiffies;
+
         if (time_before(hr_expires, expires))
                 return hr_expires;
 
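The guard added above amounts to clamping the computed expiry to the current jiffies. A tiny worked example with made-up numbers, written as a standalone helper rather than the in-place check the patch actually uses:

        static unsigned long clamp_next_expiry(unsigned long expires)
        {
                /*
                 * Say another CPU has already pushed jiffies to 1000 while the
                 * wheel scan on this CPU computed expires == 998.  Then
                 * time_before(998, 1000) is true, so report "now" and fire the
                 * local timer immediately instead of far in the future.
                 */
                if (time_before(expires, jiffies))
                        return jiffies;
                return expires;
        }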