Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpuset.c   | 25
-rw-r--r--  kernel/extable.c  |  2
-rw-r--r--  kernel/module.c   | 12
-rw-r--r--  kernel/ptrace.c   | 57
-rw-r--r--  kernel/rcupdate.c | 19
-rw-r--r--  kernel/sched.c    | 62
-rw-r--r--  kernel/timer.c    | 16
7 files changed, 115 insertions(+), 78 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1b9e3f..ab81fdd4572b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * So only GFP_KERNEL allocations, if all nodes in the cpuset are
  * short of memory, might require taking the callback_mutex mutex.
  *
- * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
- * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
- * hardwall cpusets - no allocation on a node outside the cpuset is
- * allowed (unless in interrupt, of course).
+ * The first call here from mm/page_alloc:get_page_from_freelist()
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
+ * no allocation on a node outside the cpuset is allowed (unless in
+ * interrupt, of course).
  *
- * The second loop doesn't even call here for GFP_ATOMIC requests
- * (if the __alloc_pages() local variable 'wait' is set). That check
- * and the checks below have the combined affect in the second loop of
- * the __alloc_pages() routine that:
+ * The second pass through get_page_from_freelist() doesn't even call
+ * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
+ * in alloc_flags. That logic and the checks below have the combined
+ * affect that:
  * in_interrupt - any node ok (current task context irrelevant)
  * GFP_ATOMIC   - any node ok
  * GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
  * GFP_USER     - only nodes in current tasks mems allowed ok.
+ *
+ * Rule:
+ *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
+ *    the code that might scan up ancestor cpusets and sleep.
  **/
 
 int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
         if (in_interrupt())
                 return 1;
         node = z->zone_pgdat->node_id;
+        might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
         if (node_isset(node, current->mems_allowed))
                 return 1;
         if (gfp_mask & __GFP_HARDWALL)  /* If hardwall request, stop here */
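
The rule added to the comment above has a concrete consequence for callers. Below is a minimal, hypothetical caller sketch (not part of this patch; the helper name alloc_node_page_atomic() and its exact shape are invented for illustration) of code that cannot sleep and therefore passes __GFP_HARDWALL, which both keeps the allocation inside the task's cpuset and keeps __cpuset_zone_allowed() away from the ancestor-cpuset scan and the new might_sleep_if() check:

/*
 * Hypothetical caller sketch: an allocation path that may run with a
 * spinlock held, so it must not sleep.  Per the new rule it passes
 * __GFP_HARDWALL, so __cpuset_zone_allowed() stops at the hardwall
 * check instead of walking up ancestor cpusets (which may sleep).
 */
static struct page *alloc_node_page_atomic(struct zone *z)
{
        gfp_t gfp = GFP_ATOMIC | __GFP_HARDWALL;

        if (!cpuset_zone_allowed(z, gfp))
                return NULL;

        return alloc_pages_node(z->zone_pgdat->node_id, gfp, 0);
}
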
diff --git a/kernel/extable.c b/kernel/extable.c
index 7501b531ceed..7fe262855317 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
         return e;
 }
 
-static int core_kernel_text(unsigned long addr)
+int core_kernel_text(unsigned long addr)
 {
         if (addr >= (unsigned long)_stext &&
             addr <= (unsigned long)_etext)
diff --git a/kernel/module.c b/kernel/module.c
index d24deb0dbbc9..bbe04862e1b0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put);
 
 void symbol_put_addr(void *addr)
 {
-        unsigned long flags;
+        struct module *modaddr;
 
-        spin_lock_irqsave(&modlist_lock, flags);
-        if (!kernel_text_address((unsigned long)addr))
-                BUG();
+        if (core_kernel_text((unsigned long)addr))
+                return;
 
-        module_put(module_text_address((unsigned long)addr));
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        if (!(modaddr = module_text_address((unsigned long)addr)))
+                BUG();
+        module_put(modaddr);
 }
 EXPORT_SYMBOL_GPL(symbol_put_addr);
 
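
For context, symbol_put_addr() is the by-address counterpart of symbol_get(), and the new flow relies on core_kernel_text() being made non-static in kernel/extable.c above. A hedged usage sketch follows; the symbol name some_optional_func and the wrapper are hypothetical and not from this patch. With this change, an address that lives in the core kernel image is simply ignored instead of tripping BUG() through kernel_text_address():

/* Assumed to be provided by an optional module (hypothetical symbol). */
extern void some_optional_func(void);

/*
 * Hypothetical usage sketch: pin whichever module provides the symbol,
 * call it, then drop the reference by address.
 */
static void call_optional_symbol(void)
{
        void (*fn)(void) = symbol_get(some_optional_func);

        if (!fn)
                return;

        fn();
        symbol_put_addr(fn);    /* drops the owning module's refcount */
}
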
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4e0f0ec003f7..921c22ad16e4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -148,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task)
 int ptrace_attach(struct task_struct *task)
 {
         int retval;
-        task_lock(task);
+
         retval = -EPERM;
         if (task->pid <= 1)
-                goto bad;
+                goto out;
         if (task->tgid == current->tgid)
-                goto bad;
+                goto out;
+
+repeat:
+        /*
+         * Nasty, nasty.
+         *
+         * We want to hold both the task-lock and the
+         * tasklist_lock for writing at the same time.
+         * But that's against the rules (tasklist_lock
+         * is taken for reading by interrupts on other
+         * cpu's that may have task_lock).
+         */
+        task_lock(task);
+        local_irq_disable();
+        if (!write_trylock(&tasklist_lock)) {
+                local_irq_enable();
+                task_unlock(task);
+                do {
+                        cpu_relax();
+                } while (!write_can_lock(&tasklist_lock));
+                goto repeat;
+        }
+
         /* the same process cannot be attached many times */
         if (task->ptrace & PT_PTRACED)
                 goto bad;
@@ -166,17 +188,15 @@ int ptrace_attach(struct task_struct *task)
                         ? PT_ATTACHED : 0);
         if (capable(CAP_SYS_PTRACE))
                 task->ptrace |= PT_PTRACE_CAP;
-        task_unlock(task);
 
-        write_lock_irq(&tasklist_lock);
         __ptrace_link(task, current);
-        write_unlock_irq(&tasklist_lock);
 
         force_sig_specific(SIGSTOP, task);
-        return 0;
 
 bad:
+        write_unlock_irq(&tasklist_lock);
         task_unlock(task);
+out:
         return retval;
 }
 
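
The trylock-and-back-off dance in the ptrace_attach() hunks above is worth seeing in isolation. The helper below is a hypothetical restatement, not part of the patch: it shows the general shape of taking an inner per-task lock together with the write side of a rwlock that interrupt handlers on other CPUs may read-acquire while already holding the inner lock, which is why blocking in write_lock_irq() here could deadlock and only write_trylock() is safe:

/*
 * Hypothetical helper restating the locking idiom used by
 * ptrace_attach(): never block on the outer write lock while the inner
 * lock is held; on contention, drop everything, wait for the write side
 * to look free, and start over.
 */
static void lock_task_and_tasklist(struct task_struct *task)
{
        for (;;) {
                task_lock(task);
                local_irq_disable();
                if (write_trylock(&tasklist_lock))
                        return;         /* both locks held, IRQs disabled */

                local_irq_enable();
                task_unlock(task);
                while (!write_can_lock(&tasklist_lock))
                        cpu_relax();    /* spin without holding any lock */
        }
}
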
@@ -417,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request,
  */
 int ptrace_traceme(void)
 {
-        int ret;
+        int ret = -EPERM;
 
         /*
          * Are we already being traced?
          */
-        if (current->ptrace & PT_PTRACED)
-                return -EPERM;
-        ret = security_ptrace(current->parent, current);
-        if (ret)
-                return -EPERM;
-        /*
-         * Set the ptrace bit in the process ptrace flags.
-         */
-        current->ptrace |= PT_PTRACED;
-        return 0;
+        task_lock(current);
+        if (!(current->ptrace & PT_PTRACED)) {
+                ret = security_ptrace(current->parent, current);
+                /*
+                 * Set the ptrace bit in the process ptrace flags.
+                 */
+                if (!ret)
+                        current->ptrace |= PT_PTRACED;
+        }
+        task_unlock(current);
+        return ret;
 }
 
 /**
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 6d32ff26f948..2058f88c7bbb 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
         return 0;
 }
 
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, returning 1 if so. This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
 int rcu_pending(int cpu)
 {
         return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
                 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
 
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ */
+int rcu_needs_cpu(int cpu)
+{
+        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+        struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
+
+        return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
+}
+
 void rcu_check_callbacks(int cpu, int user)
 {
         if (user ||
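
rcu_needs_cpu() distinguishes work that is due now (rcu_pending()) from work that will become due, i.e. callbacks still sitting on curlist waiting for a grace period. A hedged sketch of the kind of caller this is aimed at, a tickless-idle decision point, follows; the helper name is hypothetical and not part of this patch:

/*
 * Hypothetical consumer sketch: before a CPU stops its periodic tick
 * (e.g. in a dynticks idle path), ask whether RCU still needs ticks on
 * this CPU, either because work is pending right now or because queued
 * callbacks are still waiting for a grace period to complete.
 */
static int cpu_can_stop_tick(int cpu)
{
        if (rcu_needs_cpu(cpu))
                return 0;       /* keep the tick running for RCU */

        return 1;               /* nothing RCU-related holds up this CPU */
}
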
diff --git a/kernel/sched.c b/kernel/sched.c
index 4c64f85698ae..c13f1bd2df7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,55 +665,13 @@ static int effective_prio(task_t *p)
 }
 
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired, and switch periodically
- * regardless, to ensure that highly interactive tasks do not starve
- * the less fortunate for unreasonably long periods.
- */
-static inline int expired_starving(runqueue_t *rq)
-{
-        int limit;
-
-        /*
-         * Arrays were recently switched, all is well
-         */
-        if (!rq->expired_timestamp)
-                return 0;
-
-        limit = STARVATION_LIMIT * rq->nr_running;
-
-        /*
-         * It's time to switch arrays
-         */
-        if (jiffies - rq->expired_timestamp >= limit)
-                return 1;
-
-        /*
-         * There's a better selection in the expired array
-         */
-        if (rq->curr->static_prio > rq->best_expired_prio)
-                return 1;
-
-        /*
-         * All is well
-         */
-        return 0;
-}
-
-/*
  * __activate_task - move a task to the runqueue.
  */
 static void __activate_task(task_t *p, runqueue_t *rq)
 {
         prio_array_t *target = rq->active;
 
-        if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+        if (batch_task(p))
                 target = rq->expired;
         enqueue_task(p, target);
         rq->nr_running++;
@@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk)
 }
 
 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired:
+ */
+#define EXPIRED_STARVING(rq) \
+        ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
+                (jiffies - (rq)->expired_timestamp >= \
+                        STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+                ((rq)->curr->static_prio > (rq)->best_expired_prio))
+
+/*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2666,7 +2640,7 @@ void scheduler_tick(void)
 
                 if (!rq->expired_timestamp)
                         rq->expired_timestamp = jiffies;
-                if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
+                if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
                         enqueue_task(p, rq->expired);
                         if (p->static_prio < rq->best_expired_prio)
                                 rq->best_expired_prio = p->static_prio;
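
The reinstated EXPIRED_STARVING() macro packs two conditions into one expression. A hypothetical stand-alone restatement follows (plain parameters instead of runqueue fields; not part of the patch) to make the grouping and precedence explicit:

/*
 * Hypothetical restatement of the EXPIRED_STARVING() condition: the
 * expired array is considered starving when either the first expired
 * task has waited past a load-scaled deadline, or a task with a better
 * static priority than the running one has already expired.
 */
static int expired_starving_check(unsigned long starvation_limit,
                                  unsigned long expired_timestamp,
                                  unsigned long now_jiffies,
                                  unsigned long nr_running,
                                  int curr_static_prio,
                                  int best_expired_prio)
{
        int deadline_passed = starvation_limit && expired_timestamp &&
                (now_jiffies - expired_timestamp >=
                 starvation_limit * nr_running + 1);
        int better_task_expired = curr_static_prio > best_expired_prio;

        return deadline_passed || better_task_expired;
}
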
diff --git a/kernel/timer.c b/kernel/timer.c
index 67eaf0f54096..9e49deed468c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -541,6 +541,22 @@ found:
         }
         spin_unlock(&base->lock);
 
+        /*
+         * It can happen that other CPUs service timer IRQs and increment
+         * jiffies, but we have not yet got a local timer tick to process
+         * the timer wheels. In that case, the expiry time can be before
+         * jiffies, but since the high-resolution timer here is relative to
+         * jiffies, the default expression when high-resolution timers are
+         * not active,
+         *
+         *      time_before(MAX_JIFFY_OFFSET + jiffies, expires)
+         *
+         * would falsely evaluate to true. If that is the case, just
+         * return jiffies so that we can immediately fire the local timer
+         */
+        if (time_before(expires, jiffies))
+                return jiffies;
+
         if (time_before(hr_expires, expires))
                 return hr_expires;
 
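
The comment's claim about time_before(MAX_JIFFY_OFFSET + jiffies, expires) can be checked in isolation. The stand-alone program below (userspace C; the two macros are local copies with the same semantics as the kernel definitions, and the concrete jiffies values are made up for the demonstration) shows the comparison wrapping and evaluating to true when expires is already in the past, which is exactly what the early return of jiffies above guards against:

/*
 * Stand-alone demonstration of the wraparound described in the comment
 * above.  Values and macro copies are for illustration only.
 */
#include <stdio.h>

#define MAX_JIFFY_OFFSET ((~0UL >> 1) - 1)              /* as in <linux/jiffies.h> */
#define time_before(a, b) ((long)(a) - (long)(b) < 0)   /* same semantics as the kernel macro */

int main(void)
{
        unsigned long jiffies = 1000000UL;              /* pretend current time */
        unsigned long expires = jiffies - 5;            /* a timer already overdue */
        unsigned long hr_expires = MAX_JIFFY_OFFSET + jiffies;  /* hrtimer-inactive default */

        /*
         * The signed subtraction wraps, so this prints 1 even though
         * hr_expires is conceptually as far in the future as possible.
         * Without the new check, next_timer_interrupt() would then
         * return hr_expires instead of the overdue expiry; returning
         * jiffies early avoids that.
         */
        printf("time_before(hr_expires, expires) = %d\n",
               time_before(hr_expires, expires));

        return 0;
}
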