Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpuset.c        35
-rw-r--r--  kernel/exit.c          14
-rw-r--r--  kernel/posix-timers.c   8
-rw-r--r--  kernel/sched.c         64
-rw-r--r--  kernel/signal.c         4
-rw-r--r--  kernel/sysctl.c         2
-rw-r--r--  kernel/workqueue.c      2
7 files changed, 86 insertions, 43 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2a75e44e1a41..fe2f71f92ae0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1554,7 +1554,7 @@ struct ctr_struct {
  * when reading out p->cpuset, as we don't really care if it changes
  * on the next cycle, and we are not going to try to dereference it.
  */
-static inline int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
+static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
 {
 	int n = 0;
 	struct task_struct *g, *p;
@@ -2150,6 +2150,33 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 }
 
 /**
+ * cpuset_lock - lock out any changes to cpuset structures
+ *
+ * The out of memory (oom) code needs to lock down cpusets
+ * from being changed while it scans the tasklist looking for a
+ * task in an overlapping cpuset. Expose callback_sem via this
+ * cpuset_lock() routine, so the oom code can lock it, before
+ * locking the task list. The tasklist_lock is a spinlock, so
+ * must be taken inside callback_sem.
+ */
+
+void cpuset_lock(void)
+{
+	down(&callback_sem);
+}
+
+/**
+ * cpuset_unlock - release lock on cpuset changes
+ *
+ * Undo the lock taken in a previous cpuset_lock() call.
+ */
+
+void cpuset_unlock(void)
+{
+	up(&callback_sem);
+}
+
+/**
  * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
  * @p: pointer to task_struct of some other task.
  *
@@ -2158,7 +2185,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
  * determine if task @p's memory usage might impact the memory
  * available to the current task.
  *
- * Acquires callback_sem - not suitable for calling from a fast path.
+ * Call while holding callback_sem.
  **/
 
 int cpuset_excl_nodes_overlap(const struct task_struct *p)
@@ -2166,8 +2193,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
 	int overlap = 0;		/* do cpusets overlap? */
 
-	down(&callback_sem);
-
 	task_lock(current);
 	if (current->flags & PF_EXITING) {
 		task_unlock(current);
@@ -2186,8 +2211,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 
 	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
 done:
-	up(&callback_sem);
-
 	return overlap;
 }
 
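
Note: the kerneldoc added above fixes a lock order for callers: callback_sem (taken via cpuset_lock()) must be acquired before tasklist_lock, and cpuset_excl_nodes_overlap() now expects the semaphore to already be held. A minimal caller sketch, not part of this patch, assuming a made-up helper name pick_overlapping_task() for an oom-style tasklist scan:

/*
 * Sketch only -- illustrates the documented lock order: cpuset_lock()
 * (callback_sem) is taken first, then tasklist_lock, so that
 * cpuset_excl_nodes_overlap() can run without taking the semaphore
 * itself.  pick_overlapping_task() is a hypothetical name.
 */
#include <linux/cpuset.h>
#include <linux/sched.h>

static struct task_struct *pick_overlapping_task(void)
{
	struct task_struct *g, *p, *chosen = NULL;

	cpuset_lock();			/* callback_sem, outer lock */
	read_lock(&tasklist_lock);	/* spinlock, nests inside it */
	do_each_thread(g, p) {
		if (cpuset_excl_nodes_overlap(p)) {
			chosen = p;
			goto out;
		}
	} while_each_thread(g, p);
out:
	read_unlock(&tasklist_lock);
	cpuset_unlock();
	return chosen;
}
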
diff --git a/kernel/exit.c b/kernel/exit.c
index f8e609ff1893..93cee3671332 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -193,7 +193,7 @@ int is_orphaned_pgrp(int pgrp)
 	return retval;
 }
 
-static inline int has_stopped_jobs(int pgrp)
+static int has_stopped_jobs(int pgrp)
 {
 	int retval = 0;
 	struct task_struct *p;
@@ -230,7 +230,7 @@ static inline int has_stopped_jobs(int pgrp)
  *
  * NOTE that reparent_to_init() gives the caller full capabilities.
  */
-static inline void reparent_to_init(void)
+static void reparent_to_init(void)
 {
 	write_lock_irq(&tasklist_lock);
 
@@ -244,7 +244,9 @@ static inline void reparent_to_init(void)
 	/* Set the exit signal to SIGCHLD so we signal init on exit */
 	current->exit_signal = SIGCHLD;
 
-	if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
+	if ((current->policy == SCHED_NORMAL ||
+			current->policy == SCHED_BATCH)
+				&& (task_nice(current) < 0))
 		set_user_nice(current, 0);
 	/* cpus_allowed? */
 	/* rt_priority? */
@@ -367,7 +369,7 @@ void daemonize(const char *name, ...)
 
 EXPORT_SYMBOL(daemonize);
 
-static inline void close_files(struct files_struct * files)
+static void close_files(struct files_struct * files)
 {
 	int i, j;
 	struct fdtable *fdt;
@@ -541,7 +543,7 @@ static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_re
 	p->real_parent = reaper;
 }
 
-static inline void reparent_thread(task_t *p, task_t *father, int traced)
+static void reparent_thread(task_t *p, task_t *father, int traced)
 {
 	/* We don't want people slaying init. */
 	if (p->exit_signal != -1)
@@ -605,7 +607,7 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced)
  * group, and if no such member exists, give it to
  * the global child reaper process (ie "init")
  */
-static inline void forget_original_parent(struct task_struct * father,
+static void forget_original_parent(struct task_struct * father,
 					  struct list_head *to_release)
 {
 	struct task_struct *p, *reaper = father;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9e66e614862a..197208b3aa2a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -192,7 +192,7 @@ static inline int common_clock_set(const clockid_t which_clock,
 	return do_sys_settimeofday(tp, NULL);
 }
 
-static inline int common_timer_create(struct k_itimer *new_timer)
+static int common_timer_create(struct k_itimer *new_timer)
 {
 	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock);
 	new_timer->it.real.timer.data = new_timer;
@@ -361,7 +361,7 @@ static int posix_timer_fn(void *data)
 	return ret;
 }
 
-static inline struct task_struct * good_sigevent(sigevent_t * event)
+static struct task_struct * good_sigevent(sigevent_t * event)
 {
 	struct task_struct *rtn = current->group_leader;
 
@@ -687,7 +687,7 @@ sys_timer_getoverrun(timer_t timer_id)
 
 /* Set a POSIX.1b interval timer. */
 /* timr->it_lock is taken. */
-static inline int
+static int
 common_timer_set(struct k_itimer *timr, int flags,
 		 struct itimerspec *new_setting, struct itimerspec *old_setting)
 {
@@ -829,7 +829,7 @@ retry_delete:
 /*
  * return timer owned by the process, used by exit_itimers
  */
-static inline void itimer_delete(struct k_itimer *timer)
+static void itimer_delete(struct k_itimer *timer)
 {
 	unsigned long flags;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index c9dec2aa1976..788ecce1e0e4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -521,7 +521,7 @@ static inline void sched_info_dequeued(task_t *t)
  * long it was waiting to run. We also note when it began so that we
  * can keep stats on how long its timeslice is.
  */
-static inline void sched_info_arrive(task_t *t)
+static void sched_info_arrive(task_t *t)
 {
 	unsigned long now = jiffies, diff = 0;
 	struct runqueue *rq = task_rq(t);
@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
 	unsigned long long __sleep_time = now - p->timestamp;
 	unsigned long sleep_time;
 
-	if (__sleep_time > NS_MAX_SLEEP_AVG)
-		sleep_time = NS_MAX_SLEEP_AVG;
-	else
-		sleep_time = (unsigned long)__sleep_time;
+	if (unlikely(p->policy == SCHED_BATCH))
+		sleep_time = 0;
+	else {
+		if (__sleep_time > NS_MAX_SLEEP_AVG)
+			sleep_time = NS_MAX_SLEEP_AVG;
+		else
+			sleep_time = (unsigned long)__sleep_time;
+	}
 
 	if (likely(sleep_time > 0)) {
 		/*
@@ -1003,7 +1007,7 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static unsigned long __source_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
 	unsigned long running = rq->nr_running;
@@ -1866,7 +1870,7 @@ void sched_exec(void)
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
-static inline
+static
 void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
@@ -1888,7 +1892,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
-static inline
+static
 int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
 		     struct sched_domain *sd, enum idle_type idle,
 		     int *all_pinned)
@@ -2374,7 +2378,7 @@ out_balanced:
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  */
-static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
+static void idle_balance(int this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *sd;
 
@@ -2758,7 +2762,7 @@ static inline void wakeup_busy_runqueue(runqueue_t *rq)
 		resched_task(rq->idle);
 }
 
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	cpumask_t sibling_map;
@@ -2812,7 +2816,7 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
 	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	cpumask_t sibling_map;
@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice)
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
 	 * it wont have any effect on scheduling until the task is
-	 * not SCHED_NORMAL:
+	 * not SCHED_NORMAL/SCHED_BATCH:
 	 */
 	if (rt_task(p)) {
 		p->static_prio = NICE_TO_PRIO(nice);
@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
 	BUG_ON(p->array);
 	p->policy = policy;
 	p->rt_priority = prio;
-	if (policy != SCHED_NORMAL)
+	if (policy != SCHED_NORMAL && policy != SCHED_BATCH) {
 		p->prio = MAX_RT_PRIO-1 - p->rt_priority;
-	else
+	} else {
 		p->prio = p->static_prio;
+		/*
+		 * SCHED_BATCH tasks are treated as perpetual CPU hogs:
+		 */
+		if (policy == SCHED_BATCH)
+			p->sleep_avg = 0;
+	}
 }
 
 /**
@@ -3733,29 +3743,35 @@ recheck:
 	if (policy < 0)
 		policy = oldpolicy = p->policy;
 	else if (policy != SCHED_FIFO && policy != SCHED_RR &&
-			policy != SCHED_NORMAL)
+			policy != SCHED_NORMAL && policy != SCHED_BATCH)
 		return -EINVAL;
 	/*
 	 * Valid priorities for SCHED_FIFO and SCHED_RR are
-	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
+	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
+	 * SCHED_BATCH is 0.
 	 */
 	if (param->sched_priority < 0 ||
 	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
+	if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
+					!= (param->sched_priority == 0))
 		return -EINVAL;
 
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
 	if (!capable(CAP_SYS_NICE)) {
-		/* can't change policy */
-		if (policy != p->policy &&
-			!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
+		/*
+		 * can't change policy, except between SCHED_NORMAL
+		 * and SCHED_BATCH:
+		 */
+		if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
+			(policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
+				!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
 			return -EPERM;
 		/* can't increase priority */
-		if (policy != SCHED_NORMAL &&
+		if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
 			param->sched_priority > p->rt_priority &&
 			param->sched_priority >
 			p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
 		ret = MAX_USER_RT_PRIO-1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 		break;
 	}
@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy)
 		ret = 1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 	}
 	return ret;
@@ -5990,7 +6008,7 @@ next_sg:
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
-static inline void detach_destroy_domains(const cpumask_t *cpu_map)
+static void detach_destroy_domains(const cpumask_t *cpu_map)
 {
 	int i;
 
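
Note: the sched_setscheduler() checks added above require SCHED_BATCH to be requested with priority 0 and let an unprivileged task switch between SCHED_NORMAL and SCHED_BATCH. A minimal userspace sketch, not part of this patch; the local SCHED_BATCH define (value 3, matching the kernel header) is an assumption for systems whose libc headers predate the new policy:

/* Sketch: put the calling process into SCHED_BATCH. */
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#ifndef SCHED_BATCH
#define SCHED_BATCH 3	/* assumed: kernel value for the new policy */
#endif

int main(void)
{
	struct sched_param param = { .sched_priority = 0 };	/* must be 0 */

	if (sched_setscheduler(0, SCHED_BATCH, &param) == -1) {
		fprintf(stderr, "sched_setscheduler: %s\n", strerror(errno));
		return 1;
	}
	printf("policy is now %d\n", sched_getscheduler(0));
	return 0;
}
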
diff --git a/kernel/signal.c b/kernel/signal.c
index 1da2e74beb97..5dafbd36d62e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -476,7 +476,7 @@ unblock_all_signals(void)
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 }
 
-static inline int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 {
 	struct sigqueue *q, *first = NULL;
 	int still_pending = 0;
@@ -1881,7 +1881,7 @@ do_signal_stop(int signr)
  * We return zero if we still hold the siglock and should look
  * for another signal without checking group_stop_count again.
  */
-static inline int handle_group_stop(void)
+static int handle_group_stop(void)
 {
 	int stop_count;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62d4d9566876..f5d69b6e29f5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -648,7 +648,7 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-#if defined(CONFIG_S390)
+#if defined(CONFIG_S390) && defined(CONFIG_SMP)
 	{
 		.ctl_name	= KERN_SPIN_RETRY,
 		.procname	= "spin_retry",
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 82c4fa70595c..b052e2c4c710 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -147,7 +147,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
 	return ret;
 }
 
-static inline void run_workqueue(struct cpu_workqueue_struct *cwq)
+static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
 	unsigned long flags;
 