| author | Paul Mackerras <paulus@samba.org> | 2006-02-23 22:05:47 -0500 |
|---|---|---|
| committer | Paul Mackerras <paulus@samba.org> | 2006-02-23 22:05:47 -0500 |
| commit | a00428f5b149e36b8225b2a0812742a6dfb07b8c (patch) | |
| tree | a78869cd67cf78a0eb091fb0ea5d397734bd6738 /kernel | |
| parent | 774fee58c465ea1c7e9775e347ec307bcf2deeb3 (diff) | |
| parent | fb5c594c2acc441f0d2d8f457484a0e0e9285db3 (diff) | |
Merge ../powerpc-merge
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c        |   6
-rw-r--r--  kernel/cpuset.c         |  35
-rw-r--r--  kernel/exit.c           |   3
-rw-r--r--  kernel/fork.c           |   9
-rw-r--r--  kernel/hrtimer.c        |  13
-rw-r--r--  kernel/panic.c          |   1
-rw-r--r--  kernel/power/snapshot.c |   4
-rw-r--r--  kernel/power/swsusp.c   |   4
-rw-r--r--  kernel/ptrace.c         |  28
-rw-r--r--  kernel/sched.c          | 152
-rw-r--r--  kernel/sys_ni.c         |   2
-rw-r--r--  kernel/sysctl.c         |  20
-rw-r--r--  kernel/timer.c          |  39
13 files changed, 156 insertions, 160 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3e376202dd48..c4394abcd5e6 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -841,7 +841,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 
 	for (aux = context->aux; aux; aux = aux->next) {
 
-		ab = audit_log_start(context, GFP_KERNEL, aux->type);
+		ab = audit_log_start(context, gfp_mask, aux->type);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
@@ -878,14 +878,14 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 	}
 
 	if (context->pwd && context->pwdmnt) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
+		ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
 		if (ab) {
 			audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
 			audit_log_end(ab);
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+		ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba42b0a76961..12815d3f1a05 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
  * We don't need to task_lock() this reference to tsk->cpuset,
  * because tsk is already marked PF_EXITING, so attach_task() won't
  * mess with it, or task is a failed fork, never visible to attach_task.
+ *
+ * Hack:
+ *
+ * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+ *
+ * Don't leave a task unable to allocate memory, as that is an
+ * accident waiting to happen should someone add a callout in
+ * do_exit() after the cpuset_exit() call that might allocate.
+ * If a task tries to allocate memory with an invalid cpuset,
+ * it will oops in cpuset_update_task_memory_state().
+ *
+ * We call cpuset_exit() while the task is still competent to
+ * handle notify_on_release(), then leave the task attached to
+ * the root cpuset (top_cpuset) for the remainder of its exit.
+ *
+ * To do this properly, we would increment the reference count on
+ * top_cpuset, and near the very end of the kernel/exit.c do_exit()
+ * code we would add a second cpuset function call, to drop that
+ * reference.  This would just create an unnecessary hot spot on
+ * the top_cpuset reference count, to no avail.
+ *
+ * Normally, holding a reference to a cpuset without bumping its
+ * count is unsafe.  The cpuset could go away, or someone could
+ * attach us to a different cpuset, decrementing the count on
+ * the first cpuset that we never incremented.  But in this case,
+ * top_cpuset isn't going away, and either task has PF_EXITING set,
+ * which wards off any attach_task() attempts, or task is a failed
+ * fork, never visible to attach_task.
+ *
+ * Another way to do this would be to set the cpuset pointer
+ * to NULL here, and check in cpuset_update_task_memory_state()
+ * for a NULL pointer.  This hack avoids that NULL check, for no
+ * cost (other than this way too long comment ;).
  **/

 void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
 	struct cpuset *cs;
 
 	cs = tsk->cpuset;
-	tsk->cpuset = NULL;
+	tsk->cpuset = &top_cpuset;	/* Hack - see comment above */
 
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
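The comment added above describes a general pattern: rather than leaving tsk->cpuset as a NULL that every allocation path would then have to check, the exiting task is parked on the permanent top_cpuset. A minimal user-space sketch of that sentinel-instead-of-NULL idea follows (illustrative names only, not the cpuset code):

```c
#include <stdio.h>

/* A permanent, statically allocated "root" object, analogous to top_cpuset. */
struct policy {
	const char *name;
	int limit;
};

static struct policy root_policy = { "root", 100 };

struct task {
	struct policy *policy;	/* never NULL by construction */
};

/* Hot path: no NULL check needed, because detach parks us on &root_policy. */
static int allowed(struct task *t)
{
	return t->policy->limit;
}

/* Instead of t->policy = NULL, point at the permanent sentinel. */
static void detach_policy(struct task *t)
{
	t->policy = &root_policy;
}

int main(void)
{
	struct policy mine = { "mine", 10 };
	struct task t = { &mine };

	printf("before detach: %d\n", allowed(&t));
	detach_policy(&t);
	printf("after detach:  %d\n", allowed(&t));	/* still safe, no NULL check */
	return 0;
}
```

The point is the same as in the hunk: dereferences stay unconditional because detaching never produces a NULL pointer.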
diff --git a/kernel/exit.c b/kernel/exit.c
index 93cee3671332..531aadca5530 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -360,6 +360,9 @@ void daemonize(const char *name, ...)
 	fs = init_task.fs;
 	current->fs = fs;
 	atomic_inc(&fs->count);
+	exit_namespace(current);
+	current->namespace = init_task.namespace;
+	get_namespace(current->namespace);
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e88b374cee9..fbea12d7a943 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1123,8 +1123,8 @@ static task_t *copy_process(unsigned long clone_flags,
 	p->real_parent = current;
 	p->parent = p->real_parent;
 
+	spin_lock(&current->sighand->siglock);
 	if (clone_flags & CLONE_THREAD) {
-		spin_lock(&current->sighand->siglock);
 		/*
 		 * Important: if an exit-all has been started then
 		 * do not create this new thread - the whole thread
@@ -1162,8 +1162,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			 */
 			p->it_prof_expires = jiffies_to_cputime(1);
 		}
-
-		spin_unlock(&current->sighand->siglock);
 	}
 
 	/*
@@ -1175,8 +1173,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
-	attach_pid(p, PIDTYPE_PID, p->pid);
-	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
 		p->signal->tty = current->signal->tty;
 		p->signal->pgrp = process_group(current);
@@ -1186,9 +1182,12 @@ static task_t *copy_process(unsigned long clone_flags,
 		if (p->pid)
 			__get_cpu_var(process_counts)++;
 	}
+	attach_pid(p, PIDTYPE_TGID, p->tgid);
+	attach_pid(p, PIDTYPE_PID, p->pid);
 
 	nr_threads++;
 	total_forks++;
+	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	return p;
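The fork.c hunks widen the sighand->siglock critical section so that attaching the new task's pids and bumping nr_threads/total_forks happen atomically with respect to anyone walking the thread group under that lock. As a loose user-space analogy only (pthreads, invented names, not the kernel code), the "insert and count under one lock" shape looks like this:

```c
#include <pthread.h>
#include <stdio.h>

/* Shared registry guarded by a single lock, analogous to sighand->siglock. */
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

struct worker {
	int id;
	struct worker *next;
};

static struct worker *workers;	/* list of registered workers */
static int nr_workers;		/* must always agree with the list */

static struct worker pool[4];

/*
 * Insert and count inside one critical section, so a reader holding the
 * lock never sees the counter ahead of (or behind) the list contents.
 */
static void register_worker(struct worker *w, int id)
{
	pthread_mutex_lock(&registry_lock);
	w->id = id;
	w->next = workers;
	workers = w;
	nr_workers++;
	pthread_mutex_unlock(&registry_lock);
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		register_worker(&pool[i], i);

	pthread_mutex_lock(&registry_lock);
	printf("%d workers registered\n", nr_workers);
	pthread_mutex_unlock(&registry_lock);
	return 0;
}
```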
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b6e1757aedd..5ae51f1bc7c8 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -418,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base);
 
-	if (mode == HRTIMER_REL)
+	if (mode == HRTIMER_REL) {
 		tim = ktime_add(tim, new_base->get_time());
+		/*
+		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
+		 * to signal that they simply return xtime in
+		 * do_gettimeoffset(). In this case we want to round up by
+		 * resolution when starting a relative timer, to avoid short
+		 * timeouts. This will go away with the GTOD framework.
+		 */
+#ifdef CONFIG_TIME_LOW_RES
+		tim = ktime_add(tim, base->resolution);
+#endif
+	}
 	timer->expires = tim;
 
 	enqueue_hrtimer(timer, new_base);
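The added branch rounds a relative expiry up by one clock resolution on CONFIG_TIME_LOW_RES architectures, where "now" can be stale by up to a tick, so a timer cannot fire after less than the requested delay. A standalone sketch of that rounding with made-up numbers (not the hrtimer API):

```c
#include <stdio.h>
#include <stdint.h>

/*
 * Toy model of the rounding above: on a clock that only advances in
 * 'resolution'-sized steps, pad a relative timeout by one resolution so
 * the resulting timer can never fire short. Names are illustrative only.
 */
static uint64_t expiry_ns(uint64_t now_ns, uint64_t rel_ns,
			  uint64_t resolution_ns, int low_res_clock)
{
	uint64_t expires = now_ns + rel_ns;

	if (low_res_clock)
		expires += resolution_ns;	/* round up, as the hunk does */
	return expires;
}

int main(void)
{
	/* 10 ms resolution clock, asking for a 3 ms relative timeout. */
	uint64_t res = 10ULL * 1000 * 1000;
	uint64_t now = 40ULL * 1000 * 1000;

	printf("naive expiry:   %llu ns\n",
	       (unsigned long long)expiry_ns(now, 3000000, res, 0));
	printf("rounded expiry: %llu ns\n",
	       (unsigned long long)expiry_ns(now, 3000000, res, 1));
	return 0;
}
```

Without the padding, a stale "now" plus a short relative delay could land on the very next tick and expire early; the extra resolution guarantees at least the requested delay.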
diff --git a/kernel/panic.c b/kernel/panic.c
index c5c4ab255834..126dc43f1c74 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 #endif
 	local_irq_enable();
 	for (i = 0;;) {
+		touch_softlockup_watchdog();
 		i += panic_blink(i);
 		mdelay(1);
 		i++;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 41f66365f0d8..8d5a5986d621 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone)
 		 * corrected eventually when the cases giving rise to this
 		 * are better understood.
 		 */
-		if (PageReserved(page)) {
-			printk("highmem reserved page?!\n");
+		if (PageReserved(page))
 			continue;
-		}
 		BUG_ON(PageNosave(page));
 		if (PageNosaveFree(page))
 			continue;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 4e90905f0e87..2d9d08f72f76 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
 {
 	int i;
 
-	if (!swsusp_resume_device)
-		return -ENODEV;
 	spin_lock(&swap_lock);
 	for (i = 0; i < MAX_SWAPFILES; i++) {
 		if (!(swap_info[i].flags & SWP_WRITEOK))
 			continue;
-		if (is_resume_device(swap_info + i)) {
+		if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
 			spin_unlock(&swap_lock);
 			root_swap = i;
 			return 0;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5f33cdb6fff5..d95a72c9279d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
  */
 void __ptrace_unlink(task_t *child)
 {
-	if (!child->ptrace)
-		BUG();
+	BUG_ON(!child->ptrace);
+
 	child->ptrace = 0;
 	if (!list_empty(&child->ptrace_list)) {
 		list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
 	return retval;
 }
 
+void __ptrace_detach(struct task_struct *child, unsigned int data)
+{
+	child->exit_code = data;
+	/* .. re-parent .. */
+	__ptrace_unlink(child);
+	/* .. and wake it up. */
+	if (child->exit_state != EXIT_ZOMBIE)
+		wake_up_process(child);
+}
+
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
 	if (!valid_signal(data))
 		return -EIO;
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
 
-	/* .. re-parent .. */
-	child->exit_code = data;
-
 	write_lock_irq(&tasklist_lock);
-	__ptrace_unlink(child);
-	/* .. and wake it up. */
-	if (child->exit_state != EXIT_ZOMBIE)
-		wake_up_process(child);
+	if (child->ptrace)
+		__ptrace_detach(child, data);
 	write_unlock_irq(&tasklist_lock);
 
 	return 0;
@@ -242,8 +247,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 		if (write) {
 			copy_to_user_page(vma, page, addr,
 					  maddr + offset, buf, bytes);
-			if (!PageCompound(page))
-				set_page_dirty_lock(page);
+			set_page_dirty_lock(page);
 		} else {
 			copy_from_user_page(vma, page, addr,
 					    buf, maddr + offset, bytes);
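The refactor above extracts __ptrace_detach(), which assumes tasklist_lock is already held, and has ptrace_detach() take the lock and re-check child->ptrace before calling it. A toy sketch of that locked-helper/wrapper split (pthreads, invented names, not the ptrace code):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

struct tracee {
	int traced;
	int exit_code;
};

/* Double-underscore helper: caller must already hold table_lock. */
static void __detach(struct tracee *t, int code)
{
	t->exit_code = code;
	t->traced = 0;
}

/* Public wrapper: takes the lock, then re-checks the state under it. */
static int detach(struct tracee *t, int code)
{
	pthread_mutex_lock(&table_lock);
	if (t->traced)
		__detach(t, code);
	pthread_mutex_unlock(&table_lock);
	return 0;
}

int main(void)
{
	struct tracee t = { .traced = 1 };

	detach(&t, 9);
	printf("traced=%d exit_code=%d\n", t.traced, t.exit_code);
	return 0;
}
```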
diff --git a/kernel/sched.c b/kernel/sched.c
index bc38804e40dd..12d291bf3379 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -215,7 +215,6 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
-	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -669,68 +668,13 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			/*
-			 * The migration thread does the actual balancing. Do
-			 * not bias by its priority as the ultra high priority
-			 * will skew balancing adversely.
-			 */
-			inc_prio_bias(rq, p->prio);
-	} else
-		inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			dec_prio_bias(rq, p->prio);
-	} else
-		dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 /*
@@ -739,7 +683,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +807,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	dec_nr_running(p, rq);
+	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -1007,61 +951,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		source_load = load_now;
-	else
-		source_load = min(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		/*
-		 * If we are busy rebalancing the load is biased by
-		 * priority to create 'nice' support across cpus. When
-		 * idle rebalancing we should only bias the source_load if
-		 * there is more than one task running on that queue to
-		 * prevent idle rebalance from trying to pull tasks from a
-		 * queue with only one running task.
-		 */
-		source_load = source_load * rq->prio_bias / running;
-
-	return source_load;
-}
+		return load_now;
 
-static inline unsigned long source_load(int cpu, int type)
-{
-	return __source_load(cpu, type, NOT_IDLE);
+	return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		target_load = load_now;
-	else
-		target_load = max(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		target_load = target_load * rq->prio_bias / running;
-
-	return target_load;
-}
+		return load_now;
 
-static inline unsigned long target_load(int cpu, int type)
-{
-	return __target_load(cpu, type, NOT_IDLE);
+	return max(rq->cpu_load[type-1], load_now);
 }
 
 /*
@@ -1294,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 		}
 	}
 
-	if (p->last_waker_cpu != this_cpu)
-		goto out_set_cpu;
-
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
@@ -1367,8 +1274,6 @@ out_set_cpu:
 		cpu = task_cpu(p);
 	}
 
-	p->last_waker_cpu = this_cpu;
-
 out_activate:
 #endif /* CONFIG_SMP */
 	if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP)
-	p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
 #endif
-#endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1432,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 			list_add_tail(&p->run_list, &current->run_list);
 			p->array = current->array;
 			p->array->nr_active++;
-			inc_nr_running(p, rq);
+			rq->nr_running++;
 		}
 		set_need_resched();
 	} else
@@ -1875,9 +1777,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	dec_nr_running(p, src_rq);
+	src_rq->nr_running--;
 	set_task_cpu(p, this_cpu);
-	inc_nr_running(p, this_rq);
+	this_rq->nr_running++;
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 		+ this_rq->timestamp_last_tick;
@@ -2056,9 +1958,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = __target_load(i, load_idx, idle);
+				load = target_load(i, load_idx);
 			else
-				load = __source_load(i, load_idx, idle);
+				load = source_load(i, load_idx);
 
 			avg_load += load;
 		}
@@ -2171,7 +2073,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = __source_load(i, 0, idle);
+		load = source_load(i, 0);
 
 		if (load > max_load) {
 			max_load = load;
@@ -3571,10 +3473,8 @@ void set_user_nice(task_t *p, long nice)
 		goto out_unlock;
 	}
 	array = p->array;
-	if (array) {
+	if (array)
 		dequeue_task(p, array);
-		dec_prio_bias(rq, p->static_prio);
-	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3484,6 @@ void set_user_nice(task_t *p, long nice)
 
 	if (array) {
 		enqueue_task(p, array);
-		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -5159,7 +5058,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 #define MAX_DOMAIN_DISTANCE 32
 
 static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
-		{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL };
+		{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
+/*
+ * Architectures may override the migration cost and thus avoid
+ * boot-time calibration. Unit is nanoseconds. Mostly useful for
+ * virtualized hardware:
+ */
+#ifdef CONFIG_DEFAULT_MIGRATION_COST
+			CONFIG_DEFAULT_MIGRATION_COST
+#else
+			-1LL
+#endif
+};
 
 /*
  * Allow override of migration cost - in units of microseconds.
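With the prio_bias code reverted, source_load()/target_load() reduce to taking the min/max of the instantaneous load (nr_running scaled by SCHED_LOAD_SCALE) and the decayed cpu_load history, so sources are under-estimated and targets over-estimated. A self-contained sketch of those two estimators (toy types, and omitting the type == 0 fast path shown in the diff):

```c
#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL	/* illustrative scale factor */

/* Toy runqueue: instantaneous task count plus a decayed load history. */
struct toy_rq {
	unsigned long nr_running;
	unsigned long cpu_load;		/* decayed history, same scale */
};

/* Conservative low estimate for a migration source. */
static unsigned long toy_source_load(const struct toy_rq *rq)
{
	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;

	return load_now < rq->cpu_load ? load_now : rq->cpu_load;	/* min */
}

/* Conservative high estimate for a migration target. */
static unsigned long toy_target_load(const struct toy_rq *rq)
{
	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;

	return load_now > rq->cpu_load ? load_now : rq->cpu_load;	/* max */
}

int main(void)
{
	struct toy_rq rq = { .nr_running = 3, .cpu_load = 2 * SCHED_LOAD_SCALE };

	printf("source (low guess):  %lu\n", toy_source_load(&rq));
	printf("target (high guess): %lu\n", toy_target_load(&rq));
	return 0;
}
```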
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 17313b99e53d..1067090db6b1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16);
 cond_syscall(sys_setuid16);
 cond_syscall(sys_vm86old);
 cond_syscall(sys_vm86);
+cond_syscall(compat_sys_ipc);
+cond_syscall(compat_sys_sysctl);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71dd6f62efec..c05a2b7125e1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -44,14 +44,12 @@
 #include <linux/limits.h>
 #include <linux/dcache.h>
 #include <linux/syscalls.h>
+#include <linux/nfs_fs.h>
+#include <linux/acpi.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 
-#ifdef CONFIG_ROOT_NFS
-#include <linux/nfs_fs.h>
-#endif
-
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -126,8 +124,6 @@ extern int sysctl_hz_timer;
 extern int acct_parm[];
 #endif
 
-int randomize_va_space = 1;
-
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
@@ -640,6 +636,7 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#if defined(CONFIG_MMU)
 	{
 		.ctl_name	= KERN_RANDOMIZE,
 		.procname	= "randomize_va_space",
@@ -648,6 +645,7 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#endif
 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
 	{
 		.ctl_name	= KERN_SPIN_RETRY,
@@ -658,6 +656,16 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_ACPI_SLEEP
+	{
+		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
+		.procname	= "acpi_video_flags",
+		.data		= &acpi_video_flags,
+		.maxlen		= sizeof (unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
diff --git a/kernel/timer.c b/kernel/timer.c
index b9dad3994676..fe3a9a9f8328 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -717,12 +717,16 @@ static void second_overflow(void)
 #endif
 }
 
-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+/*
+ * Returns how many microseconds we need to add to xtime this tick
+ * in doing an adjustment requested with adjtime.
+ */
+static long adjtime_adjustment(void)
 {
-	long time_adjust_step, delta_nsec;
+	long time_adjust_step;
 
-	if ((time_adjust_step = time_adjust) != 0 ) {
+	time_adjust_step = time_adjust;
+	if (time_adjust_step) {
 		/*
 		 * We are doing an adjtime thing. Prepare time_adjust_step to
 		 * be within bounds. Note that a positive time_adjust means we
@@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void)
 		 */
 		time_adjust_step = min(time_adjust_step, (long)tickadj);
 		time_adjust_step = max(time_adjust_step, (long)-tickadj);
+	}
+	return time_adjust_step;
+}
 
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+	long time_adjust_step, delta_nsec;
+
+	time_adjust_step = adjtime_adjustment();
+	if (time_adjust_step)
 		/* Reduce by this step the amount of time left */
 		time_adjust -= time_adjust_step;
-	}
 	delta_nsec = tick_nsec + time_adjust_step * 1000;
 	/*
 	 * Advance the phase, once it gets to one microsecond, then
@@ -759,6 +772,22 @@ static void update_wall_time_one_tick(void)
 }
 
 /*
+ * Return how long ticks are at the moment, that is, how much time
+ * update_wall_time_one_tick will add to xtime next time we call it
+ * (assuming no calls to do_adjtimex in the meantime).
+ * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
+ * bits to the right of the binary point.
+ * This function has no side-effects.
+ */
+u64 current_tick_length(void)
+{
+	long delta_nsec;
+
+	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
+	return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+}
+
+/*
  * Using a loop looks inefficient, but "ticks" is
  * usually just one (we shouldn't be losing ticks,
  * we're doing this this way mainly for interrupt
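The new current_tick_length() reports the current tick length as fixed-point nanoseconds with SHIFT_SCALE-10 fractional bits. A small user-space restatement of that arithmetic follows; the SHIFT_SCALE value and the sample numbers are assumptions for illustration, not taken from the kernel headers:

```c
#include <stdio.h>
#include <stdint.h>

/* Assumed value for the sketch only; the kernel defines it in linux/timex.h. */
#define SHIFT_SCALE 22

int main(void)
{
	long tick_nsec = 1000000;	/* nominal tick length in ns (HZ=1000-style) */
	long adj_usec = 2;		/* what adjtime_adjustment() might return */
	int64_t time_adj = 0;		/* phase term, already in the fixed-point unit */

	/* Same shape as the kernel code: tick_nsec plus the adjtime step in ns. */
	long delta_nsec = tick_nsec + adj_usec * 1000;

	/* Fixed point: SHIFT_SCALE-10 fractional bits right of the binary point. */
	uint64_t tick_len = ((uint64_t)delta_nsec << (SHIFT_SCALE - 10)) + time_adj;

	printf("fixed-point tick length: %llu\n", (unsigned long long)tick_len);
	printf("whole nanoseconds:       %llu\n",
	       (unsigned long long)(tick_len >> (SHIFT_SCALE - 10)));
	return 0;
}
```

Shifting right by the same amount recovers whole nanoseconds, which is how a consumer of the fixed-point value would read it back.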