author    Steven Whitehouse <swhiteho@redhat.com>  2006-04-03 09:08:57 -0400
committer Steven Whitehouse <swhiteho@redhat.com>  2006-04-03 09:08:57 -0400
commit    76467874b83835129dc454e3a7a8e5d1186101b0 (patch)
tree      162129f0c36c35be4aa323cf00626db0e804c3fc /kernel
parent    8628de0583504138551a05ad44ca388467f0f552 (diff)
parent    6246b6128bbe34d0752f119cf7c5111c85fe481d (diff)
Merge branch 'master'
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c            12
-rw-r--r--  kernel/audit.c            2
-rw-r--r--  kernel/cpuset.c          69
-rw-r--r--  kernel/exit.c             7
-rw-r--r--  kernel/fork.c            28
-rw-r--r--  kernel/futex.c            4
-rw-r--r--  kernel/futex_compat.c     4
-rw-r--r--  kernel/hrtimer.c         49
-rw-r--r--  kernel/module.c           1
-rw-r--r--  kernel/pid.c            212
-rw-r--r--  kernel/power/Kconfig      2
-rw-r--r--  kernel/power/process.c    3
-rw-r--r--  kernel/printk.c           6
-rw-r--r--  kernel/ptrace.c           3
-rw-r--r--  kernel/sched.c           84
-rw-r--r--  kernel/signal.c           7
-rw-r--r--  kernel/sys.c             19
-rw-r--r--  kernel/time.c             8
-rw-r--r--  kernel/timer.c           95
19 files changed, 387 insertions, 228 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 065d8b4e51ef..b327f4d20104 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -449,8 +449,8 @@ static void do_acct_process(long exitcode, struct file *file)
 	/* calculate run_time in nsec*/
 	do_posix_clock_monotonic_gettime(&uptime);
 	run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
-	run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC
-		+ current->start_time.tv_nsec;
+	run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC
+		+ current->group_leader->start_time.tv_nsec;
 	/* convert nsec -> AHZ */
 	elapsed = nsec_to_AHZ(run_time);
 #if ACCT_VERSION==3
@@ -469,10 +469,10 @@ static void do_acct_process(long exitcode, struct file *file)
 #endif
 	do_div(elapsed, AHZ);
 	ac.ac_btime = xtime.tv_sec - elapsed;
-	jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime,
+	jiffies = cputime_to_jiffies(cputime_add(current->utime,
 					    current->signal->utime));
 	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
-	jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime,
+	jiffies = cputime_to_jiffies(cputime_add(current->stime,
 					    current->signal->stime));
 	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
 	/* we really need to bite the bullet and change layout */
@@ -522,9 +522,9 @@ static void do_acct_process(long exitcode, struct file *file)
 	ac.ac_io = encode_comp_t(0 /* current->io_usage */);	/* %% */
 	ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
 	ac.ac_minflt = encode_comp_t(current->signal->min_flt +
-				     current->group_leader->min_flt);
+				     current->min_flt);
 	ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
-				     current->group_leader->maj_flt);
+				     current->maj_flt);
 	ac.ac_swaps = encode_comp_t(0);
 	ac.ac_exitcode = exitcode;
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 04fe2e301b61..c8ccbd09048f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -578,7 +578,7 @@ static int __init audit_enable(char *str)
 		audit_initialized ? "" : " (after initialization)");
 	if (audit_initialized)
 		audit_enabled = audit_default;
-	return 0;
+	return 1;
 }
 
 __setup("audit=", audit_enable);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 18aea1bd1284..72248d1b9e3f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -616,12 +616,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * current->cpuset if a task has its memory placement changed.
  * Do not call this routine if in_interrupt().
  *
- * Call without callback_mutex or task_lock() held. May be called
- * with or without manage_mutex held. Doesn't need task_lock to guard
- * against another task changing a non-NULL cpuset pointer to NULL,
- * as that is only done by a task on itself, and if the current task
- * is here, it is not simultaneously in the exit code NULL'ing its
- * cpuset pointer. This routine also might acquire callback_mutex and
+ * Call without callback_mutex or task_lock() held. May be
+ * called with or without manage_mutex held. Thanks in part to
+ * 'the_top_cpuset_hack', the tasks cpuset pointer will never
+ * be NULL. This routine also might acquire callback_mutex and
  * current->mm->mmap_sem during call.
  *
  * Reading current->cpuset->mems_generation doesn't need task_lock
@@ -836,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 }
 
 /*
+ * cpuset_migrate_mm
+ *
+ *    Migrate memory region from one set of nodes to another.
+ *
+ * Temporarilly set tasks mems_allowed to target nodes of migration,
+ * so that the migration code can allocate pages on these nodes.
+ *
+ * Call holding manage_mutex, so our current->cpuset won't change
+ * during this call, as manage_mutex holds off any attach_task()
+ * calls. Therefore we don't need to take task_lock around the
+ * call to guarantee_online_mems(), as we know no one is changing
+ * our tasks cpuset.
+ *
+ * Hold callback_mutex around the two modifications of our tasks
+ * mems_allowed to synchronize with cpuset_mems_allowed().
+ *
+ * While the mm_struct we are migrating is typically from some
+ * other task, the task_struct mems_allowed that we are hacking
+ * is for our current task, which must allocate new pages for that
+ * migrating memory region.
+ *
+ * We call cpuset_update_task_memory_state() before hacking
+ * our tasks mems_allowed, so that we are assured of being in
+ * sync with our tasks cpuset, and in particular, callbacks to
+ * cpuset_update_task_memory_state() from nested page allocations
+ * won't see any mismatch of our cpuset and task mems_generation
+ * values, so won't overwrite our hacked tasks mems_allowed
+ * nodemask.
+ */
+
+static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
+							const nodemask_t *to)
+{
+	struct task_struct *tsk = current;
+
+	cpuset_update_task_memory_state();
+
+	mutex_lock(&callback_mutex);
+	tsk->mems_allowed = *to;
+	mutex_unlock(&callback_mutex);
+
+	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+
+	mutex_lock(&callback_mutex);
+	guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
+	mutex_unlock(&callback_mutex);
+}
+
+/*
  * Handle user request to change the 'mems' memory placement
  * of a cpuset. Needs to validate the request, update the
  * cpusets mems_allowed and mems_generation, and for each
@@ -947,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 		struct mm_struct *mm = mmarray[i];
 
 		mpol_rebind_mm(mm, &cs->mems_allowed);
-		if (migrate) {
-			do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
-							MPOL_MF_MOVE_ALL);
-		}
+		if (migrate)
+			cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
 		mmput(mm);
 	}
 
@@ -1185,11 +1230,11 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	mm = get_task_mm(tsk);
 	if (mm) {
 		mpol_rebind_mm(mm, &to);
+		if (is_memory_migrate(cs))
+			cpuset_migrate_mm(mm, &from, &to);
 		mmput(mm);
 	}
 
-	if (is_memory_migrate(cs))
-		do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
 	put_task_struct(tsk);
 	synchronize_rcu();
 	if (atomic_dec_and_test(&oldcs->count))
diff --git a/kernel/exit.c b/kernel/exit.c
index bc0ec674d3f4..6c2eeb8f6390 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -127,6 +127,11 @@ static void __exit_signal(struct task_struct *tsk)
 	}
 }
 
+static void delayed_put_task_struct(struct rcu_head *rhp)
+{
+	put_task_struct(container_of(rhp, struct task_struct, rcu));
+}
+
 void release_task(struct task_struct * p)
 {
 	int zap_leader;
@@ -168,7 +173,7 @@ repeat:
 	spin_unlock(&p->proc_lock);
 	proc_pid_flush(proc_dentry);
 	release_thread(p);
-	put_task_struct(p);
+	call_rcu(&p->rcu, delayed_put_task_struct);
 
 	p = leader;
 	if (unlikely(zap_leader))
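
The point of routing the final put through call_rcu() is that lockless readers may still be dereferencing the task_struct when release_task() runs. A minimal sketch of such a reader, assuming the RCU-protected PID lookup reworked in kernel/pid.c by this same merge (illustrative only, not part of the patch):

/* Hypothetical reader: safe only because the task_struct free is RCU-deferred. */
static struct task_struct *grab_task_sketch(int nr)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_pid_type(PIDTYPE_PID, nr);
	if (p)
		get_task_struct(p);	/* pin it before leaving the RCU section */
	rcu_read_unlock();
	return p;
}
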
diff --git a/kernel/fork.c b/kernel/fork.c
index b3f7a1bb5e55..3384eb89cb1c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,10 +108,8 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
-void __put_task_struct_cb(struct rcu_head *rhp)
+void __put_task_struct(struct task_struct *tsk)
 {
-	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
-
 	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
@@ -126,6 +124,12 @@ void __put_task_struct_cb(struct rcu_head *rhp)
 	free_task(tsk);
 }
 
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+	__put_task_struct(tsk);
+}
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -721,7 +725,7 @@ out_release:
 	free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
 	free_fd_array(new_fdt->fd, new_fdt->max_fds);
 	kmem_cache_free(files_cachep, newf);
-	goto out;
+	return NULL;
 }
 
 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
@@ -1311,17 +1315,19 @@ long do_fork(unsigned long clone_flags,
 {
 	struct task_struct *p;
 	int trace = 0;
-	long pid = alloc_pidmap();
+	struct pid *pid = alloc_pid();
+	long nr;
 
-	if (pid < 0)
+	if (!pid)
 		return -EAGAIN;
+	nr = pid->nr;
 	if (unlikely(current->ptrace)) {
 		trace = fork_traceflag (clone_flags);
 		if (trace)
 			clone_flags |= CLONE_PTRACE;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1348,7 +1354,7 @@ long do_fork(unsigned long clone_flags,
 		p->state = TASK_STOPPED;
 
 		if (unlikely (trace)) {
-			current->ptrace_message = pid;
+			current->ptrace_message = nr;
 			ptrace_notify ((trace << 8) | SIGTRAP);
 		}
 
@@ -1358,10 +1364,10 @@ long do_fork(unsigned long clone_flags,
 			ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
 		}
 	} else {
-		free_pidmap(pid);
-		pid = PTR_ERR(p);
+		free_pid(pid);
+		nr = PTR_ERR(p);
 	}
-	return pid;
+	return nr;
 }
 
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
diff --git a/kernel/futex.c b/kernel/futex.c
index 9c9b2b6b22dd..5699c512057b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1039,9 +1039,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
 	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
 	int val2 = 0;
 
-	if ((op == FUTEX_WAIT) && utime) {
+	if (utime && (op == FUTEX_WAIT)) {
 		if (copy_from_user(&t, utime, sizeof(t)) != 0)
 			return -EFAULT;
+		if (!timespec_valid(&t))
+			return -EINVAL;
 		timeout = timespec_to_jiffies(&t) + 1;
 	}
 	/*
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 54274fc85321..1ab6a0ea3d14 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -129,9 +129,11 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
 	int val2 = 0;
 
-	if ((op == FUTEX_WAIT) && utime) {
+	if (utime && (op == FUTEX_WAIT)) {
 		if (get_compat_timespec(&t, utime))
 			return -EFAULT;
+		if (!timespec_valid(&t))
+			return -EINVAL;
 		timeout = timespec_to_jiffies(&t) + 1;
 	}
 	if (op >= FUTEX_REQUEUE)
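
Both futex entry points now reject malformed user timeouts before converting them to jiffies. timespec_valid() itself is not part of this diff; a sketch of the kind of check it is expected to perform (an assumption based on the usual timespec rules, with a hypothetical name):

/* Sketch only: the real helper lives in the time headers, not in this patch. */
static inline int timespec_valid_sketch(const struct timespec *ts)
{
	if (ts->tv_sec < 0)		/* negative relative timeouts make no sense */
		return 0;
	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)	/* nsec must stay below one second */
		return 0;
	return 1;
}
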
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0237a556eb1f..f181ff4dd32e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -606,6 +606,9 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 {
 	struct rb_node *node;
 
+	if (!base->first)
+		return;
+
 	if (base->get_softirq_time)
 		base->softirq_time = base->get_softirq_time();
 
@@ -655,29 +658,28 @@ void hrtimer_run_queues(void)
 /*
  * Sleep related functions:
  */
-
-struct sleep_hrtimer {
-	struct hrtimer timer;
-	struct task_struct *task;
-	int expired;
-};
-
-static int nanosleep_wakeup(struct hrtimer *timer)
+static int hrtimer_wakeup(struct hrtimer *timer)
 {
-	struct sleep_hrtimer *t =
-		container_of(timer, struct sleep_hrtimer, timer);
+	struct hrtimer_sleeper *t =
+		container_of(timer, struct hrtimer_sleeper, timer);
+	struct task_struct *task = t->task;
 
-	t->expired = 1;
-	wake_up_process(t->task);
+	t->task = NULL;
+	if (task)
+		wake_up_process(task);
 
 	return HRTIMER_NORESTART;
 }
 
-static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task)
 {
-	t->timer.function = nanosleep_wakeup;
-	t->task = current;
-	t->expired = 0;
+	sl->timer.function = hrtimer_wakeup;
+	sl->task = task;
+}
+
+static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
+{
+	hrtimer_init_sleeper(t, current);
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -685,18 +687,17 @@ static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode)
 
 		schedule();
 
-		if (unlikely(!t->expired)) {
-			hrtimer_cancel(&t->timer);
-			mode = HRTIMER_ABS;
-		}
-	} while (!t->expired && !signal_pending(current));
+		hrtimer_cancel(&t->timer);
+		mode = HRTIMER_ABS;
+
+	} while (t->task && !signal_pending(current));
 
-	return t->expired;
+	return t->task == NULL;
 }
 
 static long __sched nanosleep_restart(struct restart_block *restart)
 {
-	struct sleep_hrtimer t;
+	struct hrtimer_sleeper t;
 	struct timespec __user *rmtp;
 	struct timespec tu;
 	ktime_t time;
@@ -729,7 +730,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
 {
 	struct restart_block *restart;
-	struct sleep_hrtimer t;
+	struct hrtimer_sleeper t;
 	struct timespec tu;
 	ktime_t rem;
 
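
struct hrtimer_sleeper and hrtimer_init_sleeper() replace the file-private sleep_hrtimer, so the same wakeup callback can be reused outside nanosleep. A condensed usage sketch following the do_nanosleep() loop above; the hrtimer_init()/hrtimer_start() calls are assumed from the existing hrtimer API and the function name is illustrative:

/* Sketch: sleep until an absolute CLOCK_MONOTONIC expiry or a signal. */
static int sleep_until_sketch(ktime_t expires)
{
	struct hrtimer_sleeper t;

	hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_ABS);
	t.timer.expires = expires;
	hrtimer_init_sleeper(&t, current);	/* installs hrtimer_wakeup() */

	do {
		set_current_state(TASK_INTERRUPTIBLE);
		hrtimer_start(&t.timer, t.timer.expires, HRTIMER_ABS);

		schedule();

		hrtimer_cancel(&t.timer);
	} while (t.task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);
	return t.task == NULL;	/* non-zero: timer fired; zero: interrupted */
}
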
diff --git a/kernel/module.c b/kernel/module.c
index bd088a7c1499..d24deb0dbbc9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1254,6 +1254,7 @@ static inline int license_is_gpl_compatible(const char *license)
 	    || strcmp(license, "GPL v2") == 0
 	    || strcmp(license, "GPL and additional rights") == 0
 	    || strcmp(license, "Dual BSD/GPL") == 0
+	    || strcmp(license, "Dual MIT/GPL") == 0
 	    || strcmp(license, "Dual MPL/GPL") == 0);
 }
 
diff --git a/kernel/pid.c b/kernel/pid.c
index a9f2dfd006d2..eeb836b65ca4 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -28,8 +28,9 @@
 #include <linux/hash.h>
 
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
-static struct hlist_head *pid_hash[PIDTYPE_MAX];
+static struct hlist_head *pid_hash;
 static int pidhash_shift;
+static kmem_cache_t *pid_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 int last_pid;
@@ -60,9 +61,22 @@ typedef struct pidmap {
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
 	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
 
+/*
+ * Note: disable interrupts while the pidmap_lock is held as an
+ * interrupt might come in and do read_lock(&tasklist_lock).
+ *
+ * If we don't disable interrupts there is a nasty deadlock between
+ * detach_pid()->free_pid() and another cpu that does
+ * spin_lock(&pidmap_lock) followed by an interrupt routine that does
+ * read_lock(&tasklist_lock);
+ *
+ * After we clean up the tasklist_lock and know there are no
+ * irq handlers that take it we can leave the interrupts enabled.
+ * For now it is easier to be safe than to prove it can't happen.
+ */
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-fastcall void free_pidmap(int pid)
+static fastcall void free_pidmap(int pid)
 {
 	pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
@@ -71,7 +85,7 @@ fastcall void free_pidmap(int pid)
 	atomic_inc(&map->nr_free);
 }
 
-int alloc_pidmap(void)
+static int alloc_pidmap(void)
 {
 	int i, offset, max_scan, pid, last = last_pid;
 	pidmap_t *map;
@@ -89,12 +103,12 @@ int alloc_pidmap(void)
 			 * Free the page if someone raced with us
 			 * installing it:
 			 */
-			spin_lock(&pidmap_lock);
+			spin_lock_irq(&pidmap_lock);
 			if (map->page)
 				free_page(page);
 			else
 				map->page = (void *)page;
-			spin_unlock(&pidmap_lock);
+			spin_unlock_irq(&pidmap_lock);
 			if (unlikely(!map->page))
 				break;
 		}
@@ -131,13 +145,73 @@ int alloc_pidmap(void)
 	return -1;
 }
 
-struct pid * fastcall find_pid(enum pid_type type, int nr)
+fastcall void put_pid(struct pid *pid)
+{
+	if (!pid)
+		return;
+	if ((atomic_read(&pid->count) == 1) ||
+	     atomic_dec_and_test(&pid->count))
+		kmem_cache_free(pid_cachep, pid);
+}
+
+static void delayed_put_pid(struct rcu_head *rhp)
+{
+	struct pid *pid = container_of(rhp, struct pid, rcu);
+	put_pid(pid);
+}
+
+fastcall void free_pid(struct pid *pid)
+{
+	/* We can be called with write_lock_irq(&tasklist_lock) held */
+	unsigned long flags;
+
+	spin_lock_irqsave(&pidmap_lock, flags);
+	hlist_del_rcu(&pid->pid_chain);
+	spin_unlock_irqrestore(&pidmap_lock, flags);
+
+	free_pidmap(pid->nr);
+	call_rcu(&pid->rcu, delayed_put_pid);
+}
+
+struct pid *alloc_pid(void)
+{
+	struct pid *pid;
+	enum pid_type type;
+	int nr = -1;
+
+	pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
+	if (!pid)
+		goto out;
+
+	nr = alloc_pidmap();
+	if (nr < 0)
+		goto out_free;
+
+	atomic_set(&pid->count, 1);
+	pid->nr = nr;
+	for (type = 0; type < PIDTYPE_MAX; ++type)
+		INIT_HLIST_HEAD(&pid->tasks[type]);
+
+	spin_lock_irq(&pidmap_lock);
+	hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
+	spin_unlock_irq(&pidmap_lock);
+
+out:
+	return pid;
+
+out_free:
+	kmem_cache_free(pid_cachep, pid);
+	pid = NULL;
+	goto out;
+}
+
+struct pid * fastcall find_pid(int nr)
 {
 	struct hlist_node *elem;
 	struct pid *pid;
 
 	hlist_for_each_entry_rcu(pid, elem,
-			&pid_hash[type][pid_hashfn(nr)], pid_chain) {
+			&pid_hash[pid_hashfn(nr)], pid_chain) {
 		if (pid->nr == nr)
 			return pid;
 	}
@@ -146,77 +220,82 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
 
 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 {
-	struct pid *pid, *task_pid;
-
-	task_pid = &task->pids[type];
-	pid = find_pid(type, nr);
-	task_pid->nr = nr;
-	if (pid == NULL) {
-		INIT_LIST_HEAD(&task_pid->pid_list);
-		hlist_add_head_rcu(&task_pid->pid_chain,
-				&pid_hash[type][pid_hashfn(nr)]);
-	} else {
-		INIT_HLIST_NODE(&task_pid->pid_chain);
-		list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
-	}
+	struct pid_link *link;
+	struct pid *pid;
+
+	WARN_ON(!task->pid); /* to be removed soon */
+	WARN_ON(!nr); /* to be removed soon */
+
+	link = &task->pids[type];
+	link->pid = pid = find_pid(nr);
+	hlist_add_head_rcu(&link->node, &pid->tasks[type]);
 
 	return 0;
 }
 
-static fastcall int __detach_pid(task_t *task, enum pid_type type)
+void fastcall detach_pid(task_t *task, enum pid_type type)
 {
-	struct pid *pid, *pid_next;
-	int nr = 0;
+	struct pid_link *link;
+	struct pid *pid;
+	int tmp;
 
-	pid = &task->pids[type];
-	if (!hlist_unhashed(&pid->pid_chain)) {
+	link = &task->pids[type];
+	pid = link->pid;
 
-		if (list_empty(&pid->pid_list)) {
-			nr = pid->nr;
-			hlist_del_rcu(&pid->pid_chain);
-		} else {
-			pid_next = list_entry(pid->pid_list.next,
-						struct pid, pid_list);
-			/* insert next pid from pid_list to hash */
-			hlist_replace_rcu(&pid->pid_chain,
-					&pid_next->pid_chain);
-		}
-	}
+	hlist_del_rcu(&link->node);
+	link->pid = NULL;
 
-	list_del_rcu(&pid->pid_list);
-	pid->nr = 0;
+	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
+		if (!hlist_empty(&pid->tasks[tmp]))
+			return;
 
-	return nr;
+	free_pid(pid);
 }
 
-void fastcall detach_pid(task_t *task, enum pid_type type)
+struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
 {
-	int tmp, nr;
+	struct task_struct *result = NULL;
+	if (pid) {
+		struct hlist_node *first;
+		first = rcu_dereference(pid->tasks[type].first);
+		if (first)
+			result = hlist_entry(first, struct task_struct, pids[(type)].node);
+	}
+	return result;
+}
 
-	nr = __detach_pid(task, type);
-	if (!nr)
-		return;
+/*
+ * Must be called under rcu_read_lock() or with tasklist_lock read-held.
+ */
+task_t *find_task_by_pid_type(int type, int nr)
+{
+	return pid_task(find_pid(nr), type);
+}
 
-	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
-		if (tmp != type && find_pid(tmp, nr))
-			return;
+EXPORT_SYMBOL(find_task_by_pid_type);
 
-	free_pidmap(nr);
+struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
+{
+	struct task_struct *result;
+	rcu_read_lock();
+	result = pid_task(pid, type);
+	if (result)
+		get_task_struct(result);
+	rcu_read_unlock();
+	return result;
 }
 
-task_t *find_task_by_pid_type(int type, int nr)
+struct pid *find_get_pid(pid_t nr)
 {
 	struct pid *pid;
 
-	pid = find_pid(type, nr);
-	if (!pid)
-		return NULL;
+	rcu_read_lock();
+	pid = get_pid(find_pid(nr));
+	rcu_read_unlock();
 
-	return pid_task(&pid->pid_list, type);
+	return pid;
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
-
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -224,7 +303,7 @@ EXPORT_SYMBOL(find_task_by_pid_type);
  */
 void __init pidhash_init(void)
 {
-	int i, j, pidhash_size;
+	int i, pidhash_size;
 	unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
 
 	pidhash_shift = max(4, fls(megabytes * 4));
@@ -233,16 +312,13 @@ void __init pidhash_init(void)
 
 	printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
 		pidhash_size, pidhash_shift,
-		PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
+		pidhash_size * sizeof(struct hlist_head));
 
-	for (i = 0; i < PIDTYPE_MAX; i++) {
-		pid_hash[i] = alloc_bootmem(pidhash_size *
-					sizeof(*(pid_hash[i])));
-		if (!pid_hash[i])
-			panic("Could not alloc pidhash!\n");
-		for (j = 0; j < pidhash_size; j++)
-			INIT_HLIST_HEAD(&pid_hash[i][j]);
-	}
+	pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
+	if (!pid_hash)
+		panic("Could not alloc pidhash!\n");
+	for (i = 0; i < pidhash_size; i++)
+		INIT_HLIST_HEAD(&pid_hash[i]);
 }
 
 void __init pidmap_init(void)
@@ -251,4 +327,8 @@ void __init pidmap_init(void)
 	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, pidmap_array->page);
 	atomic_dec(&pidmap_array->nr_free);
+
+	pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
+					__alignof__(struct pid),
+					SLAB_PANIC, NULL, NULL);
 }
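
kernel/pid.c now hands out a reference-counted struct pid from a single hash, with per-type task lists hanging off it, instead of hashing one pid_link per pid type. A hedged sketch of the resulting lifecycle, using only functions added or kept by this patch (error handling trimmed, the wrapper name is illustrative):

/* Sketch: allocate a pid, look it up again, then drop every reference. */
static int pid_lifecycle_sketch(void)
{
	struct pid *pid, *found;

	pid = alloc_pid();		/* claims a pidmap slot and hashes the struct pid */
	if (!pid)
		return -EAGAIN;

	found = find_get_pid(pid->nr);	/* RCU lookup plus a get_pid() reference */
	if (found) {
		struct task_struct *tsk;

		tsk = get_pid_task(found, PIDTYPE_PID);	/* NULL here: nothing attached yet */
		if (tsk)
			put_task_struct(tsk);
		put_pid(found);
	}

	free_pid(pid);			/* unhash now; kmem_cache_free after an RCU grace period */
	return 0;
}
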
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 9fd8d4f03595..ce0dfb8f4a4e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -41,7 +41,7 @@ config SOFTWARE_SUSPEND
 	depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
 	---help---
 	  Enable the possibility of suspending the machine.
-	  It doesn't need APM.
+	  It doesn't need ACPI or APM.
 	  You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
 	  (patch for sysvinit needed).
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 8ac7c35fad77..b2a5f671d6cd 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -26,8 +26,7 @@ static inline int freezeable(struct task_struct * p)
 	    (p->flags & PF_NOFREEZE) ||
 	    (p->exit_state == EXIT_ZOMBIE) ||
 	    (p->exit_state == EXIT_DEAD) ||
-	    (p->state == TASK_STOPPED) ||
-	    (p->state == TASK_TRACED))
+	    (p->state == TASK_STOPPED))
 		return 0;
 	return 1;
 }
diff --git a/kernel/printk.c b/kernel/printk.c
index 891f7a714037..a33f342b31b7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -360,8 +360,7 @@ static void call_console_drivers(unsigned long start, unsigned long end)
 	unsigned long cur_index, start_print;
 	static int msg_level = -1;
 
-	if (((long)(start - end)) > 0)
-		BUG();
+	BUG_ON(((long)(start - end)) > 0);
 
 	cur_index = start;
 	start_print = start;
@@ -708,8 +707,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
  */
 void acquire_console_sem(void)
 {
-	if (in_interrupt())
-		BUG();
+	BUG_ON(in_interrupt());
 	down(&console_sem);
 	console_locked = 1;
 	console_may_schedule = 1;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 86a7f6c60cb2..0eeb7e66722c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -30,8 +30,7 @@
  */
 void __ptrace_link(task_t *child, task_t *new_parent)
 {
-	if (!list_empty(&child->ptrace_list))
-		BUG();
+	BUG_ON(!list_empty(&child->ptrace_list));
 	if (child->parent == new_parent)
 		return;
 	list_add(&child->ptrace_list, &child->parent->ptrace_children);
diff --git a/kernel/sched.c b/kernel/sched.c
index a9ecac398bb9..dd153d6f8a04 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -667,9 +667,13 @@ static int effective_prio(task_t *p)
 /*
  * __activate_task - move a task to the runqueue.
  */
-static inline void __activate_task(task_t *p, runqueue_t *rq)
+static void __activate_task(task_t *p, runqueue_t *rq)
 {
-	enqueue_task(p, rq->active);
+	prio_array_t *target = rq->active;
+
+	if (batch_task(p))
+		target = rq->expired;
+	enqueue_task(p, target);
 	rq->nr_running++;
 }
 
@@ -688,7 +692,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
 	unsigned long long __sleep_time = now - p->timestamp;
 	unsigned long sleep_time;
 
-	if (unlikely(p->policy == SCHED_BATCH))
+	if (batch_task(p))
 		sleep_time = 0;
 	else {
 		if (__sleep_time > NS_MAX_SLEEP_AVG)
@@ -700,21 +704,25 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
 	if (likely(sleep_time > 0)) {
 		/*
 		 * User tasks that sleep a long time are categorised as
-		 * idle and will get just interactive status to stay active &
-		 * prevent them suddenly becoming cpu hogs and starving
-		 * other processes.
+		 * idle. They will only have their sleep_avg increased to a
+		 * level that makes them just interactive priority to stay
+		 * active yet prevent them suddenly becoming cpu hogs and
+		 * starving other processes.
 		 */
-		if (p->mm && p->activated != -1 &&
-			sleep_time > INTERACTIVE_SLEEP(p)) {
-				p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
-						DEF_TIMESLICE);
+		if (p->mm && sleep_time > INTERACTIVE_SLEEP(p)) {
+			unsigned long ceiling;
+
+			ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG -
+				DEF_TIMESLICE);
+			if (p->sleep_avg < ceiling)
+				p->sleep_avg = ceiling;
 		} else {
 			/*
 			 * Tasks waking from uninterruptible sleep are
 			 * limited in their sleep_avg rise as they
 			 * are likely to be waiting on I/O
 			 */
-			if (p->activated == -1 && p->mm) {
+			if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
 				if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
 					sleep_time = 0;
 				else if (p->sleep_avg + sleep_time >=
@@ -769,7 +777,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
 	 * This checks to make sure it's not an uninterruptible task
 	 * that is now waking up.
 	 */
-	if (!p->activated) {
+	if (p->sleep_type == SLEEP_NORMAL) {
 		/*
 		 * Tasks which were woken up by interrupts (ie. hw events)
 		 * are most likely of interactive nature. So we give them
@@ -778,13 +786,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
 		 * on a CPU, first time around:
 		 */
 		if (in_interrupt())
-			p->activated = 2;
+			p->sleep_type = SLEEP_INTERRUPTED;
 		else {
 			/*
 			 * Normal first-time wakeups get a credit too for
 			 * on-runqueue time, but it will be weighted down:
 			 */
-			p->activated = 1;
+			p->sleep_type = SLEEP_INTERACTIVE;
 		}
 	}
 	p->timestamp = now;
@@ -1272,19 +1280,19 @@ out_activate:
 			 * Tasks on involuntary sleep don't earn
 			 * sleep_avg beyond just interactive state.
 			 */
-			p->activated = -1;
-	}
+			p->sleep_type = SLEEP_NONINTERACTIVE;
+	} else
 
 	/*
 	 * Tasks that have marked their sleep as noninteractive get
-	 * woken up without updating their sleep average. (i.e. their
-	 * sleep is handled in a priority-neutral manner, no priority
-	 * boost and no penalty.)
+	 * woken up with their sleep average not weighted in an
+	 * interactive way.
 	 */
 	if (old_state & TASK_NONINTERACTIVE)
-		__activate_task(p, rq);
-	else
-		activate_task(p, rq, cpu == this_cpu);
+		p->sleep_type = SLEEP_NONINTERACTIVE;
+
+
+	activate_task(p, rq, cpu == this_cpu);
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
 	 * has indicated that it will leave the CPU in short order)
@@ -1658,6 +1666,21 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
+unsigned long nr_active(void)
+{
+	unsigned long i, running = 0, uninterruptible = 0;
+
+	for_each_online_cpu(i) {
+		running += cpu_rq(i)->nr_running;
+		uninterruptible += cpu_rq(i)->nr_uninterruptible;
+	}
+
+	if (unlikely((long)uninterruptible < 0))
+		uninterruptible = 0;
+
+	return running + uninterruptible;
+}
+
 #ifdef CONFIG_SMP
 
 /*
@@ -2860,6 +2883,12 @@ EXPORT_SYMBOL(sub_preempt_count);
 
 #endif
 
+static inline int interactive_sleep(enum sleep_type sleep_type)
+{
+	return (sleep_type == SLEEP_INTERACTIVE ||
+		sleep_type == SLEEP_INTERRUPTED);
+}
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2983,12 +3012,12 @@ go_idle:
 	queue = array->queue + idx;
 	next = list_entry(queue->next, task_t, run_list);
 
-	if (!rt_task(next) && next->activated > 0) {
+	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
 		unsigned long long delta = now - next->timestamp;
 		if (unlikely((long long)(now - next->timestamp) < 0))
 			delta = 0;
 
-		if (next->activated == 1)
+		if (next->sleep_type == SLEEP_INTERACTIVE)
 			delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
 
 		array = next->array;
@@ -2998,10 +3027,9 @@ go_idle:
 			dequeue_task(next, array);
 			next->prio = new_prio;
 			enqueue_task(next, array);
-		} else
-			requeue_task(next, array);
+		}
 	}
-	next->activated = 0;
+	next->sleep_type = SLEEP_NORMAL;
 switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);
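
The overloaded p->activated values (-1, 0, 1, 2) are replaced by an explicit enum sleep_type; the enum itself is declared in sched.h and is not part of this diff. A sketch of the states as they are used above, with the old numeric values noted for reference (an assumption about the header, hedged accordingly):

/* Sketch; the real declaration lives in <linux/sched.h>, not in this hunk. */
enum sleep_type {
	SLEEP_NORMAL,		/* was  0: nothing special about this wakeup     */
	SLEEP_NONINTERACTIVE,	/* was -1: involuntary/uninterruptible sleep     */
	SLEEP_INTERACTIVE,	/* was  1: ordinary wakeup, weighted sleep credit */
	SLEEP_INTERRUPTED,	/* was  2: woken from interrupt context          */
};

Only the last two states count as "interactive sleep" in schedule(), which is exactly what the new interactive_sleep() helper above tests.
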
diff --git a/kernel/signal.c b/kernel/signal.c
index 4922928d91f6..5ccaac505e8d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -769,8 +769,7 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 {
 	int ret = 0;
 
-	if (!irqs_disabled())
-		BUG();
+	BUG_ON(!irqs_disabled());
 	assert_spin_locked(&t->sighand->siglock);
 
 	/* Short-circuit ignored signals. */
@@ -1384,8 +1383,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 		 * the overrun count. Other uses should not try to
 		 * send the signal multiple times.
 		 */
-		if (q->info.si_code != SI_TIMER)
-			BUG();
+		BUG_ON(q->info.si_code != SI_TIMER);
 		q->info.si_overrun++;
 		goto out;
 	}
@@ -1560,6 +1558,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
 	/* Let the debugger run. */
 	set_current_state(TASK_TRACED);
 	spin_unlock_irq(&current->sighand->siglock);
+	try_to_freeze();
 	read_lock(&tasklist_lock);
 	if (likely(current->ptrace & PT_PTRACED) &&
 	    likely(current->parent != current->real_parent ||
diff --git a/kernel/sys.c b/kernel/sys.c
index 7ef7f6054c28..0b6ec0e7936f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1372,18 +1372,29 @@ asmlinkage long sys_getsid(pid_t pid)
 asmlinkage long sys_setsid(void)
 {
 	struct task_struct *group_leader = current->group_leader;
-	struct pid *pid;
+	pid_t session;
 	int err = -EPERM;
 
 	mutex_lock(&tty_mutex);
 	write_lock_irq(&tasklist_lock);
 
-	pid = find_pid(PIDTYPE_PGID, group_leader->pid);
-	if (pid)
+	/* Fail if I am already a session leader */
+	if (group_leader->signal->leader)
+		goto out;
+
+	session = group_leader->pid;
+	/* Fail if a process group id already exists that equals the
+	 * proposed session id.
+	 *
+	 * Don't check if session id == 1 because kernel threads use this
+	 * session id and so the check will always fail and make it so
+	 * init cannot successfully call setsid.
+	 */
+	if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
 		goto out;
 
 	group_leader->signal->leader = 1;
-	__set_special_pids(group_leader->pid, group_leader->pid);
+	__set_special_pids(session, session);
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
 	err = process_group(group_leader);
diff --git a/kernel/time.c b/kernel/time.c
index ff8e7019c4c4..b00ddc71cedb 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -410,7 +410,7 @@ EXPORT_SYMBOL(current_kernel_time);
  * current_fs_time - Return FS time
  * @sb: Superblock.
  *
- * Return the current time truncated to the time granuality supported by
+ * Return the current time truncated to the time granularity supported by
  * the fs.
  */
 struct timespec current_fs_time(struct super_block *sb)
@@ -421,11 +421,11 @@ struct timespec current_fs_time(struct super_block *sb)
 EXPORT_SYMBOL(current_fs_time);
 
 /**
- * timespec_trunc - Truncate timespec to a granuality
+ * timespec_trunc - Truncate timespec to a granularity
  * @t: Timespec
- * @gran: Granuality in ns.
+ * @gran: Granularity in ns.
 *
- * Truncate a timespec to a granuality. gran must be smaller than a second.
+ * Truncate a timespec to a granularity. gran must be smaller than a second.
  * Always rounds down.
  *
  * This function should be only used for timestamps returned by
diff --git a/kernel/timer.c b/kernel/timer.c
index ab189dd187cb..c3a874f1393c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -54,7 +54,6 @@ EXPORT_SYMBOL(jiffies_64);
 /*
  * per-CPU timer vector definitions:
  */
-
 #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
 #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
 #define TVN_SIZE (1 << TVN_BITS)
@@ -62,11 +61,6 @@ EXPORT_SYMBOL(jiffies_64);
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
 
-struct timer_base_s {
-	spinlock_t lock;
-	struct timer_list *running_timer;
-};
-
 typedef struct tvec_s {
 	struct list_head vec[TVN_SIZE];
 } tvec_t;
@@ -76,7 +70,8 @@ typedef struct tvec_root_s {
 } tvec_root_t;
 
 struct tvec_t_base_s {
-	struct timer_base_s t_base;
+	spinlock_t lock;
+	struct timer_list *running_timer;
 	unsigned long timer_jiffies;
 	tvec_root_t tv1;
 	tvec_t tv2;
@@ -87,13 +82,14 @@ struct tvec_t_base_s {
 
 typedef struct tvec_t_base_s tvec_base_t;
 static DEFINE_PER_CPU(tvec_base_t *, tvec_bases);
-static tvec_base_t boot_tvec_bases;
+tvec_base_t boot_tvec_bases;
+EXPORT_SYMBOL(boot_tvec_bases);
 
 static inline void set_running_timer(tvec_base_t *base,
 					struct timer_list *timer)
 {
 #ifdef CONFIG_SMP
-	base->t_base.running_timer = timer;
+	base->running_timer = timer;
 #endif
 }
 
@@ -139,15 +135,6 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 	list_add_tail(&timer->entry, vec);
 }
 
-typedef struct timer_base_s timer_base_t;
-/*
- * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases)
- * at compile time, and we need timer->base to lock the timer.
- */
-timer_base_t __init_timer_base
-	____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
-EXPORT_SYMBOL(__init_timer_base);
-
 /***
  * init_timer - initialize a timer.
  * @timer: the timer to be initialized
@@ -158,7 +145,7 @@ EXPORT_SYMBOL(__init_timer_base);
 void fastcall init_timer(struct timer_list *timer)
 {
 	timer->entry.next = NULL;
-	timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base;
+	timer->base = per_cpu(tvec_bases, raw_smp_processor_id());
 }
 EXPORT_SYMBOL(init_timer);
 
@@ -174,7 +161,7 @@ static inline void detach_timer(struct timer_list *timer,
 }
 
 /*
- * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock
+ * We are using hashed locking: holding per_cpu(tvec_bases).lock
  * means that all timers which are tied to this base via timer->base are
  * locked, and the base itself is locked too.
 *
@@ -185,10 +172,10 @@ static inline void detach_timer(struct timer_list *timer,
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 */
-static timer_base_t *lock_timer_base(struct timer_list *timer,
+static tvec_base_t *lock_timer_base(struct timer_list *timer,
 					unsigned long *flags)
 {
-	timer_base_t *base;
+	tvec_base_t *base;
 
 	for (;;) {
 		base = timer->base;
@@ -205,8 +192,7 @@ static timer_base_t *lock_timer_base(struct timer_list *timer,
 
 int __mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	timer_base_t *base;
-	tvec_base_t *new_base;
+	tvec_base_t *base, *new_base;
 	unsigned long flags;
 	int ret = 0;
 
@@ -221,7 +207,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 
 	new_base = __get_cpu_var(tvec_bases);
 
-	if (base != &new_base->t_base) {
+	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
 		 * However we can't change timer's base while it is running,
@@ -229,21 +215,19 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 		 * handler yet has not finished. This also guarantees that
 		 * the timer is serialized wrt itself.
 		 */
-		if (unlikely(base->running_timer == timer)) {
-			/* The timer remains on a former base */
-			new_base = container_of(base, tvec_base_t, t_base);
-		} else {
+		if (likely(base->running_timer != timer)) {
 			/* See the comment in lock_timer_base() */
 			timer->base = NULL;
 			spin_unlock(&base->lock);
-			spin_lock(&new_base->t_base.lock);
-			timer->base = &new_base->t_base;
+			base = new_base;
+			spin_lock(&base->lock);
+			timer->base = base;
 		}
 	}
 
 	timer->expires = expires;
-	internal_add_timer(new_base, timer);
-	spin_unlock_irqrestore(&new_base->t_base.lock, flags);
+	internal_add_timer(base, timer);
+	spin_unlock_irqrestore(&base->lock, flags);
 
 	return ret;
 }
@@ -263,10 +247,10 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	unsigned long flags;
 
 	BUG_ON(timer_pending(timer) || !timer->function);
-	spin_lock_irqsave(&base->t_base.lock, flags);
-	timer->base = &base->t_base;
+	spin_lock_irqsave(&base->lock, flags);
+	timer->base = base;
 	internal_add_timer(base, timer);
-	spin_unlock_irqrestore(&base->t_base.lock, flags);
+	spin_unlock_irqrestore(&base->lock, flags);
 }
 
 
@@ -319,7 +303,7 @@ EXPORT_SYMBOL(mod_timer);
 */
 int del_timer(struct timer_list *timer)
 {
-	timer_base_t *base;
+	tvec_base_t *base;
 	unsigned long flags;
 	int ret = 0;
 
@@ -346,7 +330,7 @@ EXPORT_SYMBOL(del_timer);
 */
 int try_to_del_timer_sync(struct timer_list *timer)
 {
-	timer_base_t *base;
+	tvec_base_t *base;
 	unsigned long flags;
 	int ret = -1;
 
@@ -410,7 +394,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
 		struct timer_list *tmp;
 
 		tmp = list_entry(curr, struct timer_list, entry);
-		BUG_ON(tmp->base != &base->t_base);
+		BUG_ON(tmp->base != base);
 		curr = curr->next;
 		internal_add_timer(base, tmp);
 	}
@@ -432,7 +416,7 @@ static inline void __run_timers(tvec_base_t *base)
 {
 	struct timer_list *timer;
 
-	spin_lock_irq(&base->t_base.lock);
+	spin_lock_irq(&base->lock);
 	while (time_after_eq(jiffies, base->timer_jiffies)) {
 		struct list_head work_list = LIST_HEAD_INIT(work_list);
 		struct list_head *head = &work_list;
@@ -458,7 +442,7 @@ static inline void __run_timers(tvec_base_t *base)
 
 			set_running_timer(base, timer);
 			detach_timer(timer, 1);
-			spin_unlock_irq(&base->t_base.lock);
+			spin_unlock_irq(&base->lock);
 			{
 				int preempt_count = preempt_count();
 				fn(data);
@@ -471,11 +455,11 @@ static inline void __run_timers(tvec_base_t *base)
 					BUG();
 				}
 			}
-			spin_lock_irq(&base->t_base.lock);
+			spin_lock_irq(&base->lock);
 		}
 	}
 	set_running_timer(base, NULL);
-	spin_unlock_irq(&base->t_base.lock);
+	spin_unlock_irq(&base->lock);
 }
 
 #ifdef CONFIG_NO_IDLE_HZ
@@ -506,7 +490,7 @@ unsigned long next_timer_interrupt(void)
 	hr_expires += jiffies;
 
 	base = __get_cpu_var(tvec_bases);
-	spin_lock(&base->t_base.lock);
+	spin_lock(&base->lock);
 	expires = base->timer_jiffies + (LONG_MAX >> 1);
 	list = NULL;
 
@@ -554,7 +538,7 @@ found:
 				expires = nte->expires;
 		}
 	}
-	spin_unlock(&base->t_base.lock);
+	spin_unlock(&base->lock);
 
 	if (time_before(hr_expires, expires))
 		return hr_expires;
@@ -841,7 +825,7 @@ void update_process_times(int user_tick)
 */
 static unsigned long count_active_tasks(void)
 {
-	return (nr_running() + nr_uninterruptible()) * FIXED_1;
+	return nr_active() * FIXED_1;
 }
 
 /*
@@ -1262,7 +1246,7 @@ static int __devinit init_timers_cpu(int cpu)
 		}
 		per_cpu(tvec_bases, cpu) = base;
 	}
-	spin_lock_init(&base->t_base.lock);
+	spin_lock_init(&base->lock);
 	for (j = 0; j < TVN_SIZE; j++) {
 		INIT_LIST_HEAD(base->tv5.vec + j);
 		INIT_LIST_HEAD(base->tv4.vec + j);
@@ -1284,7 +1268,7 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
 	while (!list_empty(head)) {
 		timer = list_entry(head->next, struct timer_list, entry);
 		detach_timer(timer, 0);
-		timer->base = &new_base->t_base;
+		timer->base = new_base;
 		internal_add_timer(new_base, timer);
 	}
 }
@@ -1300,11 +1284,11 @@ static void __devinit migrate_timers(int cpu)
 	new_base = get_cpu_var(tvec_bases);
 
 	local_irq_disable();
-	spin_lock(&new_base->t_base.lock);
-	spin_lock(&old_base->t_base.lock);
+	spin_lock(&new_base->lock);
+	spin_lock(&old_base->lock);
+
+	BUG_ON(old_base->running_timer);
 
-	if (old_base->t_base.running_timer)
-		BUG();
 	for (i = 0; i < TVR_SIZE; i++)
 		migrate_timer_list(new_base, old_base->tv1.vec + i);
 	for (i = 0; i < TVN_SIZE; i++) {
@@ -1314,8 +1298,8 @@ static void __devinit migrate_timers(int cpu)
 		migrate_timer_list(new_base, old_base->tv5.vec + i);
 	}
 
-	spin_unlock(&old_base->t_base.lock);
-	spin_unlock(&new_base->t_base.lock);
+	spin_unlock(&old_base->lock);
+	spin_unlock(&new_base->lock);
 	local_irq_enable();
 	put_cpu_var(tvec_bases);
 }
@@ -1495,8 +1479,7 @@ register_time_interpolator(struct time_interpolator *ti)
 	unsigned long flags;
 
 	/* Sanity check */
-	if (ti->frequency == 0 || ti->mask == 0)
-		BUG();
+	BUG_ON(ti->frequency == 0 || ti->mask == 0);
 
 	ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency;
 	spin_lock(&time_interpolator_lock);
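
With timer_base_s folded back into tvec_t_base_s, timer->base now points straight at the per-CPU base and every lock takes &base->lock. The unchanged middle of lock_timer_base() is not visible in these hunks; a sketch of the pattern it implements, reconstructed around the fragments shown above (illustrative, not a verbatim quote of the file):

static tvec_base_t *lock_timer_base(struct timer_list *timer,
					unsigned long *flags)
{
	tvec_base_t *base;

	for (;;) {
		base = timer->base;
		if (likely(base != NULL)) {
			spin_lock_irqsave(&base->lock, *flags);
			if (likely(base == timer->base))
				return base;	/* nobody migrated it; base is locked */
			/* the timer moved to another CPU's base; retry */
			spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

__mod_timer() relies on exactly this: it briefly sets timer->base to NULL while switching bases, so a concurrent lock_timer_base() spins until the new base pointer becomes visible.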