Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c          |  12
-rw-r--r--  kernel/audit.c         |   2
-rw-r--r--  kernel/cpuset.c        |  69
-rw-r--r--  kernel/exit.c          |  11
-rw-r--r--  kernel/fork.c          |  28
-rw-r--r--  kernel/futex.c         |   4
-rw-r--r--  kernel/futex_compat.c  |   4
-rw-r--r--  kernel/hrtimer.c       |  50
-rw-r--r--  kernel/irq/Makefile    |   3
-rw-r--r--  kernel/irq/migration.c |   5
-rw-r--r--  kernel/module.c        |   1
-rw-r--r--  kernel/panic.c         |   1
-rw-r--r--  kernel/pid.c           | 212
-rw-r--r--  kernel/power/Kconfig   |   2
-rw-r--r--  kernel/power/process.c |   3
-rw-r--r--  kernel/printk.c        |   6
-rw-r--r--  kernel/ptrace.c        |   3
-rw-r--r--  kernel/sched.c         | 144
-rw-r--r--  kernel/signal.c        |   8
-rw-r--r--  kernel/sys.c           |  19
-rw-r--r--  kernel/sys_ni.c        |  12
-rw-r--r--  kernel/time.c          |   8
-rw-r--r--  kernel/timer.c         | 126
23 files changed, 470 insertions, 263 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 065d8b4e51ef..b327f4d20104 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -449,8 +449,8 @@ static void do_acct_process(long exitcode, struct file *file)
         /* calculate run_time in nsec*/
         do_posix_clock_monotonic_gettime(&uptime);
         run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
-        run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC
-                       + current->start_time.tv_nsec;
+        run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC
+                       + current->group_leader->start_time.tv_nsec;
         /* convert nsec -> AHZ */
         elapsed = nsec_to_AHZ(run_time);
 #if ACCT_VERSION==3
@@ -469,10 +469,10 @@ static void do_acct_process(long exitcode, struct file *file)
 #endif
         do_div(elapsed, AHZ);
         ac.ac_btime = xtime.tv_sec - elapsed;
-        jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime,
+        jiffies = cputime_to_jiffies(cputime_add(current->utime,
                                                  current->signal->utime));
         ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
-        jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime,
+        jiffies = cputime_to_jiffies(cputime_add(current->stime,
                                                  current->signal->stime));
         ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
         /* we really need to bite the bullet and change layout */
@@ -522,9 +522,9 @@ static void do_acct_process(long exitcode, struct file *file)
         ac.ac_io = encode_comp_t(0 /* current->io_usage */);   /* %% */
         ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
         ac.ac_minflt = encode_comp_t(current->signal->min_flt +
-                                     current->group_leader->min_flt);
+                                     current->min_flt);
         ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
-                                     current->group_leader->maj_flt);
+                                     current->maj_flt);
         ac.ac_swaps = encode_comp_t(0);
         ac.ac_exitcode = exitcode;
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 04fe2e301b61..c8ccbd09048f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -578,7 +578,7 @@ static int __init audit_enable(char *str)
                audit_initialized ? "" : " (after initialization)");
         if (audit_initialized)
                 audit_enabled = audit_default;
-        return 0;
+        return 1;
 }
 
 __setup("audit=", audit_enable);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 18aea1bd1284..72248d1b9e3f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -616,12 +616,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
616 * current->cpuset if a task has its memory placement changed. 616 * current->cpuset if a task has its memory placement changed.
617 * Do not call this routine if in_interrupt(). 617 * Do not call this routine if in_interrupt().
618 * 618 *
619 * Call without callback_mutex or task_lock() held. May be called 619 * Call without callback_mutex or task_lock() held. May be
620 * with or without manage_mutex held. Doesn't need task_lock to guard 620 * called with or without manage_mutex held. Thanks in part to
621 * against another task changing a non-NULL cpuset pointer to NULL, 621 * 'the_top_cpuset_hack', the tasks cpuset pointer will never
622 * as that is only done by a task on itself, and if the current task 622 * be NULL. This routine also might acquire callback_mutex and
623 * is here, it is not simultaneously in the exit code NULL'ing its
624 * cpuset pointer. This routine also might acquire callback_mutex and
625 * current->mm->mmap_sem during call. 623 * current->mm->mmap_sem during call.
626 * 624 *
627 * Reading current->cpuset->mems_generation doesn't need task_lock 625 * Reading current->cpuset->mems_generation doesn't need task_lock
@@ -836,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf)
836} 834}
837 835
838/* 836/*
837 * cpuset_migrate_mm
838 *
839 * Migrate memory region from one set of nodes to another.
840 *
841 * Temporarilly set tasks mems_allowed to target nodes of migration,
842 * so that the migration code can allocate pages on these nodes.
843 *
844 * Call holding manage_mutex, so our current->cpuset won't change
845 * during this call, as manage_mutex holds off any attach_task()
846 * calls. Therefore we don't need to take task_lock around the
847 * call to guarantee_online_mems(), as we know no one is changing
848 * our tasks cpuset.
849 *
850 * Hold callback_mutex around the two modifications of our tasks
851 * mems_allowed to synchronize with cpuset_mems_allowed().
852 *
853 * While the mm_struct we are migrating is typically from some
854 * other task, the task_struct mems_allowed that we are hacking
855 * is for our current task, which must allocate new pages for that
856 * migrating memory region.
857 *
858 * We call cpuset_update_task_memory_state() before hacking
859 * our tasks mems_allowed, so that we are assured of being in
860 * sync with our tasks cpuset, and in particular, callbacks to
861 * cpuset_update_task_memory_state() from nested page allocations
862 * won't see any mismatch of our cpuset and task mems_generation
863 * values, so won't overwrite our hacked tasks mems_allowed
864 * nodemask.
865 */
866
867static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
868 const nodemask_t *to)
869{
870 struct task_struct *tsk = current;
871
872 cpuset_update_task_memory_state();
873
874 mutex_lock(&callback_mutex);
875 tsk->mems_allowed = *to;
876 mutex_unlock(&callback_mutex);
877
878 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
879
880 mutex_lock(&callback_mutex);
881 guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
882 mutex_unlock(&callback_mutex);
883}
884
885/*
839 * Handle user request to change the 'mems' memory placement 886 * Handle user request to change the 'mems' memory placement
840 * of a cpuset. Needs to validate the request, update the 887 * of a cpuset. Needs to validate the request, update the
841 * cpusets mems_allowed and mems_generation, and for each 888 * cpusets mems_allowed and mems_generation, and for each
@@ -947,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
947 struct mm_struct *mm = mmarray[i]; 994 struct mm_struct *mm = mmarray[i];
948 995
949 mpol_rebind_mm(mm, &cs->mems_allowed); 996 mpol_rebind_mm(mm, &cs->mems_allowed);
950 if (migrate) { 997 if (migrate)
951 do_migrate_pages(mm, &oldmem, &cs->mems_allowed, 998 cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
952 MPOL_MF_MOVE_ALL);
953 }
954 mmput(mm); 999 mmput(mm);
955 } 1000 }
956 1001
@@ -1185,11 +1230,11 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1185 mm = get_task_mm(tsk); 1230 mm = get_task_mm(tsk);
1186 if (mm) { 1231 if (mm) {
1187 mpol_rebind_mm(mm, &to); 1232 mpol_rebind_mm(mm, &to);
1233 if (is_memory_migrate(cs))
1234 cpuset_migrate_mm(mm, &from, &to);
1188 mmput(mm); 1235 mmput(mm);
1189 } 1236 }
1190 1237
1191 if (is_memory_migrate(cs))
1192 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
1193 put_task_struct(tsk); 1238 put_task_struct(tsk);
1194 synchronize_rcu(); 1239 synchronize_rcu();
1195 if (atomic_dec_and_test(&oldcs->count)) 1240 if (atomic_dec_and_test(&oldcs->count))
diff --git a/kernel/exit.c b/kernel/exit.c
index bc0ec674d3f4..1a9787ac6173 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -34,6 +34,7 @@
 #include <linux/mutex.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
+#include <linux/pipe_fs_i.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -127,6 +128,11 @@ static void __exit_signal(struct task_struct *tsk)
         }
 }
 
+static void delayed_put_task_struct(struct rcu_head *rhp)
+{
+        put_task_struct(container_of(rhp, struct task_struct, rcu));
+}
+
 void release_task(struct task_struct * p)
 {
         int zap_leader;
@@ -168,7 +174,7 @@ repeat:
         spin_unlock(&p->proc_lock);
         proc_pid_flush(proc_dentry);
         release_thread(p);
-        put_task_struct(p);
+        call_rcu(&p->rcu, delayed_put_task_struct);
 
         p = leader;
         if (unlikely(zap_leader))
@@ -936,6 +942,9 @@ fastcall NORET_TYPE void do_exit(long code)
         if (tsk->io_context)
                 exit_io_context();
 
+        if (tsk->splice_pipe)
+                __free_pipe_info(tsk->splice_pipe);
+
         /* PF_DEAD causes final put_task_struct after we schedule. */
         preempt_disable();
         BUG_ON(tsk->flags & PF_DEAD);
diff --git a/kernel/fork.c b/kernel/fork.c
index b3f7a1bb5e55..3384eb89cb1c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,10 +108,8 @@ void free_task(struct task_struct *tsk)
108} 108}
109EXPORT_SYMBOL(free_task); 109EXPORT_SYMBOL(free_task);
110 110
111void __put_task_struct_cb(struct rcu_head *rhp) 111void __put_task_struct(struct task_struct *tsk)
112{ 112{
113 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
114
115 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 113 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
116 WARN_ON(atomic_read(&tsk->usage)); 114 WARN_ON(atomic_read(&tsk->usage));
117 WARN_ON(tsk == current); 115 WARN_ON(tsk == current);
@@ -126,6 +124,12 @@ void __put_task_struct_cb(struct rcu_head *rhp)
126 free_task(tsk); 124 free_task(tsk);
127} 125}
128 126
127void __put_task_struct_cb(struct rcu_head *rhp)
128{
129 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
130 __put_task_struct(tsk);
131}
132
129void __init fork_init(unsigned long mempages) 133void __init fork_init(unsigned long mempages)
130{ 134{
131#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 135#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -721,7 +725,7 @@ out_release:
721 free_fdset (new_fdt->open_fds, new_fdt->max_fdset); 725 free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
722 free_fd_array(new_fdt->fd, new_fdt->max_fds); 726 free_fd_array(new_fdt->fd, new_fdt->max_fds);
723 kmem_cache_free(files_cachep, newf); 727 kmem_cache_free(files_cachep, newf);
724 goto out; 728 return NULL;
725} 729}
726 730
727static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 731static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
@@ -1311,17 +1315,19 @@ long do_fork(unsigned long clone_flags,
1311{ 1315{
1312 struct task_struct *p; 1316 struct task_struct *p;
1313 int trace = 0; 1317 int trace = 0;
1314 long pid = alloc_pidmap(); 1318 struct pid *pid = alloc_pid();
1319 long nr;
1315 1320
1316 if (pid < 0) 1321 if (!pid)
1317 return -EAGAIN; 1322 return -EAGAIN;
1323 nr = pid->nr;
1318 if (unlikely(current->ptrace)) { 1324 if (unlikely(current->ptrace)) {
1319 trace = fork_traceflag (clone_flags); 1325 trace = fork_traceflag (clone_flags);
1320 if (trace) 1326 if (trace)
1321 clone_flags |= CLONE_PTRACE; 1327 clone_flags |= CLONE_PTRACE;
1322 } 1328 }
1323 1329
1324 p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 1330 p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr);
1325 /* 1331 /*
1326 * Do this prior waking up the new thread - the thread pointer 1332 * Do this prior waking up the new thread - the thread pointer
1327 * might get invalid after that point, if the thread exits quickly. 1333 * might get invalid after that point, if the thread exits quickly.
@@ -1348,7 +1354,7 @@ long do_fork(unsigned long clone_flags,
1348 p->state = TASK_STOPPED; 1354 p->state = TASK_STOPPED;
1349 1355
1350 if (unlikely (trace)) { 1356 if (unlikely (trace)) {
1351 current->ptrace_message = pid; 1357 current->ptrace_message = nr;
1352 ptrace_notify ((trace << 8) | SIGTRAP); 1358 ptrace_notify ((trace << 8) | SIGTRAP);
1353 } 1359 }
1354 1360
@@ -1358,10 +1364,10 @@ long do_fork(unsigned long clone_flags,
1358 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 1364 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
1359 } 1365 }
1360 } else { 1366 } else {
1361 free_pidmap(pid); 1367 free_pid(pid);
1362 pid = PTR_ERR(p); 1368 nr = PTR_ERR(p);
1363 } 1369 }
1364 return pid; 1370 return nr;
1365} 1371}
1366 1372
1367#ifndef ARCH_MIN_MMSTRUCT_ALIGN 1373#ifndef ARCH_MIN_MMSTRUCT_ALIGN
diff --git a/kernel/futex.c b/kernel/futex.c
index 9c9b2b6b22dd..5699c512057b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1039,9 +1039,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
         unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
         int val2 = 0;
 
-        if ((op == FUTEX_WAIT) && utime) {
+        if (utime && (op == FUTEX_WAIT)) {
                 if (copy_from_user(&t, utime, sizeof(t)) != 0)
                         return -EFAULT;
+                if (!timespec_valid(&t))
+                        return -EINVAL;
                 timeout = timespec_to_jiffies(&t) + 1;
         }
         /*
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 54274fc85321..1ab6a0ea3d14 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -129,9 +129,11 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
         unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
         int val2 = 0;
 
-        if ((op == FUTEX_WAIT) && utime) {
+        if (utime && (op == FUTEX_WAIT)) {
                 if (get_compat_timespec(&t, utime))
                         return -EFAULT;
+                if (!timespec_valid(&t))
+                        return -EINVAL;
                 timeout = timespec_to_jiffies(&t) + 1;
         }
         if (op >= FUTEX_REQUEUE)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0237a556eb1f..d2a7296c8251 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -501,6 +501,7 @@ int hrtimer_cancel(struct hrtimer *timer)
 
                 if (ret >= 0)
                         return ret;
+                cpu_relax();
         }
 }
506 507
@@ -606,6 +607,9 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 {
         struct rb_node *node;
 
+        if (!base->first)
+                return;
+
         if (base->get_softirq_time)
                 base->softirq_time = base->get_softirq_time();
 
@@ -655,29 +659,28 @@ void hrtimer_run_queues(void)
 /*
  * Sleep related functions:
  */
-
-struct sleep_hrtimer {
-        struct hrtimer timer;
-        struct task_struct *task;
-        int expired;
-};
-
-static int nanosleep_wakeup(struct hrtimer *timer)
+static int hrtimer_wakeup(struct hrtimer *timer)
 {
-        struct sleep_hrtimer *t =
-                container_of(timer, struct sleep_hrtimer, timer);
+        struct hrtimer_sleeper *t =
+                container_of(timer, struct hrtimer_sleeper, timer);
+        struct task_struct *task = t->task;
 
-        t->expired = 1;
-        wake_up_process(t->task);
+        t->task = NULL;
+        if (task)
+                wake_up_process(task);
 
         return HRTIMER_NORESTART;
 }
 
-static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task)
 {
-        t->timer.function = nanosleep_wakeup;
-        t->task = current;
-        t->expired = 0;
+        sl->timer.function = hrtimer_wakeup;
+        sl->task = task;
+}
+
+static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
+{
+        hrtimer_init_sleeper(t, current);
 
         do {
                 set_current_state(TASK_INTERRUPTIBLE);
@@ -685,18 +688,17 @@ static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode)
 
                 schedule();
 
-                if (unlikely(!t->expired)) {
-                        hrtimer_cancel(&t->timer);
-                        mode = HRTIMER_ABS;
-                }
-        } while (!t->expired && !signal_pending(current));
+                hrtimer_cancel(&t->timer);
+                mode = HRTIMER_ABS;
+
+        } while (t->task && !signal_pending(current));
 
-        return t->expired;
+        return t->task == NULL;
 }
 
 static long __sched nanosleep_restart(struct restart_block *restart)
 {
-        struct sleep_hrtimer t;
+        struct hrtimer_sleeper t;
         struct timespec __user *rmtp;
         struct timespec tu;
         ktime_t time;
@@ -729,7 +731,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
                        const enum hrtimer_mode mode, const clockid_t clockid)
 {
         struct restart_block *restart;
-        struct sleep_hrtimer t;
+        struct hrtimer_sleeper t;
         struct timespec tu;
         ktime_t rem;
 
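The hrtimer.c hunks above swap the private sleep_hrtimer for the shared hrtimer_sleeper. A minimal sketch of the resulting usage pattern follows, assuming the hrtimer API of this kernel series (hrtimer_init, hrtimer_start, hrtimer_cancel, HRTIMER_ABS); the wrapper function itself is illustrative and not part of the patch.

/*
 * Illustrative sketch only -- not part of this patch. The timer callback
 * wakes t.task and clears it, so "t.task == NULL" after the loop means
 * the timeout expired rather than a signal arriving.
 */
static int __sched wait_until(ktime_t abs_time)
{
        struct hrtimer_sleeper t;

        hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_ABS);
        t.timer.expires = abs_time;
        hrtimer_init_sleeper(&t, current);

        do {
                set_current_state(TASK_INTERRUPTIBLE);
                hrtimer_start(&t.timer, t.timer.expires, HRTIMER_ABS);

                schedule();

                hrtimer_cancel(&t.timer);
        } while (t.task && !signal_pending(current));

        __set_current_state(TASK_RUNNING);

        return t.task == NULL;  /* 1: timer fired, 0: interrupted by a signal */
}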
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 2b33f852be3e..9f77f50d8143 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,4 +1,5 @@
 
-obj-y := handle.o manage.o spurious.o migration.o
+obj-y := handle.o manage.o spurious.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 52a8655fa080..134f9f2e0e39 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -1,6 +1,5 @@
-#include <linux/irq.h>
 
-#if defined(CONFIG_GENERIC_PENDING_IRQ)
+#include <linux/irq.h>
 
 void set_pending_irq(unsigned int irq, cpumask_t mask)
 {
@@ -61,5 +60,3 @@ void move_native_irq(int irq)
         }
         cpus_clear(pending_irq_cpumask[irq]);
 }
-
-#endif
diff --git a/kernel/module.c b/kernel/module.c
index bd088a7c1499..d24deb0dbbc9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1254,6 +1254,7 @@ static inline int license_is_gpl_compatible(const char *license)
             || strcmp(license, "GPL v2") == 0
             || strcmp(license, "GPL and additional rights") == 0
             || strcmp(license, "Dual BSD/GPL") == 0
+            || strcmp(license, "Dual MIT/GPL") == 0
             || strcmp(license, "Dual MPL/GPL") == 0);
 }
 
diff --git a/kernel/panic.c b/kernel/panic.c
index f895c7c01d5b..cc2a4c9c36ac 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -27,7 +27,6 @@ static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 
 int panic_timeout;
-EXPORT_SYMBOL(panic_timeout);
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
diff --git a/kernel/pid.c b/kernel/pid.c
index a9f2dfd006d2..eeb836b65ca4 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -28,8 +28,9 @@
28#include <linux/hash.h> 28#include <linux/hash.h>
29 29
30#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) 30#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
31static struct hlist_head *pid_hash[PIDTYPE_MAX]; 31static struct hlist_head *pid_hash;
32static int pidhash_shift; 32static int pidhash_shift;
33static kmem_cache_t *pid_cachep;
33 34
34int pid_max = PID_MAX_DEFAULT; 35int pid_max = PID_MAX_DEFAULT;
35int last_pid; 36int last_pid;
@@ -60,9 +61,22 @@ typedef struct pidmap {
60static pidmap_t pidmap_array[PIDMAP_ENTRIES] = 61static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
61 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; 62 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
62 63
64/*
65 * Note: disable interrupts while the pidmap_lock is held as an
66 * interrupt might come in and do read_lock(&tasklist_lock).
67 *
68 * If we don't disable interrupts there is a nasty deadlock between
69 * detach_pid()->free_pid() and another cpu that does
70 * spin_lock(&pidmap_lock) followed by an interrupt routine that does
71 * read_lock(&tasklist_lock);
72 *
73 * After we clean up the tasklist_lock and know there are no
74 * irq handlers that take it we can leave the interrupts enabled.
75 * For now it is easier to be safe than to prove it can't happen.
76 */
63static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); 77static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
64 78
65fastcall void free_pidmap(int pid) 79static fastcall void free_pidmap(int pid)
66{ 80{
67 pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; 81 pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
68 int offset = pid & BITS_PER_PAGE_MASK; 82 int offset = pid & BITS_PER_PAGE_MASK;
@@ -71,7 +85,7 @@ fastcall void free_pidmap(int pid)
71 atomic_inc(&map->nr_free); 85 atomic_inc(&map->nr_free);
72} 86}
73 87
74int alloc_pidmap(void) 88static int alloc_pidmap(void)
75{ 89{
76 int i, offset, max_scan, pid, last = last_pid; 90 int i, offset, max_scan, pid, last = last_pid;
77 pidmap_t *map; 91 pidmap_t *map;
@@ -89,12 +103,12 @@ int alloc_pidmap(void)
89 * Free the page if someone raced with us 103 * Free the page if someone raced with us
90 * installing it: 104 * installing it:
91 */ 105 */
92 spin_lock(&pidmap_lock); 106 spin_lock_irq(&pidmap_lock);
93 if (map->page) 107 if (map->page)
94 free_page(page); 108 free_page(page);
95 else 109 else
96 map->page = (void *)page; 110 map->page = (void *)page;
97 spin_unlock(&pidmap_lock); 111 spin_unlock_irq(&pidmap_lock);
98 if (unlikely(!map->page)) 112 if (unlikely(!map->page))
99 break; 113 break;
100 } 114 }
@@ -131,13 +145,73 @@ int alloc_pidmap(void)
131 return -1; 145 return -1;
132} 146}
133 147
134struct pid * fastcall find_pid(enum pid_type type, int nr) 148fastcall void put_pid(struct pid *pid)
149{
150 if (!pid)
151 return;
152 if ((atomic_read(&pid->count) == 1) ||
153 atomic_dec_and_test(&pid->count))
154 kmem_cache_free(pid_cachep, pid);
155}
156
157static void delayed_put_pid(struct rcu_head *rhp)
158{
159 struct pid *pid = container_of(rhp, struct pid, rcu);
160 put_pid(pid);
161}
162
163fastcall void free_pid(struct pid *pid)
164{
165 /* We can be called with write_lock_irq(&tasklist_lock) held */
166 unsigned long flags;
167
168 spin_lock_irqsave(&pidmap_lock, flags);
169 hlist_del_rcu(&pid->pid_chain);
170 spin_unlock_irqrestore(&pidmap_lock, flags);
171
172 free_pidmap(pid->nr);
173 call_rcu(&pid->rcu, delayed_put_pid);
174}
175
176struct pid *alloc_pid(void)
177{
178 struct pid *pid;
179 enum pid_type type;
180 int nr = -1;
181
182 pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
183 if (!pid)
184 goto out;
185
186 nr = alloc_pidmap();
187 if (nr < 0)
188 goto out_free;
189
190 atomic_set(&pid->count, 1);
191 pid->nr = nr;
192 for (type = 0; type < PIDTYPE_MAX; ++type)
193 INIT_HLIST_HEAD(&pid->tasks[type]);
194
195 spin_lock_irq(&pidmap_lock);
196 hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
197 spin_unlock_irq(&pidmap_lock);
198
199out:
200 return pid;
201
202out_free:
203 kmem_cache_free(pid_cachep, pid);
204 pid = NULL;
205 goto out;
206}
207
208struct pid * fastcall find_pid(int nr)
135{ 209{
136 struct hlist_node *elem; 210 struct hlist_node *elem;
137 struct pid *pid; 211 struct pid *pid;
138 212
139 hlist_for_each_entry_rcu(pid, elem, 213 hlist_for_each_entry_rcu(pid, elem,
140 &pid_hash[type][pid_hashfn(nr)], pid_chain) { 214 &pid_hash[pid_hashfn(nr)], pid_chain) {
141 if (pid->nr == nr) 215 if (pid->nr == nr)
142 return pid; 216 return pid;
143 } 217 }
@@ -146,77 +220,82 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
146 220
147int fastcall attach_pid(task_t *task, enum pid_type type, int nr) 221int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
148{ 222{
149 struct pid *pid, *task_pid; 223 struct pid_link *link;
150 224 struct pid *pid;
151 task_pid = &task->pids[type]; 225
152 pid = find_pid(type, nr); 226 WARN_ON(!task->pid); /* to be removed soon */
153 task_pid->nr = nr; 227 WARN_ON(!nr); /* to be removed soon */
154 if (pid == NULL) { 228
155 INIT_LIST_HEAD(&task_pid->pid_list); 229 link = &task->pids[type];
156 hlist_add_head_rcu(&task_pid->pid_chain, 230 link->pid = pid = find_pid(nr);
157 &pid_hash[type][pid_hashfn(nr)]); 231 hlist_add_head_rcu(&link->node, &pid->tasks[type]);
158 } else {
159 INIT_HLIST_NODE(&task_pid->pid_chain);
160 list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
161 }
162 232
163 return 0; 233 return 0;
164} 234}
165 235
166static fastcall int __detach_pid(task_t *task, enum pid_type type) 236void fastcall detach_pid(task_t *task, enum pid_type type)
167{ 237{
168 struct pid *pid, *pid_next; 238 struct pid_link *link;
169 int nr = 0; 239 struct pid *pid;
240 int tmp;
170 241
171 pid = &task->pids[type]; 242 link = &task->pids[type];
172 if (!hlist_unhashed(&pid->pid_chain)) { 243 pid = link->pid;
173 244
174 if (list_empty(&pid->pid_list)) { 245 hlist_del_rcu(&link->node);
175 nr = pid->nr; 246 link->pid = NULL;
176 hlist_del_rcu(&pid->pid_chain);
177 } else {
178 pid_next = list_entry(pid->pid_list.next,
179 struct pid, pid_list);
180 /* insert next pid from pid_list to hash */
181 hlist_replace_rcu(&pid->pid_chain,
182 &pid_next->pid_chain);
183 }
184 }
185 247
186 list_del_rcu(&pid->pid_list); 248 for (tmp = PIDTYPE_MAX; --tmp >= 0; )
187 pid->nr = 0; 249 if (!hlist_empty(&pid->tasks[tmp]))
250 return;
188 251
189 return nr; 252 free_pid(pid);
190} 253}
191 254
192void fastcall detach_pid(task_t *task, enum pid_type type) 255struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
193{ 256{
194 int tmp, nr; 257 struct task_struct *result = NULL;
258 if (pid) {
259 struct hlist_node *first;
260 first = rcu_dereference(pid->tasks[type].first);
261 if (first)
262 result = hlist_entry(first, struct task_struct, pids[(type)].node);
263 }
264 return result;
265}
195 266
196 nr = __detach_pid(task, type); 267/*
197 if (!nr) 268 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
198 return; 269 */
270task_t *find_task_by_pid_type(int type, int nr)
271{
272 return pid_task(find_pid(nr), type);
273}
199 274
200 for (tmp = PIDTYPE_MAX; --tmp >= 0; ) 275EXPORT_SYMBOL(find_task_by_pid_type);
201 if (tmp != type && find_pid(tmp, nr))
202 return;
203 276
204 free_pidmap(nr); 277struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
278{
279 struct task_struct *result;
280 rcu_read_lock();
281 result = pid_task(pid, type);
282 if (result)
283 get_task_struct(result);
284 rcu_read_unlock();
285 return result;
205} 286}
206 287
207task_t *find_task_by_pid_type(int type, int nr) 288struct pid *find_get_pid(pid_t nr)
208{ 289{
209 struct pid *pid; 290 struct pid *pid;
210 291
211 pid = find_pid(type, nr); 292 rcu_read_lock();
212 if (!pid) 293 pid = get_pid(find_pid(nr));
213 return NULL; 294 rcu_read_unlock();
214 295
215 return pid_task(&pid->pid_list, type); 296 return pid;
216} 297}
217 298
218EXPORT_SYMBOL(find_task_by_pid_type);
219
220/* 299/*
221 * The pid hash table is scaled according to the amount of memory in the 300 * The pid hash table is scaled according to the amount of memory in the
222 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or 301 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -224,7 +303,7 @@ EXPORT_SYMBOL(find_task_by_pid_type);
224 */ 303 */
225void __init pidhash_init(void) 304void __init pidhash_init(void)
226{ 305{
227 int i, j, pidhash_size; 306 int i, pidhash_size;
228 unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT); 307 unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
229 308
230 pidhash_shift = max(4, fls(megabytes * 4)); 309 pidhash_shift = max(4, fls(megabytes * 4));
@@ -233,16 +312,13 @@ void __init pidhash_init(void)
233 312
234 printk("PID hash table entries: %d (order: %d, %Zd bytes)\n", 313 printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
235 pidhash_size, pidhash_shift, 314 pidhash_size, pidhash_shift,
236 PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head)); 315 pidhash_size * sizeof(struct hlist_head));
237 316
238 for (i = 0; i < PIDTYPE_MAX; i++) { 317 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
239 pid_hash[i] = alloc_bootmem(pidhash_size * 318 if (!pid_hash)
240 sizeof(*(pid_hash[i]))); 319 panic("Could not alloc pidhash!\n");
241 if (!pid_hash[i]) 320 for (i = 0; i < pidhash_size; i++)
242 panic("Could not alloc pidhash!\n"); 321 INIT_HLIST_HEAD(&pid_hash[i]);
243 for (j = 0; j < pidhash_size; j++)
244 INIT_HLIST_HEAD(&pid_hash[i][j]);
245 }
246} 322}
247 323
248void __init pidmap_init(void) 324void __init pidmap_init(void)
@@ -251,4 +327,8 @@ void __init pidmap_init(void)
251 /* Reserve PID 0. We never call free_pidmap(0) */ 327 /* Reserve PID 0. We never call free_pidmap(0) */
252 set_bit(0, pidmap_array->page); 328 set_bit(0, pidmap_array->page);
253 atomic_dec(&pidmap_array->nr_free); 329 atomic_dec(&pidmap_array->nr_free);
330
331 pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
332 __alignof__(struct pid),
333 SLAB_PANIC, NULL, NULL);
254} 334}
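The pid.c rework above makes struct pid a reference-counted object shared by all pid types. A minimal sketch of how a caller might hold a pid across a sleep using the helpers added or used in this hunk (find_get_pid, get_pid_task, put_pid); the surrounding function and the send_sig call are illustrative assumptions, not part of the patch.

/*
 * Illustrative sketch only -- not from this patch. Take a counted
 * reference on a struct pid under RCU, sleep safely, then resolve it
 * to a task without holding tasklist_lock.
 */
static int signal_pid_later(pid_t nr, int sig)
{
        struct pid *pid;
        struct task_struct *tsk;
        int err = -ESRCH;

        pid = find_get_pid(nr);         /* NULL if no such pid right now */
        if (!pid)
                return -ESRCH;

        /* ... may block here; the struct pid cannot be freed under us ... */

        tsk = get_pid_task(pid, PIDTYPE_PID);   /* takes a task reference, or NULL */
        if (tsk) {
                err = send_sig(sig, tsk, 0);
                put_task_struct(tsk);
        }
        put_pid(pid);

        return err;
}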
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 9fd8d4f03595..ce0dfb8f4a4e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -41,7 +41,7 @@ config SOFTWARE_SUSPEND
         depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
         ---help---
           Enable the possibility of suspending the machine.
-          It doesn't need APM.
+          It doesn't need ACPI or APM.
           You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
           (patch for sysvinit needed).
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 8ac7c35fad77..b2a5f671d6cd 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -26,8 +26,7 @@ static inline int freezeable(struct task_struct * p)
             (p->flags & PF_NOFREEZE) ||
             (p->exit_state == EXIT_ZOMBIE) ||
             (p->exit_state == EXIT_DEAD) ||
-            (p->state == TASK_STOPPED) ||
-            (p->state == TASK_TRACED))
+            (p->state == TASK_STOPPED))
                 return 0;
         return 1;
 }
diff --git a/kernel/printk.c b/kernel/printk.c
index 8cc19431e74b..c056f3324432 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -360,8 +360,7 @@ static void call_console_drivers(unsigned long start, unsigned long end)
         unsigned long cur_index, start_print;
         static int msg_level = -1;
 
-        if (((long)(start - end)) > 0)
-                BUG();
+        BUG_ON(((long)(start - end)) > 0);
 
         cur_index = start;
         start_print = start;
@@ -708,8 +707,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
  */
 void acquire_console_sem(void)
 {
-        if (in_interrupt())
-                BUG();
+        BUG_ON(in_interrupt());
         down(&console_sem);
         console_locked = 1;
         console_may_schedule = 1;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 86a7f6c60cb2..0eeb7e66722c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -30,8 +30,7 @@
  */
 void __ptrace_link(task_t *child, task_t *new_parent)
 {
-        if (!list_empty(&child->ptrace_list))
-                BUG();
+        BUG_ON(!list_empty(&child->ptrace_list));
         if (child->parent == new_parent)
                 return;
         list_add(&child->ptrace_list, &child->parent->ptrace_children);
diff --git a/kernel/sched.c b/kernel/sched.c
index a9ecac398bb9..365f0b90b4de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,11 +665,57 @@ static int effective_prio(task_t *p)
665} 665}
666 666
667/* 667/*
668 * We place interactive tasks back into the active array, if possible.
669 *
670 * To guarantee that this does not starve expired tasks we ignore the
671 * interactivity of a task if the first expired task had to wait more
672 * than a 'reasonable' amount of time. This deadline timeout is
673 * load-dependent, as the frequency of array switched decreases with
674 * increasing number of running tasks. We also ignore the interactivity
675 * if a better static_prio task has expired, and switch periodically
676 * regardless, to ensure that highly interactive tasks do not starve
677 * the less fortunate for unreasonably long periods.
678 */
679static inline int expired_starving(runqueue_t *rq)
680{
681 int limit;
682
683 /*
684 * Arrays were recently switched, all is well
685 */
686 if (!rq->expired_timestamp)
687 return 0;
688
689 limit = STARVATION_LIMIT * rq->nr_running;
690
691 /*
692 * It's time to switch arrays
693 */
694 if (jiffies - rq->expired_timestamp >= limit)
695 return 1;
696
697 /*
698 * There's a better selection in the expired array
699 */
700 if (rq->curr->static_prio > rq->best_expired_prio)
701 return 1;
702
703 /*
704 * All is well
705 */
706 return 0;
707}
708
709/*
668 * __activate_task - move a task to the runqueue. 710 * __activate_task - move a task to the runqueue.
669 */ 711 */
670static inline void __activate_task(task_t *p, runqueue_t *rq) 712static void __activate_task(task_t *p, runqueue_t *rq)
671{ 713{
672 enqueue_task(p, rq->active); 714 prio_array_t *target = rq->active;
715
716 if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
717 target = rq->expired;
718 enqueue_task(p, target);
673 rq->nr_running++; 719 rq->nr_running++;
674} 720}
675 721
@@ -688,7 +734,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
688 unsigned long long __sleep_time = now - p->timestamp; 734 unsigned long long __sleep_time = now - p->timestamp;
689 unsigned long sleep_time; 735 unsigned long sleep_time;
690 736
691 if (unlikely(p->policy == SCHED_BATCH)) 737 if (batch_task(p))
692 sleep_time = 0; 738 sleep_time = 0;
693 else { 739 else {
694 if (__sleep_time > NS_MAX_SLEEP_AVG) 740 if (__sleep_time > NS_MAX_SLEEP_AVG)
@@ -700,21 +746,25 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
700 if (likely(sleep_time > 0)) { 746 if (likely(sleep_time > 0)) {
701 /* 747 /*
702 * User tasks that sleep a long time are categorised as 748 * User tasks that sleep a long time are categorised as
703 * idle and will get just interactive status to stay active & 749 * idle. They will only have their sleep_avg increased to a
704 * prevent them suddenly becoming cpu hogs and starving 750 * level that makes them just interactive priority to stay
705 * other processes. 751 * active yet prevent them suddenly becoming cpu hogs and
752 * starving other processes.
706 */ 753 */
707 if (p->mm && p->activated != -1 && 754 if (p->mm && sleep_time > INTERACTIVE_SLEEP(p)) {
708 sleep_time > INTERACTIVE_SLEEP(p)) { 755 unsigned long ceiling;
709 p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG - 756
710 DEF_TIMESLICE); 757 ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG -
758 DEF_TIMESLICE);
759 if (p->sleep_avg < ceiling)
760 p->sleep_avg = ceiling;
711 } else { 761 } else {
712 /* 762 /*
713 * Tasks waking from uninterruptible sleep are 763 * Tasks waking from uninterruptible sleep are
714 * limited in their sleep_avg rise as they 764 * limited in their sleep_avg rise as they
715 * are likely to be waiting on I/O 765 * are likely to be waiting on I/O
716 */ 766 */
717 if (p->activated == -1 && p->mm) { 767 if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
718 if (p->sleep_avg >= INTERACTIVE_SLEEP(p)) 768 if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
719 sleep_time = 0; 769 sleep_time = 0;
720 else if (p->sleep_avg + sleep_time >= 770 else if (p->sleep_avg + sleep_time >=
@@ -769,7 +819,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
769 * This checks to make sure it's not an uninterruptible task 819 * This checks to make sure it's not an uninterruptible task
770 * that is now waking up. 820 * that is now waking up.
771 */ 821 */
772 if (!p->activated) { 822 if (p->sleep_type == SLEEP_NORMAL) {
773 /* 823 /*
774 * Tasks which were woken up by interrupts (ie. hw events) 824 * Tasks which were woken up by interrupts (ie. hw events)
775 * are most likely of interactive nature. So we give them 825 * are most likely of interactive nature. So we give them
@@ -778,13 +828,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
778 * on a CPU, first time around: 828 * on a CPU, first time around:
779 */ 829 */
780 if (in_interrupt()) 830 if (in_interrupt())
781 p->activated = 2; 831 p->sleep_type = SLEEP_INTERRUPTED;
782 else { 832 else {
783 /* 833 /*
784 * Normal first-time wakeups get a credit too for 834 * Normal first-time wakeups get a credit too for
785 * on-runqueue time, but it will be weighted down: 835 * on-runqueue time, but it will be weighted down:
786 */ 836 */
787 p->activated = 1; 837 p->sleep_type = SLEEP_INTERACTIVE;
788 } 838 }
789 } 839 }
790 p->timestamp = now; 840 p->timestamp = now;
@@ -1272,19 +1322,19 @@ out_activate:
1272 * Tasks on involuntary sleep don't earn 1322 * Tasks on involuntary sleep don't earn
1273 * sleep_avg beyond just interactive state. 1323 * sleep_avg beyond just interactive state.
1274 */ 1324 */
1275 p->activated = -1; 1325 p->sleep_type = SLEEP_NONINTERACTIVE;
1276 } 1326 } else
1277 1327
1278 /* 1328 /*
1279 * Tasks that have marked their sleep as noninteractive get 1329 * Tasks that have marked their sleep as noninteractive get
1280 * woken up without updating their sleep average. (i.e. their 1330 * woken up with their sleep average not weighted in an
1281 * sleep is handled in a priority-neutral manner, no priority 1331 * interactive way.
1282 * boost and no penalty.)
1283 */ 1332 */
1284 if (old_state & TASK_NONINTERACTIVE) 1333 if (old_state & TASK_NONINTERACTIVE)
1285 __activate_task(p, rq); 1334 p->sleep_type = SLEEP_NONINTERACTIVE;
1286 else 1335
1287 activate_task(p, rq, cpu == this_cpu); 1336
1337 activate_task(p, rq, cpu == this_cpu);
1288 /* 1338 /*
1289 * Sync wakeups (i.e. those types of wakeups where the waker 1339 * Sync wakeups (i.e. those types of wakeups where the waker
1290 * has indicated that it will leave the CPU in short order) 1340 * has indicated that it will leave the CPU in short order)
@@ -1658,6 +1708,21 @@ unsigned long nr_iowait(void)
1658 return sum; 1708 return sum;
1659} 1709}
1660 1710
1711unsigned long nr_active(void)
1712{
1713 unsigned long i, running = 0, uninterruptible = 0;
1714
1715 for_each_online_cpu(i) {
1716 running += cpu_rq(i)->nr_running;
1717 uninterruptible += cpu_rq(i)->nr_uninterruptible;
1718 }
1719
1720 if (unlikely((long)uninterruptible < 0))
1721 uninterruptible = 0;
1722
1723 return running + uninterruptible;
1724}
1725
1661#ifdef CONFIG_SMP 1726#ifdef CONFIG_SMP
1662 1727
1663/* 1728/*
@@ -2467,22 +2532,6 @@ unsigned long long current_sched_time(const task_t *tsk)
2467} 2532}
2468 2533
2469/* 2534/*
2470 * We place interactive tasks back into the active array, if possible.
2471 *
2472 * To guarantee that this does not starve expired tasks we ignore the
2473 * interactivity of a task if the first expired task had to wait more
2474 * than a 'reasonable' amount of time. This deadline timeout is
2475 * load-dependent, as the frequency of array switched decreases with
2476 * increasing number of running tasks. We also ignore the interactivity
2477 * if a better static_prio task has expired:
2478 */
2479#define EXPIRED_STARVING(rq) \
2480 ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
2481 (jiffies - (rq)->expired_timestamp >= \
2482 STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
2483 ((rq)->curr->static_prio > (rq)->best_expired_prio))
2484
2485/*
2486 * Account user cpu time to a process. 2535 * Account user cpu time to a process.
2487 * @p: the process that the cpu time gets accounted to 2536 * @p: the process that the cpu time gets accounted to
2488 * @hardirq_offset: the offset to subtract from hardirq_count() 2537 * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2617,7 +2666,7 @@ void scheduler_tick(void)
2617 2666
2618 if (!rq->expired_timestamp) 2667 if (!rq->expired_timestamp)
2619 rq->expired_timestamp = jiffies; 2668 rq->expired_timestamp = jiffies;
2620 if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { 2669 if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
2621 enqueue_task(p, rq->expired); 2670 enqueue_task(p, rq->expired);
2622 if (p->static_prio < rq->best_expired_prio) 2671 if (p->static_prio < rq->best_expired_prio)
2623 rq->best_expired_prio = p->static_prio; 2672 rq->best_expired_prio = p->static_prio;
@@ -2860,6 +2909,12 @@ EXPORT_SYMBOL(sub_preempt_count);
2860 2909
2861#endif 2910#endif
2862 2911
2912static inline int interactive_sleep(enum sleep_type sleep_type)
2913{
2914 return (sleep_type == SLEEP_INTERACTIVE ||
2915 sleep_type == SLEEP_INTERRUPTED);
2916}
2917
2863/* 2918/*
2864 * schedule() is the main scheduler function. 2919 * schedule() is the main scheduler function.
2865 */ 2920 */
@@ -2983,12 +3038,12 @@ go_idle:
2983 queue = array->queue + idx; 3038 queue = array->queue + idx;
2984 next = list_entry(queue->next, task_t, run_list); 3039 next = list_entry(queue->next, task_t, run_list);
2985 3040
2986 if (!rt_task(next) && next->activated > 0) { 3041 if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
2987 unsigned long long delta = now - next->timestamp; 3042 unsigned long long delta = now - next->timestamp;
2988 if (unlikely((long long)(now - next->timestamp) < 0)) 3043 if (unlikely((long long)(now - next->timestamp) < 0))
2989 delta = 0; 3044 delta = 0;
2990 3045
2991 if (next->activated == 1) 3046 if (next->sleep_type == SLEEP_INTERACTIVE)
2992 delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; 3047 delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
2993 3048
2994 array = next->array; 3049 array = next->array;
@@ -2998,10 +3053,9 @@ go_idle:
2998 dequeue_task(next, array); 3053 dequeue_task(next, array);
2999 next->prio = new_prio; 3054 next->prio = new_prio;
3000 enqueue_task(next, array); 3055 enqueue_task(next, array);
3001 } else 3056 }
3002 requeue_task(next, array);
3003 } 3057 }
3004 next->activated = 0; 3058 next->sleep_type = SLEEP_NORMAL;
3005switch_tasks: 3059switch_tasks:
3006 if (next == rq->idle) 3060 if (next == rq->idle)
3007 schedstat_inc(rq, sched_goidle); 3061 schedstat_inc(rq, sched_goidle);
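The sched.c hunks above replace the numeric p->activated states with a descriptive p->sleep_type. The enum itself is declared outside this file, presumably in include/linux/sched.h by the same series; as a reading aid only (an assumption, not taken from this diff), it maps the old magic numbers roughly as follows.

enum sleep_type {
        SLEEP_NORMAL,           /* was p->activated ==  0: default state         */
        SLEEP_NONINTERACTIVE,   /* was p->activated == -1: uninterruptible sleep */
        SLEEP_INTERACTIVE,      /* was p->activated ==  1: ordinary wakeup       */
        SLEEP_INTERRUPTED,      /* was p->activated ==  2: woken by an interrupt */
};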
diff --git a/kernel/signal.c b/kernel/signal.c
index 4922928d91f6..b14f895027c3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -769,8 +769,7 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 {
         int ret = 0;
 
-        if (!irqs_disabled())
-                BUG();
+        BUG_ON(!irqs_disabled());
         assert_spin_locked(&t->sighand->siglock);
 
         /* Short-circuit ignored signals. */
@@ -869,7 +868,6 @@ __group_complete_signal(int sig, struct task_struct *p)
                 if (t == NULL)
                         /* restart balancing at this thread */
                         t = p->signal->curr_target = p;
-                BUG_ON(t->tgid != p->tgid);
 
                 while (!wants_signal(sig, t)) {
                         t = next_thread(t);
@@ -1384,8 +1382,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
                  * the overrun count. Other uses should not try to
                  * send the signal multiple times.
                  */
-                if (q->info.si_code != SI_TIMER)
-                        BUG();
+                BUG_ON(q->info.si_code != SI_TIMER);
                 q->info.si_overrun++;
                 goto out;
         }
@@ -1560,6 +1557,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
         /* Let the debugger run. */
         set_current_state(TASK_TRACED);
         spin_unlock_irq(&current->sighand->siglock);
+        try_to_freeze();
         read_lock(&tasklist_lock);
         if (likely(current->ptrace & PT_PTRACED) &&
             likely(current->parent != current->real_parent ||
diff --git a/kernel/sys.c b/kernel/sys.c
index 7ef7f6054c28..0b6ec0e7936f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1372,18 +1372,29 @@ asmlinkage long sys_getsid(pid_t pid)
 asmlinkage long sys_setsid(void)
 {
         struct task_struct *group_leader = current->group_leader;
-        struct pid *pid;
+        pid_t session;
         int err = -EPERM;
 
         mutex_lock(&tty_mutex);
         write_lock_irq(&tasklist_lock);
 
-        pid = find_pid(PIDTYPE_PGID, group_leader->pid);
-        if (pid)
+        /* Fail if I am already a session leader */
+        if (group_leader->signal->leader)
+                goto out;
+
+        session = group_leader->pid;
+        /* Fail if a process group id already exists that equals the
+         * proposed session id.
+         *
+         * Don't check if session id == 1 because kernel threads use this
+         * session id and so the check will always fail and make it so
+         * init cannot successfully call setsid.
+         */
+        if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
                 goto out;
 
         group_leader->signal->leader = 1;
-        __set_special_pids(group_leader->pid, group_leader->pid);
+        __set_special_pids(session, session);
         group_leader->signal->tty = NULL;
         group_leader->signal->tty_old_pgrp = 0;
         err = process_group(group_leader);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d82864c4a617..5433195040f1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -120,3 +120,15 @@ cond_syscall(sys32_sysctl);
 cond_syscall(ppc_rtas);
 cond_syscall(sys_spu_run);
 cond_syscall(sys_spu_create);
+
+/* mmu depending weak syscall entries */
+cond_syscall(sys_mprotect);
+cond_syscall(sys_msync);
+cond_syscall(sys_mlock);
+cond_syscall(sys_munlock);
+cond_syscall(sys_mlockall);
+cond_syscall(sys_munlockall);
+cond_syscall(sys_mincore);
+cond_syscall(sys_madvise);
+cond_syscall(sys_mremap);
+cond_syscall(sys_remap_file_pages);
diff --git a/kernel/time.c b/kernel/time.c
index ff8e7019c4c4..b00ddc71cedb 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -410,7 +410,7 @@ EXPORT_SYMBOL(current_kernel_time);
  * current_fs_time - Return FS time
  * @sb: Superblock.
  *
- * Return the current time truncated to the time granuality supported by
+ * Return the current time truncated to the time granularity supported by
  * the fs.
  */
 struct timespec current_fs_time(struct super_block *sb)
@@ -421,11 +421,11 @@ struct timespec current_fs_time(struct super_block *sb)
 EXPORT_SYMBOL(current_fs_time);
 
 /**
- * timespec_trunc - Truncate timespec to a granuality
+ * timespec_trunc - Truncate timespec to a granularity
  * @t: Timespec
- * @gran: Granuality in ns.
+ * @gran: Granularity in ns.
  *
- * Truncate a timespec to a granuality. gran must be smaller than a second.
+ * Truncate a timespec to a granularity. gran must be smaller than a second.
  * Always rounds down.
  *
  * This function should be only used for timestamps returned by
diff --git a/kernel/timer.c b/kernel/timer.c
index ab189dd187cb..883773788836 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -54,7 +54,6 @@ EXPORT_SYMBOL(jiffies_64);
54/* 54/*
55 * per-CPU timer vector definitions: 55 * per-CPU timer vector definitions:
56 */ 56 */
57
58#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) 57#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
59#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) 58#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
60#define TVN_SIZE (1 << TVN_BITS) 59#define TVN_SIZE (1 << TVN_BITS)
@@ -62,11 +61,6 @@ EXPORT_SYMBOL(jiffies_64);
62#define TVN_MASK (TVN_SIZE - 1) 61#define TVN_MASK (TVN_SIZE - 1)
63#define TVR_MASK (TVR_SIZE - 1) 62#define TVR_MASK (TVR_SIZE - 1)
64 63
65struct timer_base_s {
66 spinlock_t lock;
67 struct timer_list *running_timer;
68};
69
70typedef struct tvec_s { 64typedef struct tvec_s {
71 struct list_head vec[TVN_SIZE]; 65 struct list_head vec[TVN_SIZE];
72} tvec_t; 66} tvec_t;
@@ -76,7 +70,8 @@ typedef struct tvec_root_s {
76} tvec_root_t; 70} tvec_root_t;
77 71
78struct tvec_t_base_s { 72struct tvec_t_base_s {
79 struct timer_base_s t_base; 73 spinlock_t lock;
74 struct timer_list *running_timer;
80 unsigned long timer_jiffies; 75 unsigned long timer_jiffies;
81 tvec_root_t tv1; 76 tvec_root_t tv1;
82 tvec_t tv2; 77 tvec_t tv2;
@@ -86,14 +81,16 @@ struct tvec_t_base_s {
86} ____cacheline_aligned_in_smp; 81} ____cacheline_aligned_in_smp;
87 82
88typedef struct tvec_t_base_s tvec_base_t; 83typedef struct tvec_t_base_s tvec_base_t;
89static DEFINE_PER_CPU(tvec_base_t *, tvec_bases); 84
90static tvec_base_t boot_tvec_bases; 85tvec_base_t boot_tvec_bases;
86EXPORT_SYMBOL(boot_tvec_bases);
87static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = { &boot_tvec_bases };
91 88
92static inline void set_running_timer(tvec_base_t *base, 89static inline void set_running_timer(tvec_base_t *base,
93 struct timer_list *timer) 90 struct timer_list *timer)
94{ 91{
95#ifdef CONFIG_SMP 92#ifdef CONFIG_SMP
96 base->t_base.running_timer = timer; 93 base->running_timer = timer;
97#endif 94#endif
98} 95}
99 96
@@ -139,15 +136,6 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
139 list_add_tail(&timer->entry, vec); 136 list_add_tail(&timer->entry, vec);
140} 137}
141 138
142typedef struct timer_base_s timer_base_t;
143/*
144 * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases)
145 * at compile time, and we need timer->base to lock the timer.
146 */
147timer_base_t __init_timer_base
148 ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
149EXPORT_SYMBOL(__init_timer_base);
150
151/*** 139/***
152 * init_timer - initialize a timer. 140 * init_timer - initialize a timer.
153 * @timer: the timer to be initialized 141 * @timer: the timer to be initialized
@@ -158,7 +146,7 @@ EXPORT_SYMBOL(__init_timer_base);
158void fastcall init_timer(struct timer_list *timer) 146void fastcall init_timer(struct timer_list *timer)
159{ 147{
160 timer->entry.next = NULL; 148 timer->entry.next = NULL;
161 timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base; 149 timer->base = per_cpu(tvec_bases, raw_smp_processor_id());
162} 150}
163EXPORT_SYMBOL(init_timer); 151EXPORT_SYMBOL(init_timer);
164 152
@@ -174,7 +162,7 @@ static inline void detach_timer(struct timer_list *timer,
174} 162}
175 163
176/* 164/*
177 * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock 165 * We are using hashed locking: holding per_cpu(tvec_bases).lock
178 * means that all timers which are tied to this base via timer->base are 166 * means that all timers which are tied to this base via timer->base are
179 * locked, and the base itself is locked too. 167 * locked, and the base itself is locked too.
180 * 168 *
@@ -185,10 +173,10 @@ static inline void detach_timer(struct timer_list *timer,
185 * possible to set timer->base = NULL and drop the lock: the timer remains 173 * possible to set timer->base = NULL and drop the lock: the timer remains
186 * locked. 174 * locked.
187 */ 175 */
188static timer_base_t *lock_timer_base(struct timer_list *timer, 176static tvec_base_t *lock_timer_base(struct timer_list *timer,
189 unsigned long *flags) 177 unsigned long *flags)
190{ 178{
191 timer_base_t *base; 179 tvec_base_t *base;
192 180
193 for (;;) { 181 for (;;) {
194 base = timer->base; 182 base = timer->base;
@@ -205,8 +193,7 @@ static timer_base_t *lock_timer_base(struct timer_list *timer,
205 193
206int __mod_timer(struct timer_list *timer, unsigned long expires) 194int __mod_timer(struct timer_list *timer, unsigned long expires)
207{ 195{
208 timer_base_t *base; 196 tvec_base_t *base, *new_base;
209 tvec_base_t *new_base;
210 unsigned long flags; 197 unsigned long flags;
211 int ret = 0; 198 int ret = 0;
212 199
@@ -221,7 +208,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
221 208
222 new_base = __get_cpu_var(tvec_bases); 209 new_base = __get_cpu_var(tvec_bases);
223 210
224 if (base != &new_base->t_base) { 211 if (base != new_base) {
225 /* 212 /*
226 * We are trying to schedule the timer on the local CPU. 213 * We are trying to schedule the timer on the local CPU.
227 * However we can't change timer's base while it is running, 214 * However we can't change timer's base while it is running,
@@ -229,21 +216,19 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
229 * handler yet has not finished. This also guarantees that 216 * handler yet has not finished. This also guarantees that
230 * the timer is serialized wrt itself. 217 * the timer is serialized wrt itself.
231 */ 218 */
232 if (unlikely(base->running_timer == timer)) { 219 if (likely(base->running_timer != timer)) {
233 /* The timer remains on a former base */
234 new_base = container_of(base, tvec_base_t, t_base);
235 } else {
236 /* See the comment in lock_timer_base() */ 220 /* See the comment in lock_timer_base() */
237 timer->base = NULL; 221 timer->base = NULL;
238 spin_unlock(&base->lock); 222 spin_unlock(&base->lock);
239 spin_lock(&new_base->t_base.lock); 223 base = new_base;
240 timer->base = &new_base->t_base; 224 spin_lock(&base->lock);
225 timer->base = base;
241 } 226 }
242 } 227 }
243 228
244 timer->expires = expires; 229 timer->expires = expires;
245 internal_add_timer(new_base, timer); 230 internal_add_timer(base, timer);
246 spin_unlock_irqrestore(&new_base->t_base.lock, flags); 231 spin_unlock_irqrestore(&base->lock, flags);
247 232
248 return ret; 233 return ret;
249} 234}
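[Editor's sketch] Callers never see this base migration; re-arming stays a one-liner. A usage sketch, reusing the illustrative timer from above:

/*
 * Re-arm: __mod_timer() above pulls the timer onto the current CPU's base
 * unless its handler is still running on the old one.
 */
mod_timer(&example_timer, jiffies + 2 * HZ);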
@@ -263,10 +248,10 @@ void add_timer_on(struct timer_list *timer, int cpu)
263 unsigned long flags; 248 unsigned long flags;
264 249
265 BUG_ON(timer_pending(timer) || !timer->function); 250 BUG_ON(timer_pending(timer) || !timer->function);
266 spin_lock_irqsave(&base->t_base.lock, flags); 251 spin_lock_irqsave(&base->lock, flags);
267 timer->base = &base->t_base; 252 timer->base = base;
268 internal_add_timer(base, timer); 253 internal_add_timer(base, timer);
269 spin_unlock_irqrestore(&base->t_base.lock, flags); 254 spin_unlock_irqrestore(&base->lock, flags);
270} 255}
271 256
272 257
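[Editor's sketch] add_timer_on() bypasses the migration logic entirely and queues directly on the chosen CPU's base. A usage sketch with invented names, assuming the target CPU is online:

static struct timer_list pinned_timer;

static void pin_example(void)
{
        init_timer(&pinned_timer);
        pinned_timer.function = example_handler;
        pinned_timer.data     = 0;
        pinned_timer.expires  = jiffies + HZ;
        add_timer_on(&pinned_timer, 1);   /* queue on CPU 1's base, not the local one */
}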
@@ -319,7 +304,7 @@ EXPORT_SYMBOL(mod_timer);
319 */ 304 */
320int del_timer(struct timer_list *timer) 305int del_timer(struct timer_list *timer)
321{ 306{
322 timer_base_t *base; 307 tvec_base_t *base;
323 unsigned long flags; 308 unsigned long flags;
324 int ret = 0; 309 int ret = 0;
325 310
@@ -346,7 +331,7 @@ EXPORT_SYMBOL(del_timer);
346 */ 331 */
347int try_to_del_timer_sync(struct timer_list *timer) 332int try_to_del_timer_sync(struct timer_list *timer)
348{ 333{
349 timer_base_t *base; 334 tvec_base_t *base;
350 unsigned long flags; 335 unsigned long flags;
351 int ret = -1; 336 int ret = -1;
352 337
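[Editor's sketch] On teardown the usual call is del_timer_sync(), which on SMP spins on try_to_del_timer_sync() until the -1 "handler still running" case clears. A typical caller, continuing the illustrative example:

static void example_stop(void)
{
        /*
         * Blocks until any handler running on another CPU has returned,
         * then guarantees the timer is no longer queued anywhere.
         */
        del_timer_sync(&example_timer);
}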
@@ -410,7 +395,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
410 struct timer_list *tmp; 395 struct timer_list *tmp;
411 396
412 tmp = list_entry(curr, struct timer_list, entry); 397 tmp = list_entry(curr, struct timer_list, entry);
413 BUG_ON(tmp->base != &base->t_base); 398 BUG_ON(tmp->base != base);
414 curr = curr->next; 399 curr = curr->next;
415 internal_add_timer(base, tmp); 400 internal_add_timer(base, tmp);
416 } 401 }
@@ -432,7 +417,7 @@ static inline void __run_timers(tvec_base_t *base)
432{ 417{
433 struct timer_list *timer; 418 struct timer_list *timer;
434 419
435 spin_lock_irq(&base->t_base.lock); 420 spin_lock_irq(&base->lock);
436 while (time_after_eq(jiffies, base->timer_jiffies)) { 421 while (time_after_eq(jiffies, base->timer_jiffies)) {
437 struct list_head work_list = LIST_HEAD_INIT(work_list); 422 struct list_head work_list = LIST_HEAD_INIT(work_list);
438 struct list_head *head = &work_list; 423 struct list_head *head = &work_list;
@@ -458,7 +443,7 @@ static inline void __run_timers(tvec_base_t *base)
458 443
459 set_running_timer(base, timer); 444 set_running_timer(base, timer);
460 detach_timer(timer, 1); 445 detach_timer(timer, 1);
461 spin_unlock_irq(&base->t_base.lock); 446 spin_unlock_irq(&base->lock);
462 { 447 {
463 int preempt_count = preempt_count(); 448 int preempt_count = preempt_count();
464 fn(data); 449 fn(data);
@@ -471,11 +456,11 @@ static inline void __run_timers(tvec_base_t *base)
471 BUG(); 456 BUG();
472 } 457 }
473 } 458 }
474 spin_lock_irq(&base->t_base.lock); 459 spin_lock_irq(&base->lock);
475 } 460 }
476 } 461 }
477 set_running_timer(base, NULL); 462 set_running_timer(base, NULL);
478 spin_unlock_irq(&base->t_base.lock); 463 spin_unlock_irq(&base->lock);
479} 464}
480 465
481#ifdef CONFIG_NO_IDLE_HZ 466#ifdef CONFIG_NO_IDLE_HZ
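[Editor's sketch] The preempt_count() comparison around fn(data) in this hunk is a debugging aid: it catches handlers that return with a lock still held. An illustrative offender, not taken from the patch:

static DEFINE_SPINLOCK(example_lock);

static void leaky_handler(unsigned long data)
{
        spin_lock(&example_lock);
        /*
         * Missing spin_unlock(): preempt_count() is now higher than when
         * __run_timers() invoked the handler, so the check above reports
         * the handler address and hits BUG().
         */
}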
@@ -506,7 +491,7 @@ unsigned long next_timer_interrupt(void)
506 hr_expires += jiffies; 491 hr_expires += jiffies;
507 492
508 base = __get_cpu_var(tvec_bases); 493 base = __get_cpu_var(tvec_bases);
509 spin_lock(&base->t_base.lock); 494 spin_lock(&base->lock);
510 expires = base->timer_jiffies + (LONG_MAX >> 1); 495 expires = base->timer_jiffies + (LONG_MAX >> 1);
511 list = NULL; 496 list = NULL;
512 497
@@ -554,7 +539,7 @@ found:
554 expires = nte->expires; 539 expires = nte->expires;
555 } 540 }
556 } 541 }
557 spin_unlock(&base->t_base.lock); 542 spin_unlock(&base->lock);
558 543
559 if (time_before(hr_expires, expires)) 544 if (time_before(hr_expires, expires))
560 return hr_expires; 545 return hr_expires;
@@ -841,7 +826,7 @@ void update_process_times(int user_tick)
841 */ 826 */
842static unsigned long count_active_tasks(void) 827static unsigned long count_active_tasks(void)
843{ 828{
844 return (nr_running() + nr_uninterruptible()) * FIXED_1; 829 return nr_active() * FIXED_1;
845} 830}
846 831
847/* 832/*
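[Editor's sketch] nr_active() comes from the kernel/sched.c part of this diff; conceptually it folds the two per-runqueue sums into a single pass. A rough reconstruction follows; field and helper names are from the scheduler of this era and may not match the hunk exactly:

unsigned long nr_active(void)
{
        unsigned long i, running = 0, uninterruptible = 0;

        for_each_online_cpu(i) {
                running += cpu_rq(i)->nr_running;
                uninterruptible += cpu_rq(i)->nr_uninterruptible;
        }

        /*
         * Per-CPU counters can be transiently inconsistent; never report
         * a "negative" uninterruptible count.
         */
        if (unlikely((long)uninterruptible < 0))
                uninterruptible = 0;

        return running + uninterruptible;
}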
@@ -1240,29 +1225,37 @@ static int __devinit init_timers_cpu(int cpu)
1240{ 1225{
1241 int j; 1226 int j;
1242 tvec_base_t *base; 1227 tvec_base_t *base;
1228 static char __devinitdata tvec_base_done[NR_CPUS];
1243 1229
1244 base = per_cpu(tvec_bases, cpu); 1230 if (!tvec_base_done[cpu]) {
1245 if (!base) {
1246 static char boot_done; 1231 static char boot_done;
1247 1232
1248 /*
1249 * Cannot do allocation in init_timers as that runs before the
1250 * allocator initializes (and would waste memory if there are
1251 * more possible CPUs than will ever be installed/brought up).
1252 */
1253 if (boot_done) { 1233 if (boot_done) {
1234 /*
1235 * The APs use this path later in boot
1236 */
1254 base = kmalloc_node(sizeof(*base), GFP_KERNEL, 1237 base = kmalloc_node(sizeof(*base), GFP_KERNEL,
1255 cpu_to_node(cpu)); 1238 cpu_to_node(cpu));
1256 if (!base) 1239 if (!base)
1257 return -ENOMEM; 1240 return -ENOMEM;
1258 memset(base, 0, sizeof(*base)); 1241 memset(base, 0, sizeof(*base));
1242 per_cpu(tvec_bases, cpu) = base;
1259 } else { 1243 } else {
1260 base = &boot_tvec_bases; 1244 /*
1245 * This is for the boot CPU - we use compile-time
1246 * static initialisation because per-cpu memory isn't
1247 * ready yet and because the memory allocators are not
1248 * initialised either.
1249 */
1261 boot_done = 1; 1250 boot_done = 1;
1251 base = &boot_tvec_bases;
1262 } 1252 }
1263 per_cpu(tvec_bases, cpu) = base; 1253 tvec_base_done[cpu] = 1;
1254 } else {
1255 base = per_cpu(tvec_bases, cpu);
1264 } 1256 }
1265 spin_lock_init(&base->t_base.lock); 1257
1258 spin_lock_init(&base->lock);
1266 for (j = 0; j < TVN_SIZE; j++) { 1259 for (j = 0; j < TVN_SIZE; j++) {
1267 INIT_LIST_HEAD(base->tv5.vec + j); 1260 INIT_LIST_HEAD(base->tv5.vec + j);
1268 INIT_LIST_HEAD(base->tv4.vec + j); 1261 INIT_LIST_HEAD(base->tv4.vec + j);
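[Editor's sketch] The shape of this initialisation (a compile-time instance for the boot CPU, kmalloc_node() once the allocators are up, and a done-flag so a re-plugged CPU reuses its old base) is a general pattern. Stripped of the timer specifics it looks like the sketch below; every name here is invented for illustration:

#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/topology.h>

struct foo {
        spinlock_t lock;
        /* ... */
};

static struct foo boot_foo;                        /* usable before kmalloc works */
static DEFINE_PER_CPU(struct foo *, foo_bases);

static int __devinit init_foo_cpu(int cpu)
{
        static char __devinitdata foo_done[NR_CPUS];
        struct foo *p;

        if (!foo_done[cpu]) {
                static char boot_done;

                if (boot_done) {
                        /* Secondary CPUs: allocate on the CPU's own node. */
                        p = kmalloc_node(sizeof(*p), GFP_KERNEL,
                                         cpu_to_node(cpu));
                        if (!p)
                                return -ENOMEM;
                        memset(p, 0, sizeof(*p));
                } else {
                        /* Boot CPU: slab and per-cpu areas are not ready. */
                        p = &boot_foo;
                        boot_done = 1;
                }
                per_cpu(foo_bases, cpu) = p;
                foo_done[cpu] = 1;
        } else {
                p = per_cpu(foo_bases, cpu);       /* CPU came back after hotplug */
        }

        spin_lock_init(&p->lock);
        return 0;
}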
@@ -1284,7 +1277,7 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
1284 while (!list_empty(head)) { 1277 while (!list_empty(head)) {
1285 timer = list_entry(head->next, struct timer_list, entry); 1278 timer = list_entry(head->next, struct timer_list, entry);
1286 detach_timer(timer, 0); 1279 detach_timer(timer, 0);
1287 timer->base = &new_base->t_base; 1280 timer->base = new_base;
1288 internal_add_timer(new_base, timer); 1281 internal_add_timer(new_base, timer);
1289 } 1282 }
1290} 1283}
@@ -1300,11 +1293,11 @@ static void __devinit migrate_timers(int cpu)
1300 new_base = get_cpu_var(tvec_bases); 1293 new_base = get_cpu_var(tvec_bases);
1301 1294
1302 local_irq_disable(); 1295 local_irq_disable();
1303 spin_lock(&new_base->t_base.lock); 1296 spin_lock(&new_base->lock);
1304 spin_lock(&old_base->t_base.lock); 1297 spin_lock(&old_base->lock);
1298
1299 BUG_ON(old_base->running_timer);
1305 1300
1306 if (old_base->t_base.running_timer)
1307 BUG();
1308 for (i = 0; i < TVR_SIZE; i++) 1301 for (i = 0; i < TVR_SIZE; i++)
1309 migrate_timer_list(new_base, old_base->tv1.vec + i); 1302 migrate_timer_list(new_base, old_base->tv1.vec + i);
1310 for (i = 0; i < TVN_SIZE; i++) { 1303 for (i = 0; i < TVN_SIZE; i++) {
@@ -1314,8 +1307,8 @@ static void __devinit migrate_timers(int cpu)
1314 migrate_timer_list(new_base, old_base->tv5.vec + i); 1307 migrate_timer_list(new_base, old_base->tv5.vec + i);
1315 } 1308 }
1316 1309
1317 spin_unlock(&old_base->t_base.lock); 1310 spin_unlock(&old_base->lock);
1318 spin_unlock(&new_base->t_base.lock); 1311 spin_unlock(&new_base->lock);
1319 local_irq_enable(); 1312 local_irq_enable();
1320 put_cpu_var(tvec_bases); 1313 put_cpu_var(tvec_bases);
1321} 1314}
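[Editor's sketch] For orientation, init_timers_cpu() and migrate_timers() are driven from the CPU hotplug notifier further down in timer.c, which is not touched by this hunk. From memory it is shaped roughly as follows; constants and return codes may differ in detail:

static int __devinit timer_cpu_notify(struct notifier_block *self,
                                      unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;

        switch (action) {
        case CPU_UP_PREPARE:
                /* Allocate or reuse the tvec base before the CPU comes up. */
                if (init_timers_cpu(cpu) < 0)
                        return NOTIFY_BAD;
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
                /* Pull any pending timers over to the surviving CPU. */
                migrate_timers(cpu);
                break;
#endif
        default:
                break;
        }
        return NOTIFY_OK;
}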
@@ -1471,7 +1464,7 @@ static void time_interpolator_update(long delta_nsec)
1471 */ 1464 */
1472 if (jiffies % INTERPOLATOR_ADJUST == 0) 1465 if (jiffies % INTERPOLATOR_ADJUST == 0)
1473 { 1466 {
1474 if (time_interpolator->skips == 0 && time_interpolator->offset > TICK_NSEC) 1467 if (time_interpolator->skips == 0 && time_interpolator->offset > tick_nsec)
1475 time_interpolator->nsec_per_cyc--; 1468 time_interpolator->nsec_per_cyc--;
1476 if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0) 1469 if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0)
1477 time_interpolator->nsec_per_cyc++; 1470 time_interpolator->nsec_per_cyc++;
@@ -1495,8 +1488,7 @@ register_time_interpolator(struct time_interpolator *ti)
1495 unsigned long flags; 1488 unsigned long flags;
1496 1489
1497 /* Sanity check */ 1490 /* Sanity check */
1498 if (ti->frequency == 0 || ti->mask == 0) 1491 BUG_ON(ti->frequency == 0 || ti->mask == 0);
1499 BUG();
1500 1492
1501 ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency; 1493 ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency;
1502 spin_lock(&time_interpolator_lock); 1494 spin_lock(&time_interpolator_lock);