Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c        |   6
-rw-r--r--  kernel/compat.c         |   1
-rw-r--r--  kernel/cpuset.c         |  37
-rw-r--r--  kernel/exit.c           |   3
-rw-r--r--  kernel/fork.c           | 415
-rw-r--r--  kernel/hrtimer.c        | 106
-rw-r--r--  kernel/intermodule.c    |   3
-rw-r--r--  kernel/itimer.c         |  11
-rw-r--r--  kernel/kprobes.c        |  36
-rw-r--r--  kernel/module.c         |   6
-rw-r--r--  kernel/panic.c          |   1
-rw-r--r--  kernel/posix-timers.c   |  53
-rw-r--r--  kernel/power/console.c  |  16
-rw-r--r--  kernel/power/disk.c     |  15
-rw-r--r--  kernel/power/main.c     |   4
-rw-r--r--  kernel/power/power.h    |  15
-rw-r--r--  kernel/power/snapshot.c |   4
-rw-r--r--  kernel/power/swsusp.c   |  18
-rw-r--r--  kernel/ptrace.c         |  28
-rw-r--r--  kernel/rcupdate.c       |  76
-rw-r--r--  kernel/rcutorture.c     |  10
-rw-r--r--  kernel/sched.c          | 186
-rw-r--r--  kernel/signal.c         |  11
-rw-r--r--  kernel/sys.c            |  27
-rw-r--r--  kernel/sys_ni.c         |   2
-rw-r--r--  kernel/sysctl.c         |  47
-rw-r--r--  kernel/time.c           |  15
-rw-r--r--  kernel/timer.c          |  63
-rw-r--r--  kernel/user.c           |  32
29 files changed, 817 insertions, 430 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 685c25175d..d7e7e637b9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -841,7 +841,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
841 841
842 for (aux = context->aux; aux; aux = aux->next) { 842 for (aux = context->aux; aux; aux = aux->next) {
843 843
844 ab = audit_log_start(context, GFP_KERNEL, aux->type); 844 ab = audit_log_start(context, gfp_mask, aux->type);
845 if (!ab) 845 if (!ab)
846 continue; /* audit_panic has been called */ 846 continue; /* audit_panic has been called */
847 847
@@ -878,14 +878,14 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
878 } 878 }
879 879
880 if (context->pwd && context->pwdmnt) { 880 if (context->pwd && context->pwdmnt) {
881 ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); 881 ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
882 if (ab) { 882 if (ab) {
883 audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); 883 audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
884 audit_log_end(ab); 884 audit_log_end(ab);
885 } 885 }
886 } 886 }
887 for (i = 0; i < context->name_count; i++) { 887 for (i = 0; i < context->name_count; i++) {
888 ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); 888 ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
889 if (!ab) 889 if (!ab)
890 continue; /* audit_panic has been called */ 890 continue; /* audit_panic has been called */
891 891
diff --git a/kernel/compat.c b/kernel/compat.c
index 1867290c37..8c9cd88b67 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,7 +23,6 @@
23#include <linux/security.h> 23#include <linux/security.h>
24 24
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/bug.h>
27 26
28int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) 27int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
29{ 28{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fe2f71f92a..12815d3f1a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -641,7 +641,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
641 * task has been modifying its cpuset. 641 * task has been modifying its cpuset.
642 */ 642 */
643 643
644void cpuset_update_task_memory_state() 644void cpuset_update_task_memory_state(void)
645{ 645{
646 int my_cpusets_mem_gen; 646 int my_cpusets_mem_gen;
647 struct task_struct *tsk = current; 647 struct task_struct *tsk = current;
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
1977 * We don't need to task_lock() this reference to tsk->cpuset, 1977 * We don't need to task_lock() this reference to tsk->cpuset,
1978 * because tsk is already marked PF_EXITING, so attach_task() won't 1978 * because tsk is already marked PF_EXITING, so attach_task() won't
1979 * mess with it, or task is a failed fork, never visible to attach_task. 1979 * mess with it, or task is a failed fork, never visible to attach_task.
1980 *
1981 * Hack:
1982 *
1983 * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
1984 *
1985 * Don't leave a task unable to allocate memory, as that is an
1986 * accident waiting to happen should someone add a callout in
1987 * do_exit() after the cpuset_exit() call that might allocate.
1988 * If a task tries to allocate memory with an invalid cpuset,
1989 * it will oops in cpuset_update_task_memory_state().
1990 *
1991 * We call cpuset_exit() while the task is still competent to
1992 * handle notify_on_release(), then leave the task attached to
1993 * the root cpuset (top_cpuset) for the remainder of its exit.
1994 *
1995 * To do this properly, we would increment the reference count on
1996 * top_cpuset, and near the very end of the kernel/exit.c do_exit()
1997 * code we would add a second cpuset function call, to drop that
1998 * reference. This would just create an unnecessary hot spot on
1999 * the top_cpuset reference count, to no avail.
2000 *
2001 * Normally, holding a reference to a cpuset without bumping its
2002 * count is unsafe. The cpuset could go away, or someone could
2003 * attach us to a different cpuset, decrementing the count on
2004 * the first cpuset that we never incremented. But in this case,
2005 * top_cpuset isn't going away, and either task has PF_EXITING set,
2006 * which wards off any attach_task() attempts, or task is a failed
2007 * fork, never visible to attach_task.
2008 *
2009 * Another way to do this would be to set the cpuset pointer
2010 * to NULL here, and check in cpuset_update_task_memory_state()
2011 * for a NULL pointer. This hack avoids that NULL check, for no
2012 * cost (other than this way too long comment ;).
1980 **/ 2013 **/
1981 2014
1982void cpuset_exit(struct task_struct *tsk) 2015void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
1984 struct cpuset *cs; 2017 struct cpuset *cs;
1985 2018
1986 cs = tsk->cpuset; 2019 cs = tsk->cpuset;
1987 tsk->cpuset = NULL; 2020 tsk->cpuset = &top_cpuset; /* Hack - see comment above */
1988 2021
1989 if (notify_on_release(cs)) { 2022 if (notify_on_release(cs)) {
1990 char *pathbuf = NULL; 2023 char *pathbuf = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 93cee36713..531aadca55 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -360,6 +360,9 @@ void daemonize(const char *name, ...)
360 fs = init_task.fs; 360 fs = init_task.fs;
361 current->fs = fs; 361 current->fs = fs;
362 atomic_inc(&fs->count); 362 atomic_inc(&fs->count);
363 exit_namespace(current);
364 current->namespace = init_task.namespace;
365 get_namespace(current->namespace);
363 exit_files(current); 366 exit_files(current);
364 current->files = init_task.files; 367 current->files = init_task.files;
365 atomic_inc(&current->files->count); 368 atomic_inc(&current->files->count);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4ae8cfc1c8..ccdfbb16c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,8 +108,10 @@ void free_task(struct task_struct *tsk)
108} 108}
109EXPORT_SYMBOL(free_task); 109EXPORT_SYMBOL(free_task);
110 110
111void __put_task_struct(struct task_struct *tsk) 111void __put_task_struct_cb(struct rcu_head *rhp)
112{ 112{
113 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
114
113 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 115 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
114 WARN_ON(atomic_read(&tsk->usage)); 116 WARN_ON(atomic_read(&tsk->usage));
115 WARN_ON(tsk == current); 117 WARN_ON(tsk == current);
@@ -446,6 +448,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
446 } 448 }
447} 449}
448 450
451/*
452 * Allocate a new mm structure and copy contents from the
453 * mm structure of the passed in task structure.
454 */
455static struct mm_struct *dup_mm(struct task_struct *tsk)
456{
457 struct mm_struct *mm, *oldmm = current->mm;
458 int err;
459
460 if (!oldmm)
461 return NULL;
462
463 mm = allocate_mm();
464 if (!mm)
465 goto fail_nomem;
466
467 memcpy(mm, oldmm, sizeof(*mm));
468
469 if (!mm_init(mm))
470 goto fail_nomem;
471
472 if (init_new_context(tsk, mm))
473 goto fail_nocontext;
474
475 err = dup_mmap(mm, oldmm);
476 if (err)
477 goto free_pt;
478
479 mm->hiwater_rss = get_mm_rss(mm);
480 mm->hiwater_vm = mm->total_vm;
481
482 return mm;
483
484free_pt:
485 mmput(mm);
486
487fail_nomem:
488 return NULL;
489
490fail_nocontext:
491 /*
492 * If init_new_context() failed, we cannot use mmput() to free the mm
493 * because it calls destroy_context()
494 */
495 mm_free_pgd(mm);
496 free_mm(mm);
497 return NULL;
498}
499
449static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 500static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
450{ 501{
451 struct mm_struct * mm, *oldmm; 502 struct mm_struct * mm, *oldmm;
@@ -473,43 +524,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
473 } 524 }
474 525
475 retval = -ENOMEM; 526 retval = -ENOMEM;
476 mm = allocate_mm(); 527 mm = dup_mm(tsk);
477 if (!mm) 528 if (!mm)
478 goto fail_nomem; 529 goto fail_nomem;
479 530
480 /* Copy the current MM stuff.. */
481 memcpy(mm, oldmm, sizeof(*mm));
482 if (!mm_init(mm))
483 goto fail_nomem;
484
485 if (init_new_context(tsk,mm))
486 goto fail_nocontext;
487
488 retval = dup_mmap(mm, oldmm);
489 if (retval)
490 goto free_pt;
491
492 mm->hiwater_rss = get_mm_rss(mm);
493 mm->hiwater_vm = mm->total_vm;
494
495good_mm: 531good_mm:
496 tsk->mm = mm; 532 tsk->mm = mm;
497 tsk->active_mm = mm; 533 tsk->active_mm = mm;
498 return 0; 534 return 0;
499 535
500free_pt:
501 mmput(mm);
502fail_nomem: 536fail_nomem:
503 return retval; 537 return retval;
504
505fail_nocontext:
506 /*
507 * If init_new_context() failed, we cannot use mmput() to free the mm
508 * because it calls destroy_context()
509 */
510 mm_free_pgd(mm);
511 free_mm(mm);
512 return retval;
513} 538}
514 539
515static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 540static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +622,17 @@ out:
597 return newf; 622 return newf;
598} 623}
599 624
600static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 625/*
626 * Allocate a new files structure and copy contents from the
627 * passed in files structure.
628 */
629static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
601{ 630{
602 struct files_struct *oldf, *newf; 631 struct files_struct *newf;
603 struct file **old_fds, **new_fds; 632 struct file **old_fds, **new_fds;
604 int open_files, size, i, error = 0, expand; 633 int open_files, size, i, expand;
605 struct fdtable *old_fdt, *new_fdt; 634 struct fdtable *old_fdt, *new_fdt;
606 635
607 /*
608 * A background process may not have any files ...
609 */
610 oldf = current->files;
611 if (!oldf)
612 goto out;
613
614 if (clone_flags & CLONE_FILES) {
615 atomic_inc(&oldf->count);
616 goto out;
617 }
618
619 /*
620 * Note: we may be using current for both targets (See exec.c)
621 * This works because we cache current->files (old) as oldf. Don't
622 * break this.
623 */
624 tsk->files = NULL;
625 error = -ENOMEM;
626 newf = alloc_files(); 636 newf = alloc_files();
627 if (!newf) 637 if (!newf)
628 goto out; 638 goto out;
@@ -651,9 +661,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
651 if (expand) { 661 if (expand) {
652 spin_unlock(&oldf->file_lock); 662 spin_unlock(&oldf->file_lock);
653 spin_lock(&newf->file_lock); 663 spin_lock(&newf->file_lock);
654 error = expand_files(newf, open_files-1); 664 *errorp = expand_files(newf, open_files-1);
655 spin_unlock(&newf->file_lock); 665 spin_unlock(&newf->file_lock);
656 if (error < 0) 666 if (*errorp < 0)
657 goto out_release; 667 goto out_release;
658 new_fdt = files_fdtable(newf); 668 new_fdt = files_fdtable(newf);
659 /* 669 /*
@@ -702,10 +712,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
702 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 712 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
703 } 713 }
704 714
705 tsk->files = newf;
706 error = 0;
707out: 715out:
708 return error; 716 return newf;
709 717
710out_release: 718out_release:
711 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); 719 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +723,40 @@ out_release:
715 goto out; 723 goto out;
716} 724}
717 725
726static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
727{
728 struct files_struct *oldf, *newf;
729 int error = 0;
730
731 /*
732 * A background process may not have any files ...
733 */
734 oldf = current->files;
735 if (!oldf)
736 goto out;
737
738 if (clone_flags & CLONE_FILES) {
739 atomic_inc(&oldf->count);
740 goto out;
741 }
742
743 /*
744 * Note: we may be using current for both targets (See exec.c)
745 * This works because we cache current->files (old) as oldf. Don't
746 * break this.
747 */
748 tsk->files = NULL;
749 error = -ENOMEM;
750 newf = dup_fd(oldf, &error);
751 if (!newf)
752 goto out;
753
754 tsk->files = newf;
755 error = 0;
756out:
757 return error;
758}
759
718/* 760/*
719 * Helper to unshare the files of the current task. 761 * Helper to unshare the files of the current task.
720 * We don't want to expose copy_files internals to 762 * We don't want to expose copy_files internals to
@@ -802,7 +844,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
802 init_sigpending(&sig->shared_pending); 844 init_sigpending(&sig->shared_pending);
803 INIT_LIST_HEAD(&sig->posix_timers); 845 INIT_LIST_HEAD(&sig->posix_timers);
804 846
805 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC); 847 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
806 sig->it_real_incr.tv64 = 0; 848 sig->it_real_incr.tv64 = 0;
807 sig->real_timer.function = it_real_fn; 849 sig->real_timer.function = it_real_fn;
808 sig->real_timer.data = tsk; 850 sig->real_timer.data = tsk;
@@ -1020,6 +1062,12 @@ static task_t *copy_process(unsigned long clone_flags,
1020 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1062 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1021 1063
1022 /* 1064 /*
1065 * sigaltstack should be cleared when sharing the same VM
1066 */
1067 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
1068 p->sas_ss_sp = p->sas_ss_size = 0;
1069
1070 /*
1023 * Syscall tracing should be turned off in the child regardless 1071 * Syscall tracing should be turned off in the child regardless
1024 * of CLONE_PTRACE. 1072 * of CLONE_PTRACE.
1025 */ 1073 */
@@ -1083,8 +1131,8 @@ static task_t *copy_process(unsigned long clone_flags,
1083 p->real_parent = current; 1131 p->real_parent = current;
1084 p->parent = p->real_parent; 1132 p->parent = p->real_parent;
1085 1133
1134 spin_lock(&current->sighand->siglock);
1086 if (clone_flags & CLONE_THREAD) { 1135 if (clone_flags & CLONE_THREAD) {
1087 spin_lock(&current->sighand->siglock);
1088 /* 1136 /*
1089 * Important: if an exit-all has been started then 1137 * Important: if an exit-all has been started then
1090 * do not create this new thread - the whole thread 1138 * do not create this new thread - the whole thread
@@ -1122,8 +1170,6 @@ static task_t *copy_process(unsigned long clone_flags,
1122 */ 1170 */
1123 p->it_prof_expires = jiffies_to_cputime(1); 1171 p->it_prof_expires = jiffies_to_cputime(1);
1124 } 1172 }
1125
1126 spin_unlock(&current->sighand->siglock);
1127 } 1173 }
1128 1174
1129 /* 1175 /*
@@ -1135,8 +1181,6 @@ static task_t *copy_process(unsigned long clone_flags,
1135 if (unlikely(p->ptrace & PT_PTRACED)) 1181 if (unlikely(p->ptrace & PT_PTRACED))
1136 __ptrace_link(p, current->parent); 1182 __ptrace_link(p, current->parent);
1137 1183
1138 attach_pid(p, PIDTYPE_PID, p->pid);
1139 attach_pid(p, PIDTYPE_TGID, p->tgid);
1140 if (thread_group_leader(p)) { 1184 if (thread_group_leader(p)) {
1141 p->signal->tty = current->signal->tty; 1185 p->signal->tty = current->signal->tty;
1142 p->signal->pgrp = process_group(current); 1186 p->signal->pgrp = process_group(current);
@@ -1146,9 +1190,12 @@ static task_t *copy_process(unsigned long clone_flags,
1146 if (p->pid) 1190 if (p->pid)
1147 __get_cpu_var(process_counts)++; 1191 __get_cpu_var(process_counts)++;
1148 } 1192 }
1193 attach_pid(p, PIDTYPE_TGID, p->tgid);
1194 attach_pid(p, PIDTYPE_PID, p->pid);
1149 1195
1150 nr_threads++; 1196 nr_threads++;
1151 total_forks++; 1197 total_forks++;
1198 spin_unlock(&current->sighand->siglock);
1152 write_unlock_irq(&tasklist_lock); 1199 write_unlock_irq(&tasklist_lock);
1153 proc_fork_connector(p); 1200 proc_fork_connector(p);
1154 return p; 1201 return p;
@@ -1323,3 +1370,249 @@ void __init proc_caches_init(void)
1323 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1370 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1324 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1371 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1325} 1372}
1373
1374
1375/*
1376 * Check constraints on flags passed to the unshare system call and
1377 * force unsharing of additional process context as appropriate.
1378 */
1379static inline void check_unshare_flags(unsigned long *flags_ptr)
1380{
1381 /*
1382 * If unsharing a thread from a thread group, must also
1383 * unshare vm.
1384 */
1385 if (*flags_ptr & CLONE_THREAD)
1386 *flags_ptr |= CLONE_VM;
1387
1388 /*
1389 * If unsharing vm, must also unshare signal handlers.
1390 */
1391 if (*flags_ptr & CLONE_VM)
1392 *flags_ptr |= CLONE_SIGHAND;
1393
1394 /*
1395 * If unsharing signal handlers and the task was created
1396 * using CLONE_THREAD, then must unshare the thread
1397 */
1398 if ((*flags_ptr & CLONE_SIGHAND) &&
1399 (atomic_read(&current->signal->count) > 1))
1400 *flags_ptr |= CLONE_THREAD;
1401
1402 /*
1403 * If unsharing namespace, must also unshare filesystem information.
1404 */
1405 if (*flags_ptr & CLONE_NEWNS)
1406 *flags_ptr |= CLONE_FS;
1407}
1408
1409/*
1410 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1411 */
1412static int unshare_thread(unsigned long unshare_flags)
1413{
1414 if (unshare_flags & CLONE_THREAD)
1415 return -EINVAL;
1416
1417 return 0;
1418}
1419
1420/*
1421 * Unshare the filesystem structure if it is being shared
1422 */
1423static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1424{
1425 struct fs_struct *fs = current->fs;
1426
1427 if ((unshare_flags & CLONE_FS) &&
1428 (fs && atomic_read(&fs->count) > 1)) {
1429 *new_fsp = __copy_fs_struct(current->fs);
1430 if (!*new_fsp)
1431 return -ENOMEM;
1432 }
1433
1434 return 0;
1435}
1436
1437/*
1438 * Unshare the namespace structure if it is being shared
1439 */
1440static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1441{
1442 struct namespace *ns = current->namespace;
1443
1444 if ((unshare_flags & CLONE_NEWNS) &&
1445 (ns && atomic_read(&ns->count) > 1)) {
1446 if (!capable(CAP_SYS_ADMIN))
1447 return -EPERM;
1448
1449 *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
1450 if (!*new_nsp)
1451 return -ENOMEM;
1452 }
1453
1454 return 0;
1455}
1456
1457/*
1458 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
1459 * supported yet
1460 */
1461static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1462{
1463 struct sighand_struct *sigh = current->sighand;
1464
1465 if ((unshare_flags & CLONE_SIGHAND) &&
1466 (sigh && atomic_read(&sigh->count) > 1))
1467 return -EINVAL;
1468 else
1469 return 0;
1470}
1471
1472/*
1473 * Unshare vm if it is being shared
1474 */
1475static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1476{
1477 struct mm_struct *mm = current->mm;
1478
1479 if ((unshare_flags & CLONE_VM) &&
1480 (mm && atomic_read(&mm->mm_users) > 1)) {
1481 *new_mmp = dup_mm(current);
1482 if (!*new_mmp)
1483 return -ENOMEM;
1484 }
1485
1486 return 0;
1487}
1488
1489/*
1490 * Unshare file descriptor table if it is being shared
1491 */
1492static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1493{
1494 struct files_struct *fd = current->files;
1495 int error = 0;
1496
1497 if ((unshare_flags & CLONE_FILES) &&
1498 (fd && atomic_read(&fd->count) > 1)) {
1499 *new_fdp = dup_fd(fd, &error);
1500 if (!*new_fdp)
1501 return error;
1502 }
1503
1504 return 0;
1505}
1506
1507/*
1508 * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
1509 * supported yet
1510 */
1511static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
1512{
1513 if (unshare_flags & CLONE_SYSVSEM)
1514 return -EINVAL;
1515
1516 return 0;
1517}
1518
1519/*
1520 * unshare allows a process to 'unshare' part of the process
1521 * context which was originally shared using clone. copy_*
1522 * functions used by do_fork() cannot be used here directly
1523 * because they modify an inactive task_struct that is being
1524 * constructed. Here we are modifying the current, active,
1525 * task_struct.
1526 */
1527asmlinkage long sys_unshare(unsigned long unshare_flags)
1528{
1529 int err = 0;
1530 struct fs_struct *fs, *new_fs = NULL;
1531 struct namespace *ns, *new_ns = NULL;
1532 struct sighand_struct *sigh, *new_sigh = NULL;
1533 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1534 struct files_struct *fd, *new_fd = NULL;
1535 struct sem_undo_list *new_ulist = NULL;
1536
1537 check_unshare_flags(&unshare_flags);
1538
1539 if ((err = unshare_thread(unshare_flags)))
1540 goto bad_unshare_out;
1541 if ((err = unshare_fs(unshare_flags, &new_fs)))
1542 goto bad_unshare_cleanup_thread;
1543 if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
1544 goto bad_unshare_cleanup_fs;
1545 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1546 goto bad_unshare_cleanup_ns;
1547 if ((err = unshare_vm(unshare_flags, &new_mm)))
1548 goto bad_unshare_cleanup_sigh;
1549 if ((err = unshare_fd(unshare_flags, &new_fd)))
1550 goto bad_unshare_cleanup_vm;
1551 if ((err = unshare_semundo(unshare_flags, &new_ulist)))
1552 goto bad_unshare_cleanup_fd;
1553
1554 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
1555
1556 task_lock(current);
1557
1558 if (new_fs) {
1559 fs = current->fs;
1560 current->fs = new_fs;
1561 new_fs = fs;
1562 }
1563
1564 if (new_ns) {
1565 ns = current->namespace;
1566 current->namespace = new_ns;
1567 new_ns = ns;
1568 }
1569
1570 if (new_sigh) {
1571 sigh = current->sighand;
1572 current->sighand = new_sigh;
1573 new_sigh = sigh;
1574 }
1575
1576 if (new_mm) {
1577 mm = current->mm;
1578 active_mm = current->active_mm;
1579 current->mm = new_mm;
1580 current->active_mm = new_mm;
1581 activate_mm(active_mm, new_mm);
1582 new_mm = mm;
1583 }
1584
1585 if (new_fd) {
1586 fd = current->files;
1587 current->files = new_fd;
1588 new_fd = fd;
1589 }
1590
1591 task_unlock(current);
1592 }
1593
1594bad_unshare_cleanup_fd:
1595 if (new_fd)
1596 put_files_struct(new_fd);
1597
1598bad_unshare_cleanup_vm:
1599 if (new_mm)
1600 mmput(new_mm);
1601
1602bad_unshare_cleanup_sigh:
1603 if (new_sigh)
1604 if (atomic_dec_and_test(&new_sigh->count))
1605 kmem_cache_free(sighand_cachep, new_sigh);
1606
1607bad_unshare_cleanup_ns:
1608 if (new_ns)
1609 put_namespace(new_ns);
1610
1611bad_unshare_cleanup_fs:
1612 if (new_fs)
1613 put_fs_struct(new_fs);
1614
1615bad_unshare_cleanup_thread:
1616bad_unshare_out:
1617 return err;
1618}
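The kernel/fork.c hunks above introduce a new unshare() system call, which lets an already-running task give up pieces of execution context (open files, fs info, mount namespace, and so on) that it shared at clone() time. As an illustrative aside, a minimal userspace sketch of invoking it could look like the following; it assumes a libc that already exposes an unshare() wrapper in <sched.h> (older systems would need the raw syscall instead), and note that CLONE_NEWNS requires CAP_SYS_ADMIN:

/*
 * Illustrative sketch only, not part of the patch above. Assumes a
 * libc that provides the unshare() wrapper in <sched.h>; otherwise
 * the raw syscall would have to be used instead.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/*
	 * Request a private mount namespace. The kernel forces CLONE_FS
	 * on as well (see check_unshare_flags() in the diff), and
	 * CLONE_NEWNS is only permitted with CAP_SYS_ADMIN.
	 */
	if (unshare(CLONE_NEWNS) == -1) {
		perror("unshare(CLONE_NEWNS)");
		exit(EXIT_FAILURE);
	}

	/* Mounts performed from here on are invisible to the parent namespace. */
	printf("running in a private mount namespace\n");
	return 0;
}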
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f1c4155b49..14bc9cfa63 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -21,6 +21,12 @@
21 * Credits: 21 * Credits:
22 * based on kernel/timer.c 22 * based on kernel/timer.c
23 * 23 *
24 * Help, testing, suggestions, bugfixes, improvements were
25 * provided by:
26 *
27 * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
28 * et. al.
29 *
24 * For licencing details see kernel-base/COPYING 30 * For licencing details see kernel-base/COPYING
25 */ 31 */
26 32
@@ -66,6 +72,12 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
66 72
67/* 73/*
68 * The timer bases: 74 * The timer bases:
75 *
76 * Note: If we want to add new timer bases, we have to skip the two
77 * clock ids captured by the cpu-timers. We do this by holding empty
78 * entries rather than doing math adjustment of the clock ids.
79 * This ensures that we capture erroneous accesses to these clock ids
80 * rather than moving them into the range of valid clock id's.
69 */ 81 */
70 82
71#define MAX_HRTIMER_BASES 2 83#define MAX_HRTIMER_BASES 2
@@ -406,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
406 /* Switch the timer base, if necessary: */ 418 /* Switch the timer base, if necessary: */
407 new_base = switch_hrtimer_base(timer, base); 419 new_base = switch_hrtimer_base(timer, base);
408 420
409 if (mode == HRTIMER_REL) 421 if (mode == HRTIMER_REL) {
410 tim = ktime_add(tim, new_base->get_time()); 422 tim = ktime_add(tim, new_base->get_time());
423 /*
424 * CONFIG_TIME_LOW_RES is a temporary way for architectures
425 * to signal that they simply return xtime in
426 * do_gettimeoffset(). In this case we want to round up by
427 * resolution when starting a relative timer, to avoid short
428 * timeouts. This will go away with the GTOD framework.
429 */
430#ifdef CONFIG_TIME_LOW_RES
431 tim = ktime_add(tim, base->resolution);
432#endif
433 }
411 timer->expires = tim; 434 timer->expires = tim;
412 435
413 enqueue_hrtimer(timer, new_base); 436 enqueue_hrtimer(timer, new_base);
@@ -482,30 +505,61 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
482 return rem; 505 return rem;
483} 506}
484 507
508#ifdef CONFIG_NO_IDLE_HZ
485/** 509/**
486 * hrtimer_rebase - rebase an initialized hrtimer to a different base 510 * hrtimer_get_next_event - get the time until next expiry event
487 * 511 *
488 * @timer: the timer to be rebased 512 * Returns the delta to the next expiry event or KTIME_MAX if no timer
489 * @clock_id: the clock to be used 513 * is pending.
490 */ 514 */
491void hrtimer_rebase(struct hrtimer *timer, const clockid_t clock_id) 515ktime_t hrtimer_get_next_event(void)
492{ 516{
493 struct hrtimer_base *bases; 517 struct hrtimer_base *base = __get_cpu_var(hrtimer_bases);
518 ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
519 unsigned long flags;
520 int i;
494 521
495 bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); 522 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
496 timer->base = &bases[clock_id]; 523 struct hrtimer *timer;
524
525 spin_lock_irqsave(&base->lock, flags);
526 if (!base->first) {
527 spin_unlock_irqrestore(&base->lock, flags);
528 continue;
529 }
530 timer = rb_entry(base->first, struct hrtimer, node);
531 delta.tv64 = timer->expires.tv64;
532 spin_unlock_irqrestore(&base->lock, flags);
533 delta = ktime_sub(delta, base->get_time());
534 if (delta.tv64 < mindelta.tv64)
535 mindelta.tv64 = delta.tv64;
536 }
537 if (mindelta.tv64 < 0)
538 mindelta.tv64 = 0;
539 return mindelta;
497} 540}
541#endif
498 542
499/** 543/**
500 * hrtimer_init - initialize a timer to the given clock 544 * hrtimer_init - initialize a timer to the given clock
501 * 545 *
502 * @timer: the timer to be initialized 546 * @timer: the timer to be initialized
503 * @clock_id: the clock to be used 547 * @clock_id: the clock to be used
548 * @mode: timer mode abs/rel
504 */ 549 */
505void hrtimer_init(struct hrtimer *timer, const clockid_t clock_id) 550void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
551 enum hrtimer_mode mode)
506{ 552{
553 struct hrtimer_base *bases;
554
507 memset(timer, 0, sizeof(struct hrtimer)); 555 memset(timer, 0, sizeof(struct hrtimer));
508 hrtimer_rebase(timer, clock_id); 556
557 bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
558
559 if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
560 clock_id = CLOCK_MONOTONIC;
561
562 timer->base = &bases[clock_id];
509} 563}
510 564
511/** 565/**
@@ -550,6 +604,7 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
550 fn = timer->function; 604 fn = timer->function;
551 data = timer->data; 605 data = timer->data;
552 set_curr_timer(base, timer); 606 set_curr_timer(base, timer);
607 timer->state = HRTIMER_RUNNING;
553 __remove_hrtimer(timer, base); 608 __remove_hrtimer(timer, base);
554 spin_unlock_irq(&base->lock); 609 spin_unlock_irq(&base->lock);
555 610
@@ -565,6 +620,10 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
565 620
566 spin_lock_irq(&base->lock); 621 spin_lock_irq(&base->lock);
567 622
623 /* Another CPU has added back the timer */
624 if (timer->state != HRTIMER_RUNNING)
625 continue;
626
568 if (restart == HRTIMER_RESTART) 627 if (restart == HRTIMER_RESTART)
569 enqueue_hrtimer(timer, base); 628 enqueue_hrtimer(timer, base);
570 else 629 else
@@ -638,8 +697,7 @@ schedule_hrtimer_interruptible(struct hrtimer *timer,
638 return schedule_hrtimer(timer, mode); 697 return schedule_hrtimer(timer, mode);
639} 698}
640 699
641static long __sched 700static long __sched nanosleep_restart(struct restart_block *restart)
642nanosleep_restart(struct restart_block *restart, clockid_t clockid)
643{ 701{
644 struct timespec __user *rmtp; 702 struct timespec __user *rmtp;
645 struct timespec tu; 703 struct timespec tu;
@@ -649,7 +707,7 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
649 707
650 restart->fn = do_no_restart_syscall; 708 restart->fn = do_no_restart_syscall;
651 709
652 hrtimer_init(&timer, clockid); 710 hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
653 711
654 timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; 712 timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
655 713
@@ -669,16 +727,6 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
669 return -ERESTART_RESTARTBLOCK; 727 return -ERESTART_RESTARTBLOCK;
670} 728}
671 729
672static long __sched nanosleep_restart_mono(struct restart_block *restart)
673{
674 return nanosleep_restart(restart, CLOCK_MONOTONIC);
675}
676
677static long __sched nanosleep_restart_real(struct restart_block *restart)
678{
679 return nanosleep_restart(restart, CLOCK_REALTIME);
680}
681
682long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, 730long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
683 const enum hrtimer_mode mode, const clockid_t clockid) 731 const enum hrtimer_mode mode, const clockid_t clockid)
684{ 732{
@@ -687,7 +735,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
687 struct timespec tu; 735 struct timespec tu;
688 ktime_t rem; 736 ktime_t rem;
689 737
690 hrtimer_init(&timer, clockid); 738 hrtimer_init(&timer, clockid, mode);
691 739
692 timer.expires = timespec_to_ktime(*rqtp); 740 timer.expires = timespec_to_ktime(*rqtp);
693 741
@@ -695,7 +743,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
695 if (rem.tv64 <= 0) 743 if (rem.tv64 <= 0)
696 return 0; 744 return 0;
697 745
698 /* Absolute timers do not update the rmtp value: */ 746 /* Absolute timers do not update the rmtp value and restart: */
699 if (mode == HRTIMER_ABS) 747 if (mode == HRTIMER_ABS)
700 return -ERESTARTNOHAND; 748 return -ERESTARTNOHAND;
701 749
@@ -705,11 +753,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
705 return -EFAULT; 753 return -EFAULT;
706 754
707 restart = &current_thread_info()->restart_block; 755 restart = &current_thread_info()->restart_block;
708 restart->fn = (clockid == CLOCK_MONOTONIC) ? 756 restart->fn = nanosleep_restart;
709 nanosleep_restart_mono : nanosleep_restart_real;
710 restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF; 757 restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
711 restart->arg1 = timer.expires.tv64 >> 32; 758 restart->arg1 = timer.expires.tv64 >> 32;
712 restart->arg2 = (unsigned long) rmtp; 759 restart->arg2 = (unsigned long) rmtp;
760 restart->arg3 = (unsigned long) timer.base->index;
713 761
714 return -ERESTART_RESTARTBLOCK; 762 return -ERESTART_RESTARTBLOCK;
715} 763}
@@ -736,10 +784,8 @@ static void __devinit init_hrtimers_cpu(int cpu)
736 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); 784 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
737 int i; 785 int i;
738 786
739 for (i = 0; i < MAX_HRTIMER_BASES; i++) { 787 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++)
740 spin_lock_init(&base->lock); 788 spin_lock_init(&base->lock);
741 base++;
742 }
743} 789}
744 790
745#ifdef CONFIG_HOTPLUG_CPU 791#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 0cbe633420..55b1e5b85d 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -179,3 +179,6 @@ EXPORT_SYMBOL(inter_module_register);
179EXPORT_SYMBOL(inter_module_unregister); 179EXPORT_SYMBOL(inter_module_unregister);
180EXPORT_SYMBOL(inter_module_get_request); 180EXPORT_SYMBOL(inter_module_get_request);
181EXPORT_SYMBOL(inter_module_put); 181EXPORT_SYMBOL(inter_module_put);
182
183MODULE_LICENSE("GPL");
184
diff --git a/kernel/itimer.c b/kernel/itimer.c
index c2c05c4ff2..379be2f8c8 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -49,9 +49,11 @@ int do_getitimer(int which, struct itimerval *value)
49 49
50 switch (which) { 50 switch (which) {
51 case ITIMER_REAL: 51 case ITIMER_REAL:
52 spin_lock_irq(&tsk->sighand->siglock);
52 value->it_value = itimer_get_remtime(&tsk->signal->real_timer); 53 value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
53 value->it_interval = 54 value->it_interval =
54 ktime_to_timeval(tsk->signal->it_real_incr); 55 ktime_to_timeval(tsk->signal->it_real_incr);
56 spin_unlock_irq(&tsk->sighand->siglock);
55 break; 57 break;
56 case ITIMER_VIRTUAL: 58 case ITIMER_VIRTUAL:
57 read_lock(&tasklist_lock); 59 read_lock(&tasklist_lock);
@@ -150,18 +152,25 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
150 152
151 switch (which) { 153 switch (which) {
152 case ITIMER_REAL: 154 case ITIMER_REAL:
155again:
156 spin_lock_irq(&tsk->sighand->siglock);
153 timer = &tsk->signal->real_timer; 157 timer = &tsk->signal->real_timer;
154 hrtimer_cancel(timer);
155 if (ovalue) { 158 if (ovalue) {
156 ovalue->it_value = itimer_get_remtime(timer); 159 ovalue->it_value = itimer_get_remtime(timer);
157 ovalue->it_interval 160 ovalue->it_interval
158 = ktime_to_timeval(tsk->signal->it_real_incr); 161 = ktime_to_timeval(tsk->signal->it_real_incr);
159 } 162 }
163 /* We are sharing ->siglock with it_real_fn() */
164 if (hrtimer_try_to_cancel(timer) < 0) {
165 spin_unlock_irq(&tsk->sighand->siglock);
166 goto again;
167 }
160 tsk->signal->it_real_incr = 168 tsk->signal->it_real_incr =
161 timeval_to_ktime(value->it_interval); 169 timeval_to_ktime(value->it_interval);
162 expires = timeval_to_ktime(value->it_value); 170 expires = timeval_to_ktime(value->it_value);
163 if (expires.tv64 != 0) 171 if (expires.tv64 != 0)
164 hrtimer_start(timer, expires, HRTIMER_REL); 172 hrtimer_start(timer, expires, HRTIMER_REL);
173 spin_unlock_irq(&tsk->sighand->siglock);
165 break; 174 break;
166 case ITIMER_VIRTUAL: 175 case ITIMER_VIRTUAL:
167 nval = timeval_to_cputime(&value->it_value); 176 nval = timeval_to_cputime(&value->it_value);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3ea6325228..fef1af8a73 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -344,23 +344,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
344 spin_unlock_irqrestore(&kretprobe_lock, flags); 344 spin_unlock_irqrestore(&kretprobe_lock, flags);
345} 345}
346 346
347/*
348 * This kprobe pre_handler is registered with every kretprobe. When probe
349 * hits it will set up the return probe.
350 */
351static int __kprobes pre_handler_kretprobe(struct kprobe *p,
352 struct pt_regs *regs)
353{
354 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
355 unsigned long flags = 0;
356
357 /*TODO: consider to only swap the RA after the last pre_handler fired */
358 spin_lock_irqsave(&kretprobe_lock, flags);
359 arch_prepare_kretprobe(rp, regs);
360 spin_unlock_irqrestore(&kretprobe_lock, flags);
361 return 0;
362}
363
364static inline void free_rp_inst(struct kretprobe *rp) 347static inline void free_rp_inst(struct kretprobe *rp)
365{ 348{
366 struct kretprobe_instance *ri; 349 struct kretprobe_instance *ri;
@@ -578,6 +561,23 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
578 561
579#ifdef ARCH_SUPPORTS_KRETPROBES 562#ifdef ARCH_SUPPORTS_KRETPROBES
580 563
564/*
565 * This kprobe pre_handler is registered with every kretprobe. When probe
566 * hits it will set up the return probe.
567 */
568static int __kprobes pre_handler_kretprobe(struct kprobe *p,
569 struct pt_regs *regs)
570{
571 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
572 unsigned long flags = 0;
573
574 /*TODO: consider to only swap the RA after the last pre_handler fired */
575 spin_lock_irqsave(&kretprobe_lock, flags);
576 arch_prepare_kretprobe(rp, regs);
577 spin_unlock_irqrestore(&kretprobe_lock, flags);
578 return 0;
579}
580
581int __kprobes register_kretprobe(struct kretprobe *rp) 581int __kprobes register_kretprobe(struct kretprobe *rp)
582{ 582{
583 int ret = 0; 583 int ret = 0;
@@ -631,12 +631,12 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp)
631 unregister_kprobe(&rp->kp); 631 unregister_kprobe(&rp->kp);
632 /* No race here */ 632 /* No race here */
633 spin_lock_irqsave(&kretprobe_lock, flags); 633 spin_lock_irqsave(&kretprobe_lock, flags);
634 free_rp_inst(rp);
635 while ((ri = get_used_rp_inst(rp)) != NULL) { 634 while ((ri = get_used_rp_inst(rp)) != NULL) {
636 ri->rp = NULL; 635 ri->rp = NULL;
637 hlist_del(&ri->uflist); 636 hlist_del(&ri->uflist);
638 } 637 }
639 spin_unlock_irqrestore(&kretprobe_lock, flags); 638 spin_unlock_irqrestore(&kretprobe_lock, flags);
639 free_rp_inst(rp);
640} 640}
641 641
642static int __init init_kprobes(void) 642static int __init init_kprobes(void)
diff --git a/kernel/module.c b/kernel/module.c
index 618ed6e23e..5aad477ddc 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1670,6 +1670,9 @@ static struct module *load_module(void __user *umod,
1670 goto free_mod; 1670 goto free_mod;
1671 } 1671 }
1672 1672
1673 /* Userspace could have altered the string after the strlen_user() */
1674 args[arglen - 1] = '\0';
1675
1673 if (find_module(mod->name)) { 1676 if (find_module(mod->name)) {
1674 err = -EEXIST; 1677 err = -EEXIST;
1675 goto free_mod; 1678 goto free_mod;
@@ -2092,7 +2095,8 @@ static unsigned long mod_find_symname(struct module *mod, const char *name)
2092 unsigned int i; 2095 unsigned int i;
2093 2096
2094 for (i = 0; i < mod->num_symtab; i++) 2097 for (i = 0; i < mod->num_symtab; i++)
2095 if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0) 2098 if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
2099 mod->symtab[i].st_info != 'U')
2096 return mod->symtab[i].st_value; 2100 return mod->symtab[i].st_value;
2097 return 0; 2101 return 0;
2098} 2102}
diff --git a/kernel/panic.c b/kernel/panic.c
index c5c4ab2558..126dc43f1c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...)
130#endif 130#endif
131 local_irq_enable(); 131 local_irq_enable();
132 for (i = 0;;) { 132 for (i = 0;;) {
133 touch_softlockup_watchdog();
133 i += panic_blink(i); 134 i += panic_blink(i);
134 mdelay(1); 135 mdelay(1);
135 i++; 136 i++;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 197208b3aa..216f574b5f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -194,9 +194,7 @@ static inline int common_clock_set(const clockid_t which_clock,
194 194
195static int common_timer_create(struct k_itimer *new_timer) 195static int common_timer_create(struct k_itimer *new_timer)
196{ 196{
197 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock); 197 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
198 new_timer->it.real.timer.data = new_timer;
199 new_timer->it.real.timer.function = posix_timer_fn;
200 return 0; 198 return 0;
201} 199}
202 200
@@ -290,7 +288,8 @@ void do_schedule_next_timer(struct siginfo *info)
290 info->si_overrun = timr->it_overrun_last; 288 info->si_overrun = timr->it_overrun_last;
291 } 289 }
292 290
293 unlock_timer(timr, flags); 291 if (timr)
292 unlock_timer(timr, flags);
294} 293}
295 294
296int posix_timer_event(struct k_itimer *timr,int si_private) 295int posix_timer_event(struct k_itimer *timr,int si_private)
@@ -692,6 +691,7 @@ common_timer_set(struct k_itimer *timr, int flags,
692 struct itimerspec *new_setting, struct itimerspec *old_setting) 691 struct itimerspec *new_setting, struct itimerspec *old_setting)
693{ 692{
694 struct hrtimer *timer = &timr->it.real.timer; 693 struct hrtimer *timer = &timr->it.real.timer;
694 enum hrtimer_mode mode;
695 695
696 if (old_setting) 696 if (old_setting)
697 common_timer_get(timr, old_setting); 697 common_timer_get(timr, old_setting);
@@ -713,14 +713,10 @@ common_timer_set(struct k_itimer *timr, int flags,
713 if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) 713 if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
714 return 0; 714 return 0;
715 715
716 /* Posix madness. Only absolute CLOCK_REALTIME timers 716 mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
717 * are affected by clock sets. So we must reiniatilize 717 hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
718 * the timer. 718 timr->it.real.timer.data = timr;
719 */ 719 timr->it.real.timer.function = posix_timer_fn;
720 if (timr->it_clock == CLOCK_REALTIME && (flags & TIMER_ABSTIME))
721 hrtimer_rebase(timer, CLOCK_REALTIME);
722 else
723 hrtimer_rebase(timer, CLOCK_MONOTONIC);
724 720
725 timer->expires = timespec_to_ktime(new_setting->it_value); 721 timer->expires = timespec_to_ktime(new_setting->it_value);
726 722
@@ -728,11 +724,15 @@ common_timer_set(struct k_itimer *timr, int flags,
728 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); 724 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
729 725
730 /* SIGEV_NONE timers are not queued ! See common_timer_get */ 726 /* SIGEV_NONE timers are not queued ! See common_timer_get */
731 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) 727 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
728 /* Setup correct expiry time for relative timers */
729 if (mode == HRTIMER_REL)
730 timer->expires = ktime_add(timer->expires,
731 timer->base->get_time());
732 return 0; 732 return 0;
733 }
733 734
734 hrtimer_start(timer, timer->expires, (flags & TIMER_ABSTIME) ? 735 hrtimer_start(timer, timer->expires, mode);
735 HRTIMER_ABS : HRTIMER_REL);
736 return 0; 736 return 0;
737} 737}
738 738
@@ -875,12 +875,6 @@ int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
875} 875}
876EXPORT_SYMBOL_GPL(do_posix_clock_nosettime); 876EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
877 877
878int do_posix_clock_notimer_create(struct k_itimer *timer)
879{
880 return -EINVAL;
881}
882EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create);
883
884int do_posix_clock_nonanosleep(const clockid_t clock, int flags, 878int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
885 struct timespec *t, struct timespec __user *r) 879 struct timespec *t, struct timespec __user *r)
886{ 880{
@@ -947,21 +941,8 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
947static int common_nsleep(const clockid_t which_clock, int flags, 941static int common_nsleep(const clockid_t which_clock, int flags,
948 struct timespec *tsave, struct timespec __user *rmtp) 942 struct timespec *tsave, struct timespec __user *rmtp)
949{ 943{
950 int mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; 944 return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
951 int clockid = which_clock; 945 HRTIMER_ABS : HRTIMER_REL, which_clock);
952
953 switch (which_clock) {
954 case CLOCK_REALTIME:
955 /* Posix madness. Only absolute timers on clock realtime
956 are affected by clock set. */
957 if (mode != HRTIMER_ABS)
958 clockid = CLOCK_MONOTONIC;
959 case CLOCK_MONOTONIC:
960 break;
961 default:
962 return -EINVAL;
963 }
964 return hrtimer_nanosleep(tsave, rmtp, mode, clockid);
965} 946}
966 947
967asmlinkage long 948asmlinkage long
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 7ff375e7c9..623786d441 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,18 +9,13 @@
9#include <linux/console.h> 9#include <linux/console.h>
10#include "power.h" 10#include "power.h"
11 11
12static int new_loglevel = 10; 12#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
13static int orig_loglevel; 13#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
14#ifdef SUSPEND_CONSOLE 14
15static int orig_fgconsole, orig_kmsg; 15static int orig_fgconsole, orig_kmsg;
16#endif
17 16
18int pm_prepare_console(void) 17int pm_prepare_console(void)
19{ 18{
20 orig_loglevel = console_loglevel;
21 console_loglevel = new_loglevel;
22
23#ifdef SUSPEND_CONSOLE
24 acquire_console_sem(); 19 acquire_console_sem();
25 20
26 orig_fgconsole = fg_console; 21 orig_fgconsole = fg_console;
@@ -41,18 +36,15 @@ int pm_prepare_console(void)
41 } 36 }
42 orig_kmsg = kmsg_redirect; 37 orig_kmsg = kmsg_redirect;
43 kmsg_redirect = SUSPEND_CONSOLE; 38 kmsg_redirect = SUSPEND_CONSOLE;
44#endif
45 return 0; 39 return 0;
46} 40}
47 41
48void pm_restore_console(void) 42void pm_restore_console(void)
49{ 43{
50 console_loglevel = orig_loglevel;
51#ifdef SUSPEND_CONSOLE
52 acquire_console_sem(); 44 acquire_console_sem();
53 set_console(orig_fgconsole); 45 set_console(orig_fgconsole);
54 release_console_sem(); 46 release_console_sem();
55 kmsg_redirect = orig_kmsg; 47 kmsg_redirect = orig_kmsg;
56#endif
57 return; 48 return;
58} 49}
50#endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e24446f8d8..0b43847dc9 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -53,7 +53,7 @@ static void power_down(suspend_disk_method_t mode)
53 53
54 switch(mode) { 54 switch(mode) {
55 case PM_DISK_PLATFORM: 55 case PM_DISK_PLATFORM:
56 kernel_power_off_prepare(); 56 kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
57 error = pm_ops->enter(PM_SUSPEND_DISK); 57 error = pm_ops->enter(PM_SUSPEND_DISK);
58 break; 58 break;
59 case PM_DISK_SHUTDOWN: 59 case PM_DISK_SHUTDOWN:
@@ -95,13 +95,6 @@ static int prepare_processes(void)
95 goto thaw; 95 goto thaw;
96 } 96 }
97 97
98 if (pm_disk_mode == PM_DISK_PLATFORM) {
99 if (pm_ops && pm_ops->prepare) {
100 if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
101 goto thaw;
102 }
103 }
104
105 /* Free memory before shutting down devices. */ 98 /* Free memory before shutting down devices. */
106 if (!(error = swsusp_shrink_memory())) 99 if (!(error = swsusp_shrink_memory()))
107 return 0; 100 return 0;
@@ -367,14 +360,14 @@ power_attr(resume);
367 360
368static ssize_t image_size_show(struct subsystem * subsys, char *buf) 361static ssize_t image_size_show(struct subsystem * subsys, char *buf)
369{ 362{
370 return sprintf(buf, "%u\n", image_size); 363 return sprintf(buf, "%lu\n", image_size);
371} 364}
372 365
373static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n) 366static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n)
374{ 367{
375 unsigned int size; 368 unsigned long size;
376 369
377 if (sscanf(buf, "%u", &size) == 1) { 370 if (sscanf(buf, "%lu", &size) == 1) {
378 image_size = size; 371 image_size = size;
379 return n; 372 return n;
380 } 373 }
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d253f3ae2f..9cb235cba4 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -133,10 +133,10 @@ static int suspend_enter(suspend_state_t state)
133static void suspend_finish(suspend_state_t state) 133static void suspend_finish(suspend_state_t state)
134{ 134{
135 device_resume(); 135 device_resume();
136 if (pm_ops && pm_ops->finish)
137 pm_ops->finish(state);
138 thaw_processes(); 136 thaw_processes();
139 enable_nonboot_cpus(); 137 enable_nonboot_cpus();
138 if (pm_ops && pm_ops->finish)
139 pm_ops->finish(state);
140 pm_restore_console(); 140 pm_restore_console();
141} 141}
142 142
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7e8492fd14..388dba6808 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,14 +1,6 @@
1#include <linux/suspend.h> 1#include <linux/suspend.h>
2#include <linux/utsname.h> 2#include <linux/utsname.h>
3 3
4/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but
5 we probably do not take enough locks for switching consoles, etc,
6 so bad things might happen.
7*/
8#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
9#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
10#endif
11
12struct swsusp_info { 4struct swsusp_info {
13 struct new_utsname uts; 5 struct new_utsname uts;
14 u32 version_code; 6 u32 version_code;
@@ -42,17 +34,14 @@ static struct subsys_attribute _name##_attr = { \
42 34
43extern struct subsystem power_subsys; 35extern struct subsystem power_subsys;
44 36
45extern int pm_prepare_console(void);
46extern void pm_restore_console(void);
47
48/* References to section boundaries */ 37/* References to section boundaries */
49extern const void __nosave_begin, __nosave_end; 38extern const void __nosave_begin, __nosave_end;
50 39
51extern unsigned int nr_copy_pages; 40extern unsigned int nr_copy_pages;
52extern struct pbe *pagedir_nosave; 41extern struct pbe *pagedir_nosave;
53 42
54/* Preferred image size in MB (default 500) */ 43/* Preferred image size in bytes (default 500 MB) */
55extern unsigned int image_size; 44extern unsigned long image_size;
56 45
57extern asmlinkage int swsusp_arch_suspend(void); 46extern asmlinkage int swsusp_arch_suspend(void);
58extern asmlinkage int swsusp_arch_resume(void); 47extern asmlinkage int swsusp_arch_resume(void);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 41f66365f0..8d5a5986d6 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone)
91 * corrected eventually when the cases giving rise to this 91 * corrected eventually when the cases giving rise to this
92 * are better understood. 92 * are better understood.
93 */ 93 */
94 if (PageReserved(page)) { 94 if (PageReserved(page))
95 printk("highmem reserved page?!\n");
96 continue; 95 continue;
97 }
98 BUG_ON(PageNosave(page)); 96 BUG_ON(PageNosave(page));
99 if (PageNosaveFree(page)) 97 if (PageNosaveFree(page))
100 continue; 98 continue;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 55a18d26ab..2d9d08f72f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -70,12 +70,12 @@
70#include "power.h" 70#include "power.h"
71 71
72/* 72/*
73 * Preferred image size in MB (tunable via /sys/power/image_size). 73 * Preferred image size in bytes (tunable via /sys/power/image_size).
74 * When it is set to N, swsusp will do its best to ensure the image 74 * When it is set to N, swsusp will do its best to ensure the image
75 * size will not exceed N MB, but if that is impossible, it will 75 * size will not exceed N bytes, but if that is impossible, it will
76 * try to create the smallest image possible. 76 * try to create the smallest image possible.
77 */ 77 */
78unsigned int image_size = 500; 78unsigned long image_size = 500 * 1024 * 1024;
79 79
80#ifdef CONFIG_HIGHMEM 80#ifdef CONFIG_HIGHMEM
81unsigned int count_highmem_pages(void); 81unsigned int count_highmem_pages(void);
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
153{ 153{
154 int i; 154 int i;
155 155
156 if (!swsusp_resume_device)
157 return -ENODEV;
158 spin_lock(&swap_lock); 156 spin_lock(&swap_lock);
159 for (i = 0; i < MAX_SWAPFILES; i++) { 157 for (i = 0; i < MAX_SWAPFILES; i++) {
160 if (!(swap_info[i].flags & SWP_WRITEOK)) 158 if (!(swap_info[i].flags & SWP_WRITEOK))
161 continue; 159 continue;
162 if (is_resume_device(swap_info + i)) { 160 if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
163 spin_unlock(&swap_lock); 161 spin_unlock(&swap_lock);
164 root_swap = i; 162 root_swap = i;
165 return 0; 163 return 0;
@@ -590,7 +588,7 @@ int swsusp_shrink_memory(void)
590 if (!tmp) 588 if (!tmp)
591 return -ENOMEM; 589 return -ENOMEM;
592 pages += tmp; 590 pages += tmp;
593 } else if (size > (image_size * 1024 * 1024) / PAGE_SIZE) { 591 } else if (size > image_size / PAGE_SIZE) {
594 tmp = shrink_all_memory(SHRINK_BITE); 592 tmp = shrink_all_memory(SHRINK_BITE);
595 pages += tmp; 593 pages += tmp;
596 } 594 }
@@ -743,7 +741,6 @@ static int submit(int rw, pgoff_t page_off, void *page)
743 if (!bio) 741 if (!bio)
744 return -ENOMEM; 742 return -ENOMEM;
745 bio->bi_sector = page_off * (PAGE_SIZE >> 9); 743 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
746 bio_get(bio);
747 bio->bi_bdev = resume_bdev; 744 bio->bi_bdev = resume_bdev;
748 bio->bi_end_io = end_io; 745 bio->bi_end_io = end_io;
749 746
@@ -753,14 +750,13 @@ static int submit(int rw, pgoff_t page_off, void *page)
753 goto Done; 750 goto Done;
754 } 751 }
755 752
756 if (rw == WRITE)
757 bio_set_pages_dirty(bio);
758 753
759 atomic_set(&io_done, 1); 754 atomic_set(&io_done, 1);
760 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 755 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
761 while (atomic_read(&io_done)) 756 while (atomic_read(&io_done))
762 yield(); 757 yield();
763 758 if (rw == READ)
759 bio_set_pages_dirty(bio);
764 Done: 760 Done:
765 bio_put(bio); 761 bio_put(bio);
766 return error; 762 return error;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5f33cdb6ff..d95a72c927 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
72 */ 72 */
73void __ptrace_unlink(task_t *child) 73void __ptrace_unlink(task_t *child)
74{ 74{
75 if (!child->ptrace) 75 BUG_ON(!child->ptrace);
76 BUG(); 76
77 child->ptrace = 0; 77 child->ptrace = 0;
78 if (!list_empty(&child->ptrace_list)) { 78 if (!list_empty(&child->ptrace_list)) {
79 list_del_init(&child->ptrace_list); 79 list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
184 return retval; 184 return retval;
185} 185}
186 186
187void __ptrace_detach(struct task_struct *child, unsigned int data)
188{
189 child->exit_code = data;
190 /* .. re-parent .. */
191 __ptrace_unlink(child);
192 /* .. and wake it up. */
193 if (child->exit_state != EXIT_ZOMBIE)
194 wake_up_process(child);
195}
196
187int ptrace_detach(struct task_struct *child, unsigned int data) 197int ptrace_detach(struct task_struct *child, unsigned int data)
188{ 198{
189 if (!valid_signal(data)) 199 if (!valid_signal(data))
190 return -EIO; 200 return -EIO;
191 201
192 /* Architecture-specific hardware disable .. */ 202 /* Architecture-specific hardware disable .. */
193 ptrace_disable(child); 203 ptrace_disable(child);
194 204
195 /* .. re-parent .. */
196 child->exit_code = data;
197
198 write_lock_irq(&tasklist_lock); 205 write_lock_irq(&tasklist_lock);
199 __ptrace_unlink(child); 206 if (child->ptrace)
200 /* .. and wake it up. */ 207 __ptrace_detach(child, data);
201 if (child->exit_state != EXIT_ZOMBIE)
202 wake_up_process(child);
203 write_unlock_irq(&tasklist_lock); 208 write_unlock_irq(&tasklist_lock);
204 209
205 return 0; 210 return 0;
@@ -242,8 +247,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
242 if (write) { 247 if (write) {
243 copy_to_user_page(vma, page, addr, 248 copy_to_user_page(vma, page, addr,
244 maddr + offset, buf, bytes); 249 maddr + offset, buf, bytes);
245 if (!PageCompound(page)) 250 set_page_dirty_lock(page);
246 set_page_dirty_lock(page);
247 } else { 251 } else {
248 copy_from_user_page(vma, page, addr, 252 copy_from_user_page(vma, page, addr,
249 buf, maddr + offset, bytes); 253 buf, maddr + offset, bytes);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 0cf8146bd5..8cf15a569f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
67 67
68/* Fake initialization required by compiler */ 68/* Fake initialization required by compiler */
69static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; 69static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
70static int maxbatch = 10000; 70static int blimit = 10;
71static int qhimark = 10000;
72static int qlowmark = 100;
73#ifdef CONFIG_SMP
74static int rsinterval = 1000;
75#endif
76
77static atomic_t rcu_barrier_cpu_count;
78static struct semaphore rcu_barrier_sema;
79static struct completion rcu_barrier_completion;
80
81#ifdef CONFIG_SMP
82static void force_quiescent_state(struct rcu_data *rdp,
83 struct rcu_ctrlblk *rcp)
84{
85 int cpu;
86 cpumask_t cpumask;
87 set_need_resched();
88 if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
89 rdp->last_rs_qlen = rdp->qlen;
90 /*
91 * Don't send IPI to itself. With irqs disabled,
92 * rdp->cpu is the current cpu.
93 */
94 cpumask = rcp->cpumask;
95 cpu_clear(rdp->cpu, cpumask);
96 for_each_cpu_mask(cpu, cpumask)
97 smp_send_reschedule(cpu);
98 }
99}
100#else
101static inline void force_quiescent_state(struct rcu_data *rdp,
102 struct rcu_ctrlblk *rcp)
103{
104 set_need_resched();
105}
106#endif
71 107
72/** 108/**
73 * call_rcu - Queue an RCU callback for invocation after a grace period. 109 * call_rcu - Queue an RCU callback for invocation after a grace period.
@@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head,
92 rdp = &__get_cpu_var(rcu_data); 128 rdp = &__get_cpu_var(rcu_data);
93 *rdp->nxttail = head; 129 *rdp->nxttail = head;
94 rdp->nxttail = &head->next; 130 rdp->nxttail = &head->next;
95 131 if (unlikely(++rdp->qlen > qhimark)) {
96 if (unlikely(++rdp->count > 10000)) 132 rdp->blimit = INT_MAX;
97 set_need_resched(); 133 force_quiescent_state(rdp, &rcu_ctrlblk);
98 134 }
99 local_irq_restore(flags); 135 local_irq_restore(flags);
100} 136}
101 137
102static atomic_t rcu_barrier_cpu_count;
103static struct semaphore rcu_barrier_sema;
104static struct completion rcu_barrier_completion;
105
106/** 138/**
107 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. 139 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
108 * @head: structure to be used for queueing the RCU updates. 140 * @head: structure to be used for queueing the RCU updates.
@@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head,
131 rdp = &__get_cpu_var(rcu_bh_data); 163 rdp = &__get_cpu_var(rcu_bh_data);
132 *rdp->nxttail = head; 164 *rdp->nxttail = head;
133 rdp->nxttail = &head->next; 165 rdp->nxttail = &head->next;
134 rdp->count++; 166
135/* 167 if (unlikely(++rdp->qlen > qhimark)) {
136 * Should we directly call rcu_do_batch() here ? 168 rdp->blimit = INT_MAX;
137 * if (unlikely(rdp->count > 10000)) 169 force_quiescent_state(rdp, &rcu_bh_ctrlblk);
138 * rcu_do_batch(rdp); 170 }
139 */ 171
140 local_irq_restore(flags); 172 local_irq_restore(flags);
141} 173}
142 174
@@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
199 next = rdp->donelist = list->next; 231 next = rdp->donelist = list->next;
200 list->func(list); 232 list->func(list);
201 list = next; 233 list = next;
202 rdp->count--; 234 rdp->qlen--;
203 if (++count >= maxbatch) 235 if (++count >= rdp->blimit)
204 break; 236 break;
205 } 237 }
238 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
239 rdp->blimit = blimit;
206 if (!rdp->donelist) 240 if (!rdp->donelist)
207 rdp->donetail = &rdp->donelist; 241 rdp->donetail = &rdp->donelist;
208 else 242 else
@@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
473 rdp->quiescbatch = rcp->completed; 507 rdp->quiescbatch = rcp->completed;
474 rdp->qs_pending = 0; 508 rdp->qs_pending = 0;
475 rdp->cpu = cpu; 509 rdp->cpu = cpu;
510 rdp->blimit = blimit;
476} 511}
477 512
478static void __devinit rcu_online_cpu(int cpu) 513static void __devinit rcu_online_cpu(int cpu)
@@ -567,7 +602,12 @@ void synchronize_kernel(void)
567 synchronize_rcu(); 602 synchronize_rcu();
568} 603}
569 604
570module_param(maxbatch, int, 0); 605module_param(blimit, int, 0);
606module_param(qhimark, int, 0);
607module_param(qlowmark, int, 0);
608#ifdef CONFIG_SMP
609module_param(rsinterval, int, 0);
610#endif
571EXPORT_SYMBOL_GPL(rcu_batches_completed); 611EXPORT_SYMBOL_GPL(rcu_batches_completed);
572EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ 612EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */
573EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ 613EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */
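The rcupdate.c hunks replace the single maxbatch cap with three tunables: blimit callbacks are invoked per batch in the common case, a queue length above qhimark lifts the limit (and forces a quiescent state), and once the backlog drains below qlowmark the normal limit is restored. The following is a toy, single-threaded model of just that throttling logic; the queue is a plain counter, force_quiescent_state() is omitted, and nothing here is the real RCU machinery.

/*
 * Toy model of blimit/qhimark/qlowmark throttling, mirroring the names
 * in the patch but nothing else.
 */
#include <limits.h>
#include <stdio.h>

static int blimit = 10, qhimark = 10000, qlowmark = 100;

struct rcu_state {
        long qlen;        /* callbacks queued */
        int  cur_limit;   /* per-batch limit, normally == blimit */
};

static void enqueue_callbacks(struct rcu_state *s, long n)
{
        s->qlen += n;
        if (s->qlen > qhimark)
                s->cur_limit = INT_MAX;   /* emergency: drain flat out */
}

static long run_batch(struct rcu_state *s)
{
        long done = 0;

        while (s->qlen > 0 && done < s->cur_limit) {
                s->qlen--;                /* "invoke" one callback */
                done++;
        }
        if (s->cur_limit == INT_MAX && s->qlen <= qlowmark)
                s->cur_limit = blimit;    /* backlog gone, throttle again */
        return done;
}

int main(void)
{
        struct rcu_state s = { .qlen = 0, .cur_limit = blimit };

        enqueue_callbacks(&s, 35);        /* modest load: batches of blimit */
        while (s.qlen)
                printf("ran %ld, %ld left (limit %d)\n",
                       run_batch(&s), s.qlen, s.cur_limit);

        enqueue_callbacks(&s, 12000);     /* overload: limit lifted */
        while (s.qlen)
                printf("ran %ld, %ld left (limit %d)\n",
                       run_batch(&s), s.qlen, s.cur_limit);
        return 0;
}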
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 773219907d..7712912dbc 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -114,16 +114,16 @@ rcu_torture_alloc(void)
114{ 114{
115 struct list_head *p; 115 struct list_head *p;
116 116
117 spin_lock(&rcu_torture_lock); 117 spin_lock_bh(&rcu_torture_lock);
118 if (list_empty(&rcu_torture_freelist)) { 118 if (list_empty(&rcu_torture_freelist)) {
119 atomic_inc(&n_rcu_torture_alloc_fail); 119 atomic_inc(&n_rcu_torture_alloc_fail);
120 spin_unlock(&rcu_torture_lock); 120 spin_unlock_bh(&rcu_torture_lock);
121 return NULL; 121 return NULL;
122 } 122 }
123 atomic_inc(&n_rcu_torture_alloc); 123 atomic_inc(&n_rcu_torture_alloc);
124 p = rcu_torture_freelist.next; 124 p = rcu_torture_freelist.next;
125 list_del_init(p); 125 list_del_init(p);
126 spin_unlock(&rcu_torture_lock); 126 spin_unlock_bh(&rcu_torture_lock);
127 return container_of(p, struct rcu_torture, rtort_free); 127 return container_of(p, struct rcu_torture, rtort_free);
128} 128}
129 129
@@ -134,9 +134,9 @@ static void
134rcu_torture_free(struct rcu_torture *p) 134rcu_torture_free(struct rcu_torture *p)
135{ 135{
136 atomic_inc(&n_rcu_torture_free); 136 atomic_inc(&n_rcu_torture_free);
137 spin_lock(&rcu_torture_lock); 137 spin_lock_bh(&rcu_torture_lock);
138 list_add_tail(&p->rtort_free, &rcu_torture_freelist); 138 list_add_tail(&p->rtort_free, &rcu_torture_freelist);
139 spin_unlock(&rcu_torture_lock); 139 spin_unlock_bh(&rcu_torture_lock);
140} 140}
141 141
142static void 142static void
diff --git a/kernel/sched.c b/kernel/sched.c
index 3ee2ae4512..4d46e90f59 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p)
178#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ 178#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
179 < (long long) (sd)->cache_hot_time) 179 < (long long) (sd)->cache_hot_time)
180 180
181void __put_task_struct_cb(struct rcu_head *rhp)
182{
183 __put_task_struct(container_of(rhp, struct task_struct, rcu));
184}
185
186EXPORT_SYMBOL_GPL(__put_task_struct_cb);
187
188/* 181/*
189 * These are the runqueue data structures: 182 * These are the runqueue data structures:
190 */ 183 */
@@ -215,7 +208,6 @@ struct runqueue {
215 */ 208 */
216 unsigned long nr_running; 209 unsigned long nr_running;
217#ifdef CONFIG_SMP 210#ifdef CONFIG_SMP
218 unsigned long prio_bias;
219 unsigned long cpu_load[3]; 211 unsigned long cpu_load[3];
220#endif 212#endif
221 unsigned long long nr_switches; 213 unsigned long long nr_switches;
@@ -669,68 +661,13 @@ static int effective_prio(task_t *p)
669 return prio; 661 return prio;
670} 662}
671 663
672#ifdef CONFIG_SMP
673static inline void inc_prio_bias(runqueue_t *rq, int prio)
674{
675 rq->prio_bias += MAX_PRIO - prio;
676}
677
678static inline void dec_prio_bias(runqueue_t *rq, int prio)
679{
680 rq->prio_bias -= MAX_PRIO - prio;
681}
682
683static inline void inc_nr_running(task_t *p, runqueue_t *rq)
684{
685 rq->nr_running++;
686 if (rt_task(p)) {
687 if (p != rq->migration_thread)
688 /*
689 * The migration thread does the actual balancing. Do
690 * not bias by its priority as the ultra high priority
691 * will skew balancing adversely.
692 */
693 inc_prio_bias(rq, p->prio);
694 } else
695 inc_prio_bias(rq, p->static_prio);
696}
697
698static inline void dec_nr_running(task_t *p, runqueue_t *rq)
699{
700 rq->nr_running--;
701 if (rt_task(p)) {
702 if (p != rq->migration_thread)
703 dec_prio_bias(rq, p->prio);
704 } else
705 dec_prio_bias(rq, p->static_prio);
706}
707#else
708static inline void inc_prio_bias(runqueue_t *rq, int prio)
709{
710}
711
712static inline void dec_prio_bias(runqueue_t *rq, int prio)
713{
714}
715
716static inline void inc_nr_running(task_t *p, runqueue_t *rq)
717{
718 rq->nr_running++;
719}
720
721static inline void dec_nr_running(task_t *p, runqueue_t *rq)
722{
723 rq->nr_running--;
724}
725#endif
726
727/* 664/*
728 * __activate_task - move a task to the runqueue. 665 * __activate_task - move a task to the runqueue.
729 */ 666 */
730static inline void __activate_task(task_t *p, runqueue_t *rq) 667static inline void __activate_task(task_t *p, runqueue_t *rq)
731{ 668{
732 enqueue_task(p, rq->active); 669 enqueue_task(p, rq->active);
733 inc_nr_running(p, rq); 670 rq->nr_running++;
734} 671}
735 672
736/* 673/*
@@ -739,7 +676,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
739static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 676static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
740{ 677{
741 enqueue_task_head(p, rq->active); 678 enqueue_task_head(p, rq->active);
742 inc_nr_running(p, rq); 679 rq->nr_running++;
743} 680}
744 681
745static int recalc_task_prio(task_t *p, unsigned long long now) 682static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +800,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
863 */ 800 */
864static void deactivate_task(struct task_struct *p, runqueue_t *rq) 801static void deactivate_task(struct task_struct *p, runqueue_t *rq)
865{ 802{
866 dec_nr_running(p, rq); 803 rq->nr_running--;
867 dequeue_task(p, p->array); 804 dequeue_task(p, p->array);
868 p->array = NULL; 805 p->array = NULL;
869} 806}
@@ -1007,61 +944,27 @@ void kick_process(task_t *p)
1007 * We want to under-estimate the load of migration sources, to 944 * We want to under-estimate the load of migration sources, to
1008 * balance conservatively. 945 * balance conservatively.
1009 */ 946 */
1010static unsigned long __source_load(int cpu, int type, enum idle_type idle) 947static inline unsigned long source_load(int cpu, int type)
1011{ 948{
1012 runqueue_t *rq = cpu_rq(cpu); 949 runqueue_t *rq = cpu_rq(cpu);
1013 unsigned long running = rq->nr_running; 950 unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
1014 unsigned long source_load, cpu_load = rq->cpu_load[type-1],
1015 load_now = running * SCHED_LOAD_SCALE;
1016
1017 if (type == 0) 951 if (type == 0)
1018 source_load = load_now; 952 return load_now;
1019 else
1020 source_load = min(cpu_load, load_now);
1021
1022 if (running > 1 || (idle == NOT_IDLE && running))
1023 /*
1024 * If we are busy rebalancing the load is biased by
1025 * priority to create 'nice' support across cpus. When
1026 * idle rebalancing we should only bias the source_load if
1027 * there is more than one task running on that queue to
1028 * prevent idle rebalance from trying to pull tasks from a
1029 * queue with only one running task.
1030 */
1031 source_load = source_load * rq->prio_bias / running;
1032
1033 return source_load;
1034}
1035 953
1036static inline unsigned long source_load(int cpu, int type) 954 return min(rq->cpu_load[type-1], load_now);
1037{
1038 return __source_load(cpu, type, NOT_IDLE);
1039} 955}
1040 956
1041/* 957/*
1042 * Return a high guess at the load of a migration-target cpu 958 * Return a high guess at the load of a migration-target cpu
1043 */ 959 */
1044static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) 960static inline unsigned long target_load(int cpu, int type)
1045{ 961{
1046 runqueue_t *rq = cpu_rq(cpu); 962 runqueue_t *rq = cpu_rq(cpu);
1047 unsigned long running = rq->nr_running; 963 unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
1048 unsigned long target_load, cpu_load = rq->cpu_load[type-1],
1049 load_now = running * SCHED_LOAD_SCALE;
1050
1051 if (type == 0) 964 if (type == 0)
1052 target_load = load_now; 965 return load_now;
1053 else
1054 target_load = max(cpu_load, load_now);
1055
1056 if (running > 1 || (idle == NOT_IDLE && running))
1057 target_load = target_load * rq->prio_bias / running;
1058 966
1059 return target_load; 967 return max(rq->cpu_load[type-1], load_now);
1060}
1061
1062static inline unsigned long target_load(int cpu, int type)
1063{
1064 return __target_load(cpu, type, NOT_IDLE);
1065} 968}
1066 969
1067/* 970/*
@@ -1294,9 +1197,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
1294 } 1197 }
1295 } 1198 }
1296 1199
1297 if (p->last_waker_cpu != this_cpu)
1298 goto out_set_cpu;
1299
1300 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) 1200 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
1301 goto out_set_cpu; 1201 goto out_set_cpu;
1302 1202
@@ -1367,8 +1267,6 @@ out_set_cpu:
1367 cpu = task_cpu(p); 1267 cpu = task_cpu(p);
1368 } 1268 }
1369 1269
1370 p->last_waker_cpu = this_cpu;
1371
1372out_activate: 1270out_activate:
1373#endif /* CONFIG_SMP */ 1271#endif /* CONFIG_SMP */
1374 if (old_state == TASK_UNINTERRUPTIBLE) { 1272 if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1348,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1450#ifdef CONFIG_SCHEDSTATS 1348#ifdef CONFIG_SCHEDSTATS
1451 memset(&p->sched_info, 0, sizeof(p->sched_info)); 1349 memset(&p->sched_info, 0, sizeof(p->sched_info));
1452#endif 1350#endif
1453#if defined(CONFIG_SMP) 1351#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
1454 p->last_waker_cpu = cpu;
1455#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
1456 p->oncpu = 0; 1352 p->oncpu = 0;
1457#endif 1353#endif
1458#endif
1459#ifdef CONFIG_PREEMPT 1354#ifdef CONFIG_PREEMPT
1460 /* Want to start with kernel preemption disabled. */ 1355 /* Want to start with kernel preemption disabled. */
1461 task_thread_info(p)->preempt_count = 1; 1356 task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1425,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1530 list_add_tail(&p->run_list, &current->run_list); 1425 list_add_tail(&p->run_list, &current->run_list);
1531 p->array = current->array; 1426 p->array = current->array;
1532 p->array->nr_active++; 1427 p->array->nr_active++;
1533 inc_nr_running(p, rq); 1428 rq->nr_running++;
1534 } 1429 }
1535 set_need_resched(); 1430 set_need_resched();
1536 } else 1431 } else
@@ -1875,9 +1770,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1875 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1770 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
1876{ 1771{
1877 dequeue_task(p, src_array); 1772 dequeue_task(p, src_array);
1878 dec_nr_running(p, src_rq); 1773 src_rq->nr_running--;
1879 set_task_cpu(p, this_cpu); 1774 set_task_cpu(p, this_cpu);
1880 inc_nr_running(p, this_rq); 1775 this_rq->nr_running++;
1881 enqueue_task(p, this_array); 1776 enqueue_task(p, this_array);
1882 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) 1777 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
1883 + this_rq->timestamp_last_tick; 1778 + this_rq->timestamp_last_tick;
@@ -2056,9 +1951,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2056 1951
2057 /* Bias balancing toward cpus of our domain */ 1952 /* Bias balancing toward cpus of our domain */
2058 if (local_group) 1953 if (local_group)
2059 load = __target_load(i, load_idx, idle); 1954 load = target_load(i, load_idx);
2060 else 1955 else
2061 load = __source_load(i, load_idx, idle); 1956 load = source_load(i, load_idx);
2062 1957
2063 avg_load += load; 1958 avg_load += load;
2064 } 1959 }
@@ -2171,7 +2066,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2171 int i; 2066 int i;
2172 2067
2173 for_each_cpu_mask(i, group->cpumask) { 2068 for_each_cpu_mask(i, group->cpumask) {
2174 load = __source_load(i, 0, idle); 2069 load = source_load(i, 0);
2175 2070
2176 if (load > max_load) { 2071 if (load > max_load) {
2177 max_load = load; 2072 max_load = load;
@@ -3571,10 +3466,8 @@ void set_user_nice(task_t *p, long nice)
3571 goto out_unlock; 3466 goto out_unlock;
3572 } 3467 }
3573 array = p->array; 3468 array = p->array;
3574 if (array) { 3469 if (array)
3575 dequeue_task(p, array); 3470 dequeue_task(p, array);
3576 dec_prio_bias(rq, p->static_prio);
3577 }
3578 3471
3579 old_prio = p->prio; 3472 old_prio = p->prio;
3580 new_prio = NICE_TO_PRIO(nice); 3473 new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3477,6 @@ void set_user_nice(task_t *p, long nice)
3584 3477
3585 if (array) { 3478 if (array) {
3586 enqueue_task(p, array); 3479 enqueue_task(p, array);
3587 inc_prio_bias(rq, p->static_prio);
3588 /* 3480 /*
3589 * If the task increased its priority or is running and 3481 * If the task increased its priority or is running and
3590 * lowered its priority, then reschedule its CPU: 3482 * lowered its priority, then reschedule its CPU:
@@ -4031,7 +3923,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
4031 goto out_unlock; 3923 goto out_unlock;
4032 3924
4033 retval = 0; 3925 retval = 0;
4034 cpus_and(*mask, p->cpus_allowed, cpu_possible_map); 3926 cpus_and(*mask, p->cpus_allowed, cpu_online_map);
4035 3927
4036out_unlock: 3928out_unlock:
4037 read_unlock(&tasklist_lock); 3929 read_unlock(&tasklist_lock);
@@ -4129,6 +4021,8 @@ static inline void __cond_resched(void)
4129 */ 4021 */
4130 if (unlikely(preempt_count())) 4022 if (unlikely(preempt_count()))
4131 return; 4023 return;
4024 if (unlikely(system_state != SYSTEM_RUNNING))
4025 return;
4132 do { 4026 do {
4133 add_preempt_count(PREEMPT_ACTIVE); 4027 add_preempt_count(PREEMPT_ACTIVE);
4134 schedule(); 4028 schedule();
@@ -4434,6 +4328,7 @@ void __devinit init_idle(task_t *idle, int cpu)
4434 runqueue_t *rq = cpu_rq(cpu); 4328 runqueue_t *rq = cpu_rq(cpu);
4435 unsigned long flags; 4329 unsigned long flags;
4436 4330
4331 idle->timestamp = sched_clock();
4437 idle->sleep_avg = 0; 4332 idle->sleep_avg = 0;
4438 idle->array = NULL; 4333 idle->array = NULL;
4439 idle->prio = MAX_PRIO; 4334 idle->prio = MAX_PRIO;
@@ -5141,7 +5036,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5141#define SEARCH_SCOPE 2 5036#define SEARCH_SCOPE 2
5142#define MIN_CACHE_SIZE (64*1024U) 5037#define MIN_CACHE_SIZE (64*1024U)
5143#define DEFAULT_CACHE_SIZE (5*1024*1024U) 5038#define DEFAULT_CACHE_SIZE (5*1024*1024U)
5144#define ITERATIONS 2 5039#define ITERATIONS 1
5145#define SIZE_THRESH 130 5040#define SIZE_THRESH 130
5146#define COST_THRESH 130 5041#define COST_THRESH 130
5147 5042
@@ -5159,7 +5054,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5159#define MAX_DOMAIN_DISTANCE 32 5054#define MAX_DOMAIN_DISTANCE 32
5160 5055
5161static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = 5056static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
5162 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL }; 5057 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
5058/*
5059 * Architectures may override the migration cost and thus avoid
5060 * boot-time calibration. Unit is nanoseconds. Mostly useful for
5061 * virtualized hardware:
5062 */
5063#ifdef CONFIG_DEFAULT_MIGRATION_COST
5064 CONFIG_DEFAULT_MIGRATION_COST
5065#else
5066 -1LL
5067#endif
5068};
5163 5069
5164/* 5070/*
5165 * Allow override of migration cost - in units of microseconds. 5071 * Allow override of migration cost - in units of microseconds.
@@ -5480,9 +5386,9 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
5480 break; 5386 break;
5481 } 5387 }
5482 /* 5388 /*
5483 * Increase the cachesize in 5% steps: 5389 * Increase the cachesize in 10% steps:
5484 */ 5390 */
5485 size = size * 20 / 19; 5391 size = size * 10 / 9;
5486 } 5392 }
5487 5393
5488 if (migration_debug) 5394 if (migration_debug)
@@ -5551,13 +5457,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
5551 -1 5457 -1
5552#endif 5458#endif
5553 ); 5459 );
5554 printk("migration_cost="); 5460 if (system_state == SYSTEM_BOOTING) {
5555 for (distance = 0; distance <= max_distance; distance++) { 5461 printk("migration_cost=");
5556 if (distance) 5462 for (distance = 0; distance <= max_distance; distance++) {
5557 printk(","); 5463 if (distance)
5558 printk("%ld", (long)migration_cost[distance] / 1000); 5464 printk(",");
5465 printk("%ld", (long)migration_cost[distance] / 1000);
5466 }
5467 printk("\n");
5559 } 5468 }
5560 printk("\n");
5561 j1 = jiffies; 5469 j1 = jiffies;
5562 if (migration_debug) 5470 if (migration_debug)
5563 printk("migration: %ld seconds\n", (j1-j0)/HZ); 5471 printk("migration: %ld seconds\n", (j1-j0)/HZ);
@@ -6109,7 +6017,7 @@ void __init sched_init(void)
6109 runqueue_t *rq; 6017 runqueue_t *rq;
6110 int i, j, k; 6018 int i, j, k;
6111 6019
6112 for (i = 0; i < NR_CPUS; i++) { 6020 for_each_cpu(i) {
6113 prio_array_t *array; 6021 prio_array_t *array;
6114 6022
6115 rq = cpu_rq(i); 6023 rq = cpu_rq(i);
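With the prio_bias heuristics removed, source_load() and target_load() above reduce to choosing between the instantaneous load (nr_running * SCHED_LOAD_SCALE) and the decayed cpu_load[] history: the minimum for a migration source, so sources are never overestimated, and the maximum for a target, so balancing errs on the side of not moving tasks. The sketch below is standalone C, not the kernel code; the cut-down runqueue struct and the SCHED_LOAD_SCALE value of 128 are illustrative assumptions.

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL   /* illustrative scale factor */

struct rq {
        unsigned long nr_running;
        unsigned long cpu_load[3];   /* decayed load history, 3 horizons */
};

/* conservative guess for a migration source: never overestimate */
static unsigned long source_load(const struct rq *rq, int type)
{
        unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;

        if (type == 0)
                return load_now;
        return load_now < rq->cpu_load[type - 1] ?
               load_now : rq->cpu_load[type - 1];
}

/* high guess for a migration target: never underestimate */
static unsigned long target_load(const struct rq *rq, int type)
{
        unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;

        if (type == 0)
                return load_now;
        return load_now > rq->cpu_load[type - 1] ?
               load_now : rq->cpu_load[type - 1];
}

int main(void)
{
        struct rq rq = { .nr_running = 3, .cpu_load = { 512, 384, 256 } };

        printf("source: %lu, target: %lu\n",
               source_load(&rq, 1), target_load(&rq, 1));
        return 0;
}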
diff --git a/kernel/signal.c b/kernel/signal.c
index d3efafd810..ea154104a0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -283,7 +283,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
283 return(q); 283 return(q);
284} 284}
285 285
286static inline void __sigqueue_free(struct sigqueue *q) 286static void __sigqueue_free(struct sigqueue *q)
287{ 287{
288 if (q->flags & SIGQUEUE_PREALLOC) 288 if (q->flags & SIGQUEUE_PREALLOC)
289 return; 289 return;
@@ -2430,7 +2430,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
2430} 2430}
2431 2431
2432int 2432int
2433do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) 2433do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2434{ 2434{
2435 struct k_sigaction *k; 2435 struct k_sigaction *k;
2436 sigset_t mask; 2436 sigset_t mask;
@@ -2454,6 +2454,8 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2454 *oact = *k; 2454 *oact = *k;
2455 2455
2456 if (act) { 2456 if (act) {
2457 sigdelsetmask(&act->sa.sa_mask,
2458 sigmask(SIGKILL) | sigmask(SIGSTOP));
2457 /* 2459 /*
2458 * POSIX 3.3.1.3: 2460 * POSIX 3.3.1.3:
2459 * "Setting a signal action to SIG_IGN for a signal that is 2461 * "Setting a signal action to SIG_IGN for a signal that is
@@ -2479,8 +2481,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2479 read_lock(&tasklist_lock); 2481 read_lock(&tasklist_lock);
2480 spin_lock_irq(&t->sighand->siglock); 2482 spin_lock_irq(&t->sighand->siglock);
2481 *k = *act; 2483 *k = *act;
2482 sigdelsetmask(&k->sa.sa_mask,
2483 sigmask(SIGKILL) | sigmask(SIGSTOP));
2484 sigemptyset(&mask); 2484 sigemptyset(&mask);
2485 sigaddset(&mask, sig); 2485 sigaddset(&mask, sig);
2486 rm_from_queue_full(&mask, &t->signal->shared_pending); 2486 rm_from_queue_full(&mask, &t->signal->shared_pending);
@@ -2495,8 +2495,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2495 } 2495 }
2496 2496
2497 *k = *act; 2497 *k = *act;
2498 sigdelsetmask(&k->sa.sa_mask,
2499 sigmask(SIGKILL) | sigmask(SIGSTOP));
2500 } 2498 }
2501 2499
2502 spin_unlock_irq(&current->sighand->siglock); 2500 spin_unlock_irq(&current->sighand->siglock);
@@ -2702,6 +2700,7 @@ sys_signal(int sig, __sighandler_t handler)
2702 2700
2703 new_sa.sa.sa_handler = handler; 2701 new_sa.sa.sa_handler = handler;
2704 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; 2702 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2703 sigemptyset(&new_sa.sa.sa_mask);
2705 2704
2706 ret = do_sigaction(sig, &new_sa, &old_sa); 2705 ret = do_sigaction(sig, &new_sa, &old_sa);
2707 2706
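The do_sigaction()/sys_signal() hunks strip SIGKILL and SIGSTOP from sa_mask before the action is installed and initialise sa_mask in sys_signal() instead of leaving it uninitialised. From userspace the visible rule is simply that those two signals can never be blocked by a handler's mask; a small sigaction example:

#include <signal.h>
#include <stdio.h>
#include <string.h>

static void handler(int sig)
{
        (void)sig;                      /* nothing to do here */
}

int main(void)
{
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = handler;
        sigemptyset(&sa.sa_mask);        /* always start from an empty mask */
        sigaddset(&sa.sa_mask, SIGKILL); /* the kernel strips these two ...  */
        sigaddset(&sa.sa_mask, SIGSTOP);
        sigaddset(&sa.sa_mask, SIGTERM); /* ... while this one is honoured   */

        if (sigaction(SIGUSR1, &sa, NULL) != 0) {
                perror("sigaction");
                return 1;
        }
        puts("handler installed; SIGKILL/SIGSTOP cannot end up blocked");
        return 0;
}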
diff --git a/kernel/sys.c b/kernel/sys.c
index d09cac23fd..f91218a546 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -428,7 +428,7 @@ void kernel_kexec(void)
428{ 428{
429#ifdef CONFIG_KEXEC 429#ifdef CONFIG_KEXEC
430 struct kimage *image; 430 struct kimage *image;
431 image = xchg(&kexec_image, 0); 431 image = xchg(&kexec_image, NULL);
432 if (!image) { 432 if (!image) {
433 return; 433 return;
434 } 434 }
@@ -440,23 +440,25 @@ void kernel_kexec(void)
440} 440}
441EXPORT_SYMBOL_GPL(kernel_kexec); 441EXPORT_SYMBOL_GPL(kernel_kexec);
442 442
443void kernel_shutdown_prepare(enum system_states state)
444{
445 notifier_call_chain(&reboot_notifier_list,
446 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
447 system_state = state;
448 device_shutdown();
449}
443/** 450/**
444 * kernel_halt - halt the system 451 * kernel_halt - halt the system
445 * 452 *
446 * Shutdown everything and perform a clean system halt. 453 * Shutdown everything and perform a clean system halt.
447 */ 454 */
448void kernel_halt_prepare(void)
449{
450 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
451 system_state = SYSTEM_HALT;
452 device_shutdown();
453}
454void kernel_halt(void) 455void kernel_halt(void)
455{ 456{
456 kernel_halt_prepare(); 457 kernel_shutdown_prepare(SYSTEM_HALT);
457 printk(KERN_EMERG "System halted.\n"); 458 printk(KERN_EMERG "System halted.\n");
458 machine_halt(); 459 machine_halt();
459} 460}
461
460EXPORT_SYMBOL_GPL(kernel_halt); 462EXPORT_SYMBOL_GPL(kernel_halt);
461 463
462/** 464/**
@@ -464,20 +466,13 @@ EXPORT_SYMBOL_GPL(kernel_halt);
464 * 466 *
465 * Shutdown everything and perform a clean system power_off. 467 * Shutdown everything and perform a clean system power_off.
466 */ 468 */
467void kernel_power_off_prepare(void)
468{
469 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
470 system_state = SYSTEM_POWER_OFF;
471 device_shutdown();
472}
473void kernel_power_off(void) 469void kernel_power_off(void)
474{ 470{
475 kernel_power_off_prepare(); 471 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
476 printk(KERN_EMERG "Power down.\n"); 472 printk(KERN_EMERG "Power down.\n");
477 machine_power_off(); 473 machine_power_off();
478} 474}
479EXPORT_SYMBOL_GPL(kernel_power_off); 475EXPORT_SYMBOL_GPL(kernel_power_off);
480
481/* 476/*
482 * Reboot system call: for obvious reasons only root may call it, 477 * Reboot system call: for obvious reasons only root may call it,
483 * and even root needs to set up some magic numbers in the registers 478 * and even root needs to set up some magic numbers in the registers
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 17313b99e5..1067090db6 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16);
104cond_syscall(sys_setuid16); 104cond_syscall(sys_setuid16);
105cond_syscall(sys_vm86old); 105cond_syscall(sys_vm86old);
106cond_syscall(sys_vm86); 106cond_syscall(sys_vm86);
107cond_syscall(compat_sys_ipc);
108cond_syscall(compat_sys_sysctl);
107 109
108/* arch-specific weak syscall entries */ 110/* arch-specific weak syscall entries */
109cond_syscall(sys_pciconfig_read); 111cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cb99a42f8b..32b48e8ee3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -44,13 +44,14 @@
44#include <linux/limits.h> 44#include <linux/limits.h>
45#include <linux/dcache.h> 45#include <linux/dcache.h>
46#include <linux/syscalls.h> 46#include <linux/syscalls.h>
47#include <linux/nfs_fs.h>
48#include <linux/acpi.h>
47 49
48#include <asm/uaccess.h> 50#include <asm/uaccess.h>
49#include <asm/processor.h> 51#include <asm/processor.h>
50 52
51#ifdef CONFIG_ROOT_NFS 53extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
52#include <linux/nfs_fs.h> 54 void __user *buffer, size_t *lenp, loff_t *ppos);
53#endif
54 55
55#if defined(CONFIG_SYSCTL) 56#if defined(CONFIG_SYSCTL)
56 57
@@ -126,7 +127,9 @@ extern int sysctl_hz_timer;
126extern int acct_parm[]; 127extern int acct_parm[];
127#endif 128#endif
128 129
129int randomize_va_space = 1; 130#ifdef CONFIG_IA64
131extern int no_unaligned_warning;
132#endif
130 133
131static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, 134static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
132 ctl_table *, void **); 135 ctl_table *, void **);
@@ -640,6 +643,7 @@ static ctl_table kern_table[] = {
640 .proc_handler = &proc_dointvec, 643 .proc_handler = &proc_dointvec,
641 }, 644 },
642#endif 645#endif
646#if defined(CONFIG_MMU)
643 { 647 {
644 .ctl_name = KERN_RANDOMIZE, 648 .ctl_name = KERN_RANDOMIZE,
645 .procname = "randomize_va_space", 649 .procname = "randomize_va_space",
@@ -648,6 +652,7 @@ static ctl_table kern_table[] = {
648 .mode = 0644, 652 .mode = 0644,
649 .proc_handler = &proc_dointvec, 653 .proc_handler = &proc_dointvec,
650 }, 654 },
655#endif
651#if defined(CONFIG_S390) && defined(CONFIG_SMP) 656#if defined(CONFIG_S390) && defined(CONFIG_SMP)
652 { 657 {
653 .ctl_name = KERN_SPIN_RETRY, 658 .ctl_name = KERN_SPIN_RETRY,
@@ -658,6 +663,26 @@ static ctl_table kern_table[] = {
658 .proc_handler = &proc_dointvec, 663 .proc_handler = &proc_dointvec,
659 }, 664 },
660#endif 665#endif
666#ifdef CONFIG_ACPI_SLEEP
667 {
668 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
669 .procname = "acpi_video_flags",
670 .data = &acpi_video_flags,
671 .maxlen = sizeof (unsigned long),
672 .mode = 0644,
673 .proc_handler = &proc_doulongvec_minmax,
674 },
675#endif
676#ifdef CONFIG_IA64
677 {
678 .ctl_name = KERN_IA64_UNALIGNED,
679 .procname = "ignore-unaligned-usertrap",
680 .data = &no_unaligned_warning,
681 .maxlen = sizeof (int),
682 .mode = 0644,
683 .proc_handler = &proc_dointvec,
684 },
685#endif
661 { .ctl_name = 0 } 686 { .ctl_name = 0 }
662}; 687};
663 688
@@ -878,7 +903,17 @@ static ctl_table vm_table[] = {
878 .maxlen = sizeof(zone_reclaim_mode), 903 .maxlen = sizeof(zone_reclaim_mode),
879 .mode = 0644, 904 .mode = 0644,
880 .proc_handler = &proc_dointvec, 905 .proc_handler = &proc_dointvec,
881 .strategy = &zero, 906 .strategy = &sysctl_intvec,
907 .extra1 = &zero,
908 },
909 {
910 .ctl_name = VM_ZONE_RECLAIM_INTERVAL,
911 .procname = "zone_reclaim_interval",
912 .data = &zone_reclaim_interval,
913 .maxlen = sizeof(zone_reclaim_interval),
914 .mode = 0644,
915 .proc_handler = &proc_dointvec_jiffies,
916 .strategy = &sysctl_jiffies,
882 }, 917 },
883#endif 918#endif
884 { .ctl_name = 0 } 919 { .ctl_name = 0 }
@@ -911,7 +946,7 @@ static ctl_table fs_table[] = {
911 .data = &files_stat, 946 .data = &files_stat,
912 .maxlen = 3*sizeof(int), 947 .maxlen = 3*sizeof(int),
913 .mode = 0444, 948 .mode = 0444,
914 .proc_handler = &proc_dointvec, 949 .proc_handler = &proc_nr_files,
915 }, 950 },
916 { 951 {
917 .ctl_name = FS_MAXFILE, 952 .ctl_name = FS_MAXFILE,
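Several of the sysctl additions above surface as files under /proc/sys: the randomize_va_space entry is now compiled only with CONFIG_MMU, acpi_video_flags and zone_reclaim_interval are added here, and fs/file-nr is now produced by proc_nr_files(). The probe below is only a quick userspace check, assuming the corresponding config options are enabled; the files are simply absent otherwise.

#include <stdio.h>

static void show(const char *path)
{
        char buf[64];
        FILE *f = fopen(path, "r");

        if (!f) {
                printf("%-42s (not present on this kernel)\n", path);
                return;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("%-42s %s", path, buf);
        fclose(f);
}

int main(void)
{
        show("/proc/sys/kernel/randomize_va_space"); /* CONFIG_MMU only now   */
        show("/proc/sys/kernel/acpi_video_flags");   /* proc_doulongvec_minmax */
        show("/proc/sys/vm/zone_reclaim_interval");  /* jiffies-based entry   */
        show("/proc/sys/fs/file-nr");                /* served by proc_nr_files */
        return 0;
}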
diff --git a/kernel/time.c b/kernel/time.c
index 7477b1d207..804539165d 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -155,7 +155,7 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
155 static int firsttime = 1; 155 static int firsttime = 1;
156 int error = 0; 156 int error = 0;
157 157
158 if (!timespec_valid(tv)) 158 if (tv && !timespec_valid(tv))
159 return -EINVAL; 159 return -EINVAL;
160 160
161 error = security_settime(tv, tz); 161 error = security_settime(tv, tz);
@@ -637,15 +637,16 @@ void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
637 * 637 *
638 * Returns the timespec representation of the nsec parameter. 638 * Returns the timespec representation of the nsec parameter.
639 */ 639 */
640inline struct timespec ns_to_timespec(const nsec_t nsec) 640struct timespec ns_to_timespec(const nsec_t nsec)
641{ 641{
642 struct timespec ts; 642 struct timespec ts;
643 643
644 if (nsec) 644 if (!nsec)
645 ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, 645 return (struct timespec) {0, 0};
646 &ts.tv_nsec); 646
647 else 647 ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);
648 ts.tv_sec = ts.tv_nsec = 0; 648 if (unlikely(nsec < 0))
649 set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
649 650
650 return ts; 651 return ts;
651} 652}
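The rewritten ns_to_timespec() above handles negative nanosecond counts by renormalising the result (via set_normalized_timespec()) instead of returning a timespec with a negative tv_nsec. A self-contained C99 sketch of the same arithmetic follows; it is not the kernel code, which uses div_long_long_rem_signed().

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000L

static struct timespec ns_to_timespec(long long nsec)
{
        struct timespec ts = { 0, 0 };

        if (!nsec)
                return ts;

        ts.tv_sec  = nsec / NSEC_PER_SEC;
        ts.tv_nsec = nsec % NSEC_PER_SEC;

        /* same job as set_normalized_timespec() for negative remainders */
        if (ts.tv_nsec < 0) {
                ts.tv_sec  -= 1;
                ts.tv_nsec += NSEC_PER_SEC;
        }
        return ts;
}

int main(void)
{
        struct timespec a = ns_to_timespec(1500000000LL);   /*  1.5 s */
        struct timespec b = ns_to_timespec(-1500000000LL);  /* -1.5 s */

        printf("%ld s %ld ns\n", (long)a.tv_sec, a.tv_nsec);
        printf("%ld s %ld ns\n", (long)b.tv_sec, b.tv_nsec);
        return 0;
}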
diff --git a/kernel/timer.c b/kernel/timer.c
index 4f1cb0ab52..bf7c4193b9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -489,13 +489,25 @@ unsigned long next_timer_interrupt(void)
489 struct list_head *list; 489 struct list_head *list;
490 struct timer_list *nte; 490 struct timer_list *nte;
491 unsigned long expires; 491 unsigned long expires;
492 unsigned long hr_expires = MAX_JIFFY_OFFSET;
493 ktime_t hr_delta;
492 tvec_t *varray[4]; 494 tvec_t *varray[4];
493 int i, j; 495 int i, j;
494 496
497 hr_delta = hrtimer_get_next_event();
498 if (hr_delta.tv64 != KTIME_MAX) {
499 struct timespec tsdelta;
500 tsdelta = ktime_to_timespec(hr_delta);
501 hr_expires = timespec_to_jiffies(&tsdelta);
502 if (hr_expires < 3)
503 return hr_expires + jiffies;
504 }
505 hr_expires += jiffies;
506
495 base = &__get_cpu_var(tvec_bases); 507 base = &__get_cpu_var(tvec_bases);
496 spin_lock(&base->t_base.lock); 508 spin_lock(&base->t_base.lock);
497 expires = base->timer_jiffies + (LONG_MAX >> 1); 509 expires = base->timer_jiffies + (LONG_MAX >> 1);
498 list = 0; 510 list = NULL;
499 511
500 /* Look for timer events in tv1. */ 512 /* Look for timer events in tv1. */
501 j = base->timer_jiffies & TVR_MASK; 513 j = base->timer_jiffies & TVR_MASK;
@@ -542,6 +554,10 @@ found:
542 } 554 }
543 } 555 }
544 spin_unlock(&base->t_base.lock); 556 spin_unlock(&base->t_base.lock);
557
558 if (time_before(hr_expires, expires))
559 return hr_expires;
560
545 return expires; 561 return expires;
546} 562}
547#endif 563#endif
@@ -717,12 +733,16 @@ static void second_overflow(void)
717#endif 733#endif
718} 734}
719 735
720/* in the NTP reference this is called "hardclock()" */ 736/*
721static void update_wall_time_one_tick(void) 737 * Returns how many microseconds we need to add to xtime this tick
738 * in doing an adjustment requested with adjtime.
739 */
740static long adjtime_adjustment(void)
722{ 741{
723 long time_adjust_step, delta_nsec; 742 long time_adjust_step;
724 743
725 if ((time_adjust_step = time_adjust) != 0 ) { 744 time_adjust_step = time_adjust;
745 if (time_adjust_step) {
726 /* 746 /*
727 * We are doing an adjtime thing. Prepare time_adjust_step to 747 * We are doing an adjtime thing. Prepare time_adjust_step to
728 * be within bounds. Note that a positive time_adjust means we 748 * be within bounds. Note that a positive time_adjust means we
@@ -733,10 +753,19 @@ static void update_wall_time_one_tick(void)
733 */ 753 */
734 time_adjust_step = min(time_adjust_step, (long)tickadj); 754 time_adjust_step = min(time_adjust_step, (long)tickadj);
735 time_adjust_step = max(time_adjust_step, (long)-tickadj); 755 time_adjust_step = max(time_adjust_step, (long)-tickadj);
756 }
757 return time_adjust_step;
758}
736 759
760/* in the NTP reference this is called "hardclock()" */
761static void update_wall_time_one_tick(void)
762{
763 long time_adjust_step, delta_nsec;
764
765 time_adjust_step = adjtime_adjustment();
766 if (time_adjust_step)
737 /* Reduce by this step the amount of time left */ 767 /* Reduce by this step the amount of time left */
738 time_adjust -= time_adjust_step; 768 time_adjust -= time_adjust_step;
739 }
740 delta_nsec = tick_nsec + time_adjust_step * 1000; 769 delta_nsec = tick_nsec + time_adjust_step * 1000;
741 /* 770 /*
742 * Advance the phase, once it gets to one microsecond, then 771 * Advance the phase, once it gets to one microsecond, then
@@ -759,6 +788,22 @@ static void update_wall_time_one_tick(void)
759} 788}
760 789
761/* 790/*
791 * Return how long ticks are at the moment, that is, how much time
792 * update_wall_time_one_tick will add to xtime next time we call it
793 * (assuming no calls to do_adjtimex in the meantime).
794 * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
795 * bits to the right of the binary point.
796 * This function has no side-effects.
797 */
798u64 current_tick_length(void)
799{
800 long delta_nsec;
801
802 delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
803 return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
804}
805
806/*
762 * Using a loop looks inefficient, but "ticks" is 807 * Using a loop looks inefficient, but "ticks" is
763 * usually just one (we shouldn't be losing ticks, 808 * usually just one (we shouldn't be losing ticks,
764 * we're doing this this way mainly for interrupt 809 * we're doing this this way mainly for interrupt
@@ -896,6 +941,8 @@ static inline void update_times(void)
896void do_timer(struct pt_regs *regs) 941void do_timer(struct pt_regs *regs)
897{ 942{
898 jiffies_64++; 943 jiffies_64++;
944 /* prevent loading jiffies before storing new jiffies_64 value. */
945 barrier();
899 update_times(); 946 update_times();
900 softlockup_tick(regs); 947 softlockup_tick(regs);
901} 948}
@@ -1322,10 +1369,10 @@ static inline u64 time_interpolator_get_cycles(unsigned int src)
1322 return x(); 1369 return x();
1323 1370
1324 case TIME_SOURCE_MMIO64 : 1371 case TIME_SOURCE_MMIO64 :
1325 return readq((void __iomem *) time_interpolator->addr); 1372 return readq_relaxed((void __iomem *)time_interpolator->addr);
1326 1373
1327 case TIME_SOURCE_MMIO32 : 1374 case TIME_SOURCE_MMIO32 :
1328 return readl((void __iomem *) time_interpolator->addr); 1375 return readl_relaxed((void __iomem *)time_interpolator->addr);
1329 1376
1330 default: return get_cycles(); 1377 default: return get_cycles();
1331 } 1378 }
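The timer.c changes factor the adjtime slew into adjtime_adjustment(), which clamps the outstanding time_adjust to +/- tickadj per tick, so that the new current_tick_length() can report the effective tick length, in fixed-point nanoseconds with SHIFT_SCALE-10 fractional bits, without side effects. The sketch below reproduces that arithmetic with made-up NTP state (tick_nsec, tickadj, time_adjust, time_adj); on the kernel side these values come from the timekeeping code.

#include <stdio.h>

#define SHIFT_SCALE 22          /* as in the 2.6 timex code */

static long tick_nsec   = 1000000;  /* ~1 ms tick (HZ=1000)            */
static long tickadj     = 500;      /* max slew per tick, microseconds */
static long time_adjust = 1200;     /* outstanding adjtime() offset, us */
static long long time_adj;          /* NTP frequency term, pre-scaled   */

/* how many microseconds this tick may contribute to the adjtime slew */
static long adjtime_adjustment(void)
{
        long step = time_adjust;

        if (step > tickadj)
                step = tickadj;
        if (step < -tickadj)
                step = -tickadj;
        return step;
}

/* current tick length in fixed-point ns, SHIFT_SCALE-10 fractional bits */
static unsigned long long current_tick_length(void)
{
        long delta_nsec = tick_nsec + adjtime_adjustment() * 1000;

        return ((unsigned long long)delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
}

int main(void)
{
        printf("this tick adds %ld us of adjtime slew\n", adjtime_adjustment());
        printf("tick length: %llu (fixed point, >>%d for whole ns)\n",
               current_tick_length(), SHIFT_SCALE - 10);
        return 0;
}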
diff --git a/kernel/user.c b/kernel/user.c
index 89e562feb1..d9deae43a9 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/bitops.h> 14#include <linux/bitops.h>
15#include <linux/key.h> 15#include <linux/key.h>
16#include <linux/interrupt.h>
16 17
17/* 18/*
18 * UID task count cache, to get fast user lookup in "alloc_uid" 19 * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -27,6 +28,16 @@
27 28
28static kmem_cache_t *uid_cachep; 29static kmem_cache_t *uid_cachep;
29static struct list_head uidhash_table[UIDHASH_SZ]; 30static struct list_head uidhash_table[UIDHASH_SZ];
31
32/*
33 * The uidhash_lock is mostly taken from process context, but it is
34 * occasionally also taken from softirq/tasklet context, when
35 * task-structs get RCU-freed. Hence all locking must be softirq-safe.
36 * But free_uid() is also called with local interrupts disabled, and running
37 * local_bh_enable() with local interrupts disabled is an error - we'll run
38 * softirq callbacks, and they can unconditionally enable interrupts, and
39 * the caller of free_uid() didn't expect that..
40 */
30static DEFINE_SPINLOCK(uidhash_lock); 41static DEFINE_SPINLOCK(uidhash_lock);
31 42
32struct user_struct root_user = { 43struct user_struct root_user = {
@@ -82,15 +93,19 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has
82struct user_struct *find_user(uid_t uid) 93struct user_struct *find_user(uid_t uid)
83{ 94{
84 struct user_struct *ret; 95 struct user_struct *ret;
96 unsigned long flags;
85 97
86 spin_lock(&uidhash_lock); 98 spin_lock_irqsave(&uidhash_lock, flags);
87 ret = uid_hash_find(uid, uidhashentry(uid)); 99 ret = uid_hash_find(uid, uidhashentry(uid));
88 spin_unlock(&uidhash_lock); 100 spin_unlock_irqrestore(&uidhash_lock, flags);
89 return ret; 101 return ret;
90} 102}
91 103
92void free_uid(struct user_struct *up) 104void free_uid(struct user_struct *up)
93{ 105{
106 unsigned long flags;
107
108 local_irq_save(flags);
94 if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { 109 if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
95 uid_hash_remove(up); 110 uid_hash_remove(up);
96 key_put(up->uid_keyring); 111 key_put(up->uid_keyring);
@@ -98,6 +113,7 @@ void free_uid(struct user_struct *up)
98 kmem_cache_free(uid_cachep, up); 113 kmem_cache_free(uid_cachep, up);
99 spin_unlock(&uidhash_lock); 114 spin_unlock(&uidhash_lock);
100 } 115 }
116 local_irq_restore(flags);
101} 117}
102 118
103struct user_struct * alloc_uid(uid_t uid) 119struct user_struct * alloc_uid(uid_t uid)
@@ -105,9 +121,9 @@ struct user_struct * alloc_uid(uid_t uid)
105 struct list_head *hashent = uidhashentry(uid); 121 struct list_head *hashent = uidhashentry(uid);
106 struct user_struct *up; 122 struct user_struct *up;
107 123
108 spin_lock(&uidhash_lock); 124 spin_lock_irq(&uidhash_lock);
109 up = uid_hash_find(uid, hashent); 125 up = uid_hash_find(uid, hashent);
110 spin_unlock(&uidhash_lock); 126 spin_unlock_irq(&uidhash_lock);
111 127
112 if (!up) { 128 if (!up) {
113 struct user_struct *new; 129 struct user_struct *new;
@@ -137,7 +153,7 @@ struct user_struct * alloc_uid(uid_t uid)
137 * Before adding this, check whether we raced 153 * Before adding this, check whether we raced
138 * on adding the same user already.. 154 * on adding the same user already..
139 */ 155 */
140 spin_lock(&uidhash_lock); 156 spin_lock_irq(&uidhash_lock);
141 up = uid_hash_find(uid, hashent); 157 up = uid_hash_find(uid, hashent);
142 if (up) { 158 if (up) {
143 key_put(new->uid_keyring); 159 key_put(new->uid_keyring);
@@ -147,7 +163,7 @@ struct user_struct * alloc_uid(uid_t uid)
147 uid_hash_insert(new, hashent); 163 uid_hash_insert(new, hashent);
148 up = new; 164 up = new;
149 } 165 }
150 spin_unlock(&uidhash_lock); 166 spin_unlock_irq(&uidhash_lock);
151 167
152 } 168 }
153 return up; 169 return up;
@@ -183,9 +199,9 @@ static int __init uid_cache_init(void)
183 INIT_LIST_HEAD(uidhash_table + n); 199 INIT_LIST_HEAD(uidhash_table + n);
184 200
185 /* Insert the root user immediately (init already runs as root) */ 201 /* Insert the root user immediately (init already runs as root) */
186 spin_lock(&uidhash_lock); 202 spin_lock_irq(&uidhash_lock);
187 uid_hash_insert(&root_user, uidhashentry(0)); 203 uid_hash_insert(&root_user, uidhashentry(0));
188 spin_unlock(&uidhash_lock); 204 spin_unlock_irq(&uidhash_lock);
189 205
190 return 0; 206 return 0;
191} 207}
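The user.c conversion makes every uidhash_lock acquisition IRQ-aware because, per the new comment, the lock is also taken from softirq context when task structs are RCU-freed, and free_uid() may be called with interrupts already disabled. The part that ports cleanly to userspace is the atomic_dec_and_lock() idiom itself: only the final reference drop takes the hash lock, every other put stays lock-free. Below is a pthreads/C11 sketch of that idiom with hypothetical names mirroring the kernel ones; the interrupt-disabling half of the patch has no userspace equivalent and is omitted.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct user {
        atomic_int refcount;
};

static pthread_mutex_t uidhash_lock = PTHREAD_MUTEX_INITIALIZER;

/* decrement; return 1 with the lock held only if the count hit zero */
static int atomic_dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
        int old = atomic_load(cnt);

        /* fast path: drop a reference that is not the last one */
        while (old > 1)
                if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                        return 0;

        /* slow path: might be the last reference, decide under the lock */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) == 1)
                return 1;               /* locked, caller must unlock */
        pthread_mutex_unlock(lock);
        return 0;
}

static void free_user(struct user *up)
{
        if (atomic_dec_and_lock(&up->refcount, &uidhash_lock)) {
                /* last reference: the kernel would unlink from the hash here */
                printf("last put: freeing\n");
                pthread_mutex_unlock(&uidhash_lock);
                free(up);
        }
}

int main(void)
{
        struct user *up = malloc(sizeof(*up));

        atomic_init(&up->refcount, 2);
        free_user(up);                  /* 2 -> 1: no lock taken */
        free_user(up);                  /* 1 -> 0: locked, then freed */
        return 0;
}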