Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c        |   6
-rw-r--r--  kernel/compat.c         |   1
-rw-r--r--  kernel/cpuset.c         |  37
-rw-r--r--  kernel/exit.c           |   3
-rw-r--r--  kernel/fork.c           | 405
-rw-r--r--  kernel/hrtimer.c        |  83
-rw-r--r--  kernel/intermodule.c    |   3
-rw-r--r--  kernel/itimer.c         |  11
-rw-r--r--  kernel/kprobes.c        |  36
-rw-r--r--  kernel/module.c         |   6
-rw-r--r--  kernel/panic.c          |   1
-rw-r--r--  kernel/posix-timers.c   |  53
-rw-r--r--  kernel/power/console.c  |  16
-rw-r--r--  kernel/power/disk.c     |  15
-rw-r--r--  kernel/power/main.c     |   4
-rw-r--r--  kernel/power/power.h    |  15
-rw-r--r--  kernel/power/snapshot.c |   4
-rw-r--r--  kernel/power/swsusp.c   |  18
-rw-r--r--  kernel/ptrace.c         |  28
-rw-r--r--  kernel/rcutorture.c     |  10
-rw-r--r--  kernel/sched.c          | 176
-rw-r--r--  kernel/signal.c         |  11
-rw-r--r--  kernel/sys.c            |  27
-rw-r--r--  kernel/sys_ni.c         |   2
-rw-r--r--  kernel/sysctl.c         |  32
-rw-r--r--  kernel/time.c           |  15
-rw-r--r--  kernel/timer.c          |  41
-rw-r--r--  kernel/user.c           |  32
28 files changed, 682 insertions(+), 409 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 685c25175d96..d7e7e637b92a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -841,7 +841,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 
 	for (aux = context->aux; aux; aux = aux->next) {
 
-		ab = audit_log_start(context, GFP_KERNEL, aux->type);
+		ab = audit_log_start(context, gfp_mask, aux->type);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
@@ -878,14 +878,14 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 	}
 
 	if (context->pwd && context->pwdmnt) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
+		ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
 		if (ab) {
 			audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
 			audit_log_end(ab);
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+		ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
diff --git a/kernel/compat.c b/kernel/compat.c
index 1867290c37e3..8c9cd88b6785 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,7 +23,6 @@
 #include <linux/security.h>
 
 #include <asm/uaccess.h>
-#include <asm/bug.h>
 
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fe2f71f92ae0..12815d3f1a05 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -641,7 +641,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * task has been modifying its cpuset.
  */
 
-void cpuset_update_task_memory_state()
+void cpuset_update_task_memory_state(void)
 {
 	int my_cpusets_mem_gen;
 	struct task_struct *tsk = current;
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
  * We don't need to task_lock() this reference to tsk->cpuset,
  * because tsk is already marked PF_EXITING, so attach_task() won't
  * mess with it, or task is a failed fork, never visible to attach_task.
+ *
+ * Hack:
+ *
+ * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+ *
+ * Don't leave a task unable to allocate memory, as that is an
+ * accident waiting to happen should someone add a callout in
+ * do_exit() after the cpuset_exit() call that might allocate.
+ * If a task tries to allocate memory with an invalid cpuset,
+ * it will oops in cpuset_update_task_memory_state().
+ *
+ * We call cpuset_exit() while the task is still competent to
+ * handle notify_on_release(), then leave the task attached to
+ * the root cpuset (top_cpuset) for the remainder of its exit.
+ *
+ * To do this properly, we would increment the reference count on
+ * top_cpuset, and near the very end of the kernel/exit.c do_exit()
+ * code we would add a second cpuset function call, to drop that
+ * reference. This would just create an unnecessary hot spot on
+ * the top_cpuset reference count, to no avail.
+ *
+ * Normally, holding a reference to a cpuset without bumping its
+ * count is unsafe. The cpuset could go away, or someone could
+ * attach us to a different cpuset, decrementing the count on
+ * the first cpuset that we never incremented. But in this case,
+ * top_cpuset isn't going away, and either task has PF_EXITING set,
+ * which wards off any attach_task() attempts, or task is a failed
+ * fork, never visible to attach_task.
+ *
+ * Another way to do this would be to set the cpuset pointer
+ * to NULL here, and check in cpuset_update_task_memory_state()
+ * for a NULL pointer. This hack avoids that NULL check, for no
+ * cost (other than this way too long comment ;).
 **/
 
 void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
 	struct cpuset *cs;
 
 	cs = tsk->cpuset;
-	tsk->cpuset = NULL;
+	tsk->cpuset = &top_cpuset;	/* Hack - see comment above */
 
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 93cee3671332..531aadca5530 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -360,6 +360,9 @@ void daemonize(const char *name, ...)
 	fs = init_task.fs;
 	current->fs = fs;
 	atomic_inc(&fs->count);
+	exit_namespace(current);
+	current->namespace = init_task.namespace;
+	get_namespace(current->namespace);
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4ae8cfc1c89c..fbea12d7a943 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -446,6 +446,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	}
 }
 
+/*
+ * Allocate a new mm structure and copy contents from the
+ * mm structure of the passed in task structure.
+ */
+static struct mm_struct *dup_mm(struct task_struct *tsk)
+{
+	struct mm_struct *mm, *oldmm = current->mm;
+	int err;
+
+	if (!oldmm)
+		return NULL;
+
+	mm = allocate_mm();
+	if (!mm)
+		goto fail_nomem;
+
+	memcpy(mm, oldmm, sizeof(*mm));
+
+	if (!mm_init(mm))
+		goto fail_nomem;
+
+	if (init_new_context(tsk, mm))
+		goto fail_nocontext;
+
+	err = dup_mmap(mm, oldmm);
+	if (err)
+		goto free_pt;
+
+	mm->hiwater_rss = get_mm_rss(mm);
+	mm->hiwater_vm = mm->total_vm;
+
+	return mm;
+
+free_pt:
+	mmput(mm);
+
+fail_nomem:
+	return NULL;
+
+fail_nocontext:
+	/*
+	 * If init_new_context() failed, we cannot use mmput() to free the mm
+	 * because it calls destroy_context()
+	 */
+	mm_free_pgd(mm);
+	free_mm(mm);
+	return NULL;
+}
+
 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct mm_struct * mm, *oldmm;
@@ -473,43 +522,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 	}
 
 	retval = -ENOMEM;
-	mm = allocate_mm();
+	mm = dup_mm(tsk);
 	if (!mm)
 		goto fail_nomem;
 
-	/* Copy the current MM stuff.. */
-	memcpy(mm, oldmm, sizeof(*mm));
-	if (!mm_init(mm))
-		goto fail_nomem;
-
-	if (init_new_context(tsk,mm))
-		goto fail_nocontext;
-
-	retval = dup_mmap(mm, oldmm);
-	if (retval)
-		goto free_pt;
-
-	mm->hiwater_rss = get_mm_rss(mm);
-	mm->hiwater_vm = mm->total_vm;
-
 good_mm:
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
 
-free_pt:
-	mmput(mm);
 fail_nomem:
 	return retval;
-
-fail_nocontext:
-	/*
-	 * If init_new_context() failed, we cannot use mmput() to free the mm
-	 * because it calls destroy_context()
-	 */
-	mm_free_pgd(mm);
-	free_mm(mm);
-	return retval;
 }
 
 static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +620,17 @@ out:
 	return newf;
 }
 
-static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ */
+static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
-	struct files_struct *oldf, *newf;
+	struct files_struct *newf;
 	struct file **old_fds, **new_fds;
-	int open_files, size, i, error = 0, expand;
+	int open_files, size, i, expand;
 	struct fdtable *old_fdt, *new_fdt;
 
-	/*
-	 * A background process may not have any files ...
-	 */
-	oldf = current->files;
-	if (!oldf)
-		goto out;
-
-	if (clone_flags & CLONE_FILES) {
-		atomic_inc(&oldf->count);
-		goto out;
-	}
-
-	/*
-	 * Note: we may be using current for both targets (See exec.c)
-	 * This works because we cache current->files (old) as oldf. Don't
-	 * break this.
-	 */
-	tsk->files = NULL;
-	error = -ENOMEM;
 	newf = alloc_files();
 	if (!newf)
 		goto out;
@@ -651,9 +659,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 	if (expand) {
 		spin_unlock(&oldf->file_lock);
 		spin_lock(&newf->file_lock);
-		error = expand_files(newf, open_files-1);
+		*errorp = expand_files(newf, open_files-1);
 		spin_unlock(&newf->file_lock);
-		if (error < 0)
+		if (*errorp < 0)
 			goto out_release;
 		new_fdt = files_fdtable(newf);
 		/*
@@ -702,10 +710,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
 	}
 
-	tsk->files = newf;
-	error = 0;
 out:
-	return error;
+	return newf;
 
 out_release:
 	free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +721,40 @@ out_release:
 	goto out;
 }
 
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+	struct files_struct *oldf, *newf;
+	int error = 0;
+
+	/*
+	 * A background process may not have any files ...
+	 */
+	oldf = current->files;
+	if (!oldf)
+		goto out;
+
+	if (clone_flags & CLONE_FILES) {
+		atomic_inc(&oldf->count);
+		goto out;
+	}
+
+	/*
+	 * Note: we may be using current for both targets (See exec.c)
+	 * This works because we cache current->files (old) as oldf. Don't
+	 * break this.
+	 */
+	tsk->files = NULL;
+	error = -ENOMEM;
+	newf = dup_fd(oldf, &error);
+	if (!newf)
+		goto out;
+
+	tsk->files = newf;
+	error = 0;
+out:
+	return error;
+}
+
 /*
  * Helper to unshare the files of the current task.
  * We don't want to expose copy_files internals to
@@ -802,7 +842,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
-	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC);
+	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 	sig->real_timer.data = tsk;
@@ -1083,8 +1123,8 @@ static task_t *copy_process(unsigned long clone_flags,
 	p->real_parent = current;
 	p->parent = p->real_parent;
 
+	spin_lock(&current->sighand->siglock);
 	if (clone_flags & CLONE_THREAD) {
-		spin_lock(&current->sighand->siglock);
 		/*
 		 * Important: if an exit-all has been started then
 		 * do not create this new thread - the whole thread
@@ -1122,8 +1162,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			 */
 			p->it_prof_expires = jiffies_to_cputime(1);
 		}
-
-		spin_unlock(&current->sighand->siglock);
 	}
 
 	/*
@@ -1135,8 +1173,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
-	attach_pid(p, PIDTYPE_PID, p->pid);
-	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
 		p->signal->tty = current->signal->tty;
 		p->signal->pgrp = process_group(current);
@@ -1146,9 +1182,12 @@ static task_t *copy_process(unsigned long clone_flags,
 		if (p->pid)
 			__get_cpu_var(process_counts)++;
 	}
+	attach_pid(p, PIDTYPE_TGID, p->tgid);
+	attach_pid(p, PIDTYPE_PID, p->pid);
 
 	nr_threads++;
 	total_forks++;
+	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	return p;
@@ -1323,3 +1362,249 @@ void __init proc_caches_init(void)
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 }
+
+
+/*
+ * Check constraints on flags passed to the unshare system call and
+ * force unsharing of additional process context as appropriate.
+ */
+static inline void check_unshare_flags(unsigned long *flags_ptr)
+{
+	/*
+	 * If unsharing a thread from a thread group, must also
+	 * unshare vm.
+	 */
+	if (*flags_ptr & CLONE_THREAD)
+		*flags_ptr |= CLONE_VM;
+
+	/*
+	 * If unsharing vm, must also unshare signal handlers.
+	 */
+	if (*flags_ptr & CLONE_VM)
+		*flags_ptr |= CLONE_SIGHAND;
+
+	/*
+	 * If unsharing signal handlers and the task was created
+	 * using CLONE_THREAD, then must unshare the thread
+	 */
+	if ((*flags_ptr & CLONE_SIGHAND) &&
+	    (atomic_read(&current->signal->count) > 1))
+		*flags_ptr |= CLONE_THREAD;
+
+	/*
+	 * If unsharing namespace, must also unshare filesystem information.
+	 */
+	if (*flags_ptr & CLONE_NEWNS)
+		*flags_ptr |= CLONE_FS;
+}
+
+/*
+ * Unsharing of tasks created with CLONE_THREAD is not supported yet
+ */
+static int unshare_thread(unsigned long unshare_flags)
+{
+	if (unshare_flags & CLONE_THREAD)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Unshare the filesystem structure if it is being shared
+ */
+static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+{
+	struct fs_struct *fs = current->fs;
+
+	if ((unshare_flags & CLONE_FS) &&
+	    (fs && atomic_read(&fs->count) > 1)) {
+		*new_fsp = __copy_fs_struct(current->fs);
+		if (!*new_fsp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unshare the namespace structure if it is being shared
+ */
+static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
+{
+	struct namespace *ns = current->namespace;
+
+	if ((unshare_flags & CLONE_NEWNS) &&
+	    (ns && atomic_read(&ns->count) > 1)) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		*new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+		if (!*new_nsp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
+ * supported yet
+ */
+static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
+{
+	struct sighand_struct *sigh = current->sighand;
+
+	if ((unshare_flags & CLONE_SIGHAND) &&
+	    (sigh && atomic_read(&sigh->count) > 1))
+		return -EINVAL;
+	else
+		return 0;
+}
+
+/*
+ * Unshare vm if it is being shared
+ */
+static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
+{
+	struct mm_struct *mm = current->mm;
+
+	if ((unshare_flags & CLONE_VM) &&
+	    (mm && atomic_read(&mm->mm_users) > 1)) {
+		*new_mmp = dup_mm(current);
+		if (!*new_mmp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unshare file descriptor table if it is being shared
+ */
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+{
+	struct files_struct *fd = current->files;
+	int error = 0;
+
+	if ((unshare_flags & CLONE_FILES) &&
+	    (fd && atomic_read(&fd->count) > 1)) {
+		*new_fdp = dup_fd(fd, &error);
+		if (!*new_fdp)
+			return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
+ * supported yet
+ */
+static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
+{
+	if (unshare_flags & CLONE_SYSVSEM)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * unshare allows a process to 'unshare' part of the process
+ * context which was originally shared using clone.  copy_*
+ * functions used by do_fork() cannot be used here directly
+ * because they modify an inactive task_struct that is being
+ * constructed. Here we are modifying the current, active,
+ * task_struct.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+	int err = 0;
+	struct fs_struct *fs, *new_fs = NULL;
+	struct namespace *ns, *new_ns = NULL;
+	struct sighand_struct *sigh, *new_sigh = NULL;
+	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
+	struct files_struct *fd, *new_fd = NULL;
+	struct sem_undo_list *new_ulist = NULL;
+
+	check_unshare_flags(&unshare_flags);
+
+	if ((err = unshare_thread(unshare_flags)))
+		goto bad_unshare_out;
+	if ((err = unshare_fs(unshare_flags, &new_fs)))
+		goto bad_unshare_cleanup_thread;
+	if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
+		goto bad_unshare_cleanup_fs;
+	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+		goto bad_unshare_cleanup_ns;
+	if ((err = unshare_vm(unshare_flags, &new_mm)))
+		goto bad_unshare_cleanup_sigh;
+	if ((err = unshare_fd(unshare_flags, &new_fd)))
+		goto bad_unshare_cleanup_vm;
+	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
+		goto bad_unshare_cleanup_fd;
+
+	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+
+		task_lock(current);
+
+		if (new_fs) {
+			fs = current->fs;
+			current->fs = new_fs;
+			new_fs = fs;
+		}
+
+		if (new_ns) {
+			ns = current->namespace;
+			current->namespace = new_ns;
+			new_ns = ns;
+		}
+
+		if (new_sigh) {
+			sigh = current->sighand;
+			current->sighand = new_sigh;
+			new_sigh = sigh;
+		}
+
+		if (new_mm) {
+			mm = current->mm;
+			active_mm = current->active_mm;
+			current->mm = new_mm;
+			current->active_mm = new_mm;
+			activate_mm(active_mm, new_mm);
+			new_mm = mm;
+		}
+
+		if (new_fd) {
+			fd = current->files;
+			current->files = new_fd;
+			new_fd = fd;
+		}
+
+		task_unlock(current);
+	}
+
+bad_unshare_cleanup_fd:
+	if (new_fd)
+		put_files_struct(new_fd);
+
+bad_unshare_cleanup_vm:
+	if (new_mm)
+		mmput(new_mm);
+
+bad_unshare_cleanup_sigh:
+	if (new_sigh)
+		if (atomic_dec_and_test(&new_sigh->count))
+			kmem_cache_free(sighand_cachep, new_sigh);
+
+bad_unshare_cleanup_ns:
+	if (new_ns)
+		put_namespace(new_ns);
+
+bad_unshare_cleanup_fs:
+	if (new_fs)
+		put_fs_struct(new_fs);
+
+bad_unshare_cleanup_thread:
+bad_unshare_out:
+	return err;
+}
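
Editor's note: for readers who want to exercise the new sys_unshare() entry point from userspace, the sketch below is a minimal illustration and not part of the patch. It assumes the patched headers export __NR_unshare in <asm/unistd.h> and that no glibc wrapper exists yet, so the call goes through syscall(2). Unsharing the mount namespace requires CAP_SYS_ADMIN, mirroring the check in unshare_namespace() above.

/* Hypothetical test program; __NR_unshare comes from the patched kernel headers. */
#define _GNU_SOURCE
#include <sched.h>          /* CLONE_NEWNS */
#include <stdio.h>
#include <sys/syscall.h>    /* __NR_unshare, syscall() */
#include <unistd.h>

int main(void)
{
	/* Give this process a private copy of its mount namespace. */
	if (syscall(__NR_unshare, CLONE_NEWNS) == -1) {
		perror("unshare(CLONE_NEWNS)");
		return 1;
	}
	printf("mount namespace unshared; mounts are now private\n");
	return 0;
}
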
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f1c4155b49ac..5ae51f1bc7c8 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -21,6 +21,12 @@
  *  Credits:
  *	based on kernel/timer.c
  *
+ *	Help, testing, suggestions, bugfixes, improvements were
+ *	provided by:
+ *
+ *	George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
+ *	et. al.
+ *
  *  For licencing details see kernel-base/COPYING
  */
 
@@ -66,6 +72,12 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
 
 /*
  * The timer bases:
+ *
+ * Note: If we want to add new timer bases, we have to skip the two
+ * clock ids captured by the cpu-timers. We do this by holding empty
+ * entries rather than doing math adjustment of the clock ids.
+ * This ensures that we capture erroneous accesses to these clock ids
+ * rather than moving them into the range of valid clock id's.
  */
 
 #define MAX_HRTIMER_BASES 2
@@ -406,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base);
 
-	if (mode == HRTIMER_REL)
+	if (mode == HRTIMER_REL) {
 		tim = ktime_add(tim, new_base->get_time());
+		/*
+		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
+		 * to signal that they simply return xtime in
+		 * do_gettimeoffset(). In this case we want to round up by
+		 * resolution when starting a relative timer, to avoid short
+		 * timeouts. This will go away with the GTOD framework.
+		 */
+#ifdef CONFIG_TIME_LOW_RES
+		tim = ktime_add(tim, base->resolution);
+#endif
+	}
 	timer->expires = tim;
 
 	enqueue_hrtimer(timer, new_base);
@@ -483,29 +506,25 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 }
 
 /**
- * hrtimer_rebase - rebase an initialized hrtimer to a different base
+ * hrtimer_init - initialize a timer to the given clock
  *
- * @timer:	the timer to be rebased
+ * @timer:	the timer to be initialized
  * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
  */
-void hrtimer_rebase(struct hrtimer *timer, const clockid_t clock_id)
+void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
+		  enum hrtimer_mode mode)
 {
 	struct hrtimer_base *bases;
 
+	memset(timer, 0, sizeof(struct hrtimer));
+
 	bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
-	timer->base = &bases[clock_id];
-}
 
-/**
- * hrtimer_init - initialize a timer to the given clock
- *
- * @timer:	the timer to be initialized
- * @clock_id:	the clock to be used
- */
-void hrtimer_init(struct hrtimer *timer, const clockid_t clock_id)
-{
-	memset(timer, 0, sizeof(struct hrtimer));
-	hrtimer_rebase(timer, clock_id);
+	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
+		clock_id = CLOCK_MONOTONIC;
+
+	timer->base = &bases[clock_id];
 }
 
 /**
@@ -550,6 +569,7 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 		fn = timer->function;
 		data = timer->data;
 		set_curr_timer(base, timer);
+		timer->state = HRTIMER_RUNNING;
 		__remove_hrtimer(timer, base);
 		spin_unlock_irq(&base->lock);
 
@@ -565,6 +585,10 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 
 		spin_lock_irq(&base->lock);
 
+		/* Another CPU has added back the timer */
+		if (timer->state != HRTIMER_RUNNING)
+			continue;
+
 		if (restart == HRTIMER_RESTART)
 			enqueue_hrtimer(timer, base);
 		else
@@ -638,8 +662,7 @@ schedule_hrtimer_interruptible(struct hrtimer *timer,
 	return schedule_hrtimer(timer, mode);
 }
 
-static long __sched
-nanosleep_restart(struct restart_block *restart, clockid_t clockid)
+static long __sched nanosleep_restart(struct restart_block *restart)
 {
 	struct timespec __user *rmtp;
 	struct timespec tu;
@@ -649,7 +672,7 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
 
 	restart->fn = do_no_restart_syscall;
 
-	hrtimer_init(&timer, clockid);
+	hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
 
 	timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
 
@@ -669,16 +692,6 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
 	return -ERESTART_RESTARTBLOCK;
 }
 
-static long __sched nanosleep_restart_mono(struct restart_block *restart)
-{
-	return nanosleep_restart(restart, CLOCK_MONOTONIC);
-}
-
-static long __sched nanosleep_restart_real(struct restart_block *restart)
-{
-	return nanosleep_restart(restart, CLOCK_REALTIME);
-}
-
 long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
 {
@@ -687,7 +700,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	struct timespec tu;
 	ktime_t rem;
 
-	hrtimer_init(&timer, clockid);
+	hrtimer_init(&timer, clockid, mode);
 
 	timer.expires = timespec_to_ktime(*rqtp);
 
@@ -695,7 +708,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	if (rem.tv64 <= 0)
 		return 0;
 
-	/* Absolute timers do not update the rmtp value: */
+	/* Absolute timers do not update the rmtp value and restart: */
 	if (mode == HRTIMER_ABS)
 		return -ERESTARTNOHAND;
 
@@ -705,11 +718,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		return -EFAULT;
 
 	restart = &current_thread_info()->restart_block;
-	restart->fn = (clockid == CLOCK_MONOTONIC) ?
-		nanosleep_restart_mono : nanosleep_restart_real;
+	restart->fn = nanosleep_restart;
 	restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
 	restart->arg1 = timer.expires.tv64 >> 32;
 	restart->arg2 = (unsigned long) rmtp;
+	restart->arg3 = (unsigned long) timer.base->index;
 
 	return -ERESTART_RESTARTBLOCK;
 }
@@ -736,10 +749,8 @@ static void __devinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	for (i = 0; i < MAX_HRTIMER_BASES; i++) {
+	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++)
 		spin_lock_init(&base->lock);
-		base++;
-	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
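
Editor's note: as a quick orientation for callers, here is a minimal sketch of the hrtimer API after this patch: hrtimer_rebase() is gone and hrtimer_init() takes the mode up front. The callback convention (a data pointer plus an HRTIMER_NORESTART/HRTIMER_RESTART return value) mirrors the it_real_fn usage visible in the kernel/fork.c hunk above; my_expiry_fn and my_arm_timeout are purely illustrative names, not part of the patch.

#include <linux/hrtimer.h>
#include <linux/ktime.h>

/* Illustrative callback: fires once and does not rearm itself. */
static int my_expiry_fn(void *data)
{
	/* ... react to the timeout ... */
	return HRTIMER_NORESTART;
}

/* Arm a one-shot relative timeout of @delta on the monotonic clock. */
static void my_arm_timeout(struct hrtimer *timer, void *data, ktime_t delta)
{
	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_REL);
	timer->function = my_expiry_fn;
	timer->data = data;
	hrtimer_start(timer, delta, HRTIMER_REL);
}
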
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 0cbe633420fb..55b1e5b85db9 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -179,3 +179,6 @@ EXPORT_SYMBOL(inter_module_register);
 EXPORT_SYMBOL(inter_module_unregister);
 EXPORT_SYMBOL(inter_module_get_request);
 EXPORT_SYMBOL(inter_module_put);
+
+MODULE_LICENSE("GPL");
+
diff --git a/kernel/itimer.c b/kernel/itimer.c
index c2c05c4ff28d..379be2f8c84c 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -49,9 +49,11 @@ int do_getitimer(int which, struct itimerval *value)
 
 	switch (which) {
 	case ITIMER_REAL:
+		spin_lock_irq(&tsk->sighand->siglock);
 		value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
 		value->it_interval =
 			ktime_to_timeval(tsk->signal->it_real_incr);
+		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
 		read_lock(&tasklist_lock);
@@ -150,18 +152,25 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
 
 	switch (which) {
 	case ITIMER_REAL:
+again:
+		spin_lock_irq(&tsk->sighand->siglock);
 		timer = &tsk->signal->real_timer;
-		hrtimer_cancel(timer);
 		if (ovalue) {
 			ovalue->it_value = itimer_get_remtime(timer);
 			ovalue->it_interval
 				= ktime_to_timeval(tsk->signal->it_real_incr);
 		}
+		/* We are sharing ->siglock with it_real_fn() */
+		if (hrtimer_try_to_cancel(timer) < 0) {
+			spin_unlock_irq(&tsk->sighand->siglock);
+			goto again;
+		}
 		tsk->signal->it_real_incr =
 			timeval_to_ktime(value->it_interval);
 		expires = timeval_to_ktime(value->it_value);
 		if (expires.tv64 != 0)
 			hrtimer_start(timer, expires, HRTIMER_REL);
+		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
 		nval = timeval_to_cputime(&value->it_value);
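
Editor's note: the do_setitimer() change above introduces a pattern worth calling out. Because it_real_fn() runs under the same ->siglock, a plain hrtimer_cancel() from inside the locked region could deadlock, so the code drops the lock and retries while hrtimer_try_to_cancel() reports the callback as running. A generic sketch of that idiom, with purely illustrative names (my_lock, my_timer), looks like this:

static void my_cancel_locked_timer(spinlock_t *my_lock, struct hrtimer *my_timer)
{
again:
	spin_lock_irq(my_lock);
	/* A negative return means the callback is executing and may want my_lock. */
	if (hrtimer_try_to_cancel(my_timer) < 0) {
		spin_unlock_irq(my_lock);
		goto again;
	}
	/* The timer is guaranteed inactive here; safe to reprogram or tear down. */
	spin_unlock_irq(my_lock);
}
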
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3ea6325228da..fef1af8a73ce 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -344,23 +344,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
 }
 
-/*
- * This kprobe pre_handler is registered with every kretprobe. When probe
- * hits it will set up the return probe.
- */
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-					   struct pt_regs *regs)
-{
-	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-	unsigned long flags = 0;
-
-	/*TODO: consider to only swap the RA after the last pre_handler fired */
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	arch_prepare_kretprobe(rp, regs);
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-	return 0;
-}
-
 static inline void free_rp_inst(struct kretprobe *rp)
 {
 	struct kretprobe_instance *ri;
@@ -578,6 +561,23 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
 
 #ifdef ARCH_SUPPORTS_KRETPROBES
 
+/*
+ * This kprobe pre_handler is registered with every kretprobe. When probe
+ * hits it will set up the return probe.
+ */
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+					   struct pt_regs *regs)
+{
+	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
+	unsigned long flags = 0;
+
+	/*TODO: consider to only swap the RA after the last pre_handler fired */
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	arch_prepare_kretprobe(rp, regs);
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	return 0;
+}
+
 int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
@@ -631,12 +631,12 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp)
 	unregister_kprobe(&rp->kp);
 	/* No race here */
 	spin_lock_irqsave(&kretprobe_lock, flags);
-	free_rp_inst(rp);
 	while ((ri = get_used_rp_inst(rp)) != NULL) {
 		ri->rp = NULL;
 		hlist_del(&ri->uflist);
 	}
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	free_rp_inst(rp);
 }
 
 static int __init init_kprobes(void)
diff --git a/kernel/module.c b/kernel/module.c
index 618ed6e23ecc..5aad477ddc79 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1670,6 +1670,9 @@ static struct module *load_module(void __user *umod,
 		goto free_mod;
 	}
 
+	/* Userspace could have altered the string after the strlen_user() */
+	args[arglen - 1] = '\0';
+
 	if (find_module(mod->name)) {
 		err = -EEXIST;
 		goto free_mod;
@@ -2092,7 +2095,8 @@ static unsigned long mod_find_symname(struct module *mod, const char *name)
 	unsigned int i;
 
 	for (i = 0; i < mod->num_symtab; i++)
-		if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0)
+		if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
+		    mod->symtab[i].st_info != 'U')
 			return mod->symtab[i].st_value;
 	return 0;
 }
diff --git a/kernel/panic.c b/kernel/panic.c
index c5c4ab255834..126dc43f1c74 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 #endif
 	local_irq_enable();
 	for (i = 0;;) {
+		touch_softlockup_watchdog();
 		i += panic_blink(i);
 		mdelay(1);
 		i++;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 197208b3aa2a..216f574b5ffb 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -194,9 +194,7 @@ static inline int common_clock_set(const clockid_t which_clock,
 
 static int common_timer_create(struct k_itimer *new_timer)
 {
-	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock);
-	new_timer->it.real.timer.data = new_timer;
-	new_timer->it.real.timer.function = posix_timer_fn;
+	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
 	return 0;
 }
 
@@ -290,7 +288,8 @@ void do_schedule_next_timer(struct siginfo *info)
 		info->si_overrun = timr->it_overrun_last;
 	}
 
-	unlock_timer(timr, flags);
+	if (timr)
+		unlock_timer(timr, flags);
 }
 
 int posix_timer_event(struct k_itimer *timr,int si_private)
@@ -692,6 +691,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 		 struct itimerspec *new_setting, struct itimerspec *old_setting)
 {
 	struct hrtimer *timer = &timr->it.real.timer;
+	enum hrtimer_mode mode;
 
 	if (old_setting)
 		common_timer_get(timr, old_setting);
@@ -713,14 +713,10 @@ common_timer_set(struct k_itimer *timr, int flags,
 	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
 		return 0;
 
-	/* Posix madness. Only absolute CLOCK_REALTIME timers
-	 * are affected by clock sets. So we must reiniatilize
-	 * the timer.
-	 */
-	if (timr->it_clock == CLOCK_REALTIME && (flags & TIMER_ABSTIME))
-		hrtimer_rebase(timer, CLOCK_REALTIME);
-	else
-		hrtimer_rebase(timer, CLOCK_MONOTONIC);
+	mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
+	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
+	timr->it.real.timer.data = timr;
+	timr->it.real.timer.function = posix_timer_fn;
 
 	timer->expires = timespec_to_ktime(new_setting->it_value);
 
@@ -728,11 +724,15 @@ common_timer_set(struct k_itimer *timr, int flags,
 	timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
 
 	/* SIGEV_NONE timers are not queued ! See common_timer_get */
-	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
+		/* Setup correct expiry time for relative timers */
+		if (mode == HRTIMER_REL)
+			timer->expires = ktime_add(timer->expires,
+						   timer->base->get_time());
 		return 0;
+	}
 
-	hrtimer_start(timer, timer->expires, (flags & TIMER_ABSTIME) ?
-			HRTIMER_ABS : HRTIMER_REL);
+	hrtimer_start(timer, timer->expires, mode);
 	return 0;
 }
 
@@ -875,12 +875,6 @@ int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
 
-int do_posix_clock_notimer_create(struct k_itimer *timer)
-{
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create);
-
 int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
 			       struct timespec *t, struct timespec __user *r)
 {
@@ -947,21 +941,8 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
 static int common_nsleep(const clockid_t which_clock, int flags,
 			 struct timespec *tsave, struct timespec __user *rmtp)
 {
-	int mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
-	int clockid = which_clock;
-
-	switch (which_clock) {
-	case CLOCK_REALTIME:
-		/* Posix madness. Only absolute timers on clock realtime
-		   are affected by clock set. */
-		if (mode != HRTIMER_ABS)
-			clockid = CLOCK_MONOTONIC;
-	case CLOCK_MONOTONIC:
-		break;
-	default:
-		return -EINVAL;
-	}
-	return hrtimer_nanosleep(tsave, rmtp, mode, clockid);
+	return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
+				 HRTIMER_ABS : HRTIMER_REL, which_clock);
 }
 
 asmlinkage long
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 7ff375e7c95f..623786d44159 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,18 +9,13 @@
 #include <linux/console.h>
 #include "power.h"
 
-static int new_loglevel = 10;
-static int orig_loglevel;
-#ifdef SUSPEND_CONSOLE
+#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
+#define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
+
 static int orig_fgconsole, orig_kmsg;
-#endif
 
 int pm_prepare_console(void)
 {
-	orig_loglevel = console_loglevel;
-	console_loglevel = new_loglevel;
-
-#ifdef SUSPEND_CONSOLE
 	acquire_console_sem();
 
 	orig_fgconsole = fg_console;
@@ -41,18 +36,15 @@ int pm_prepare_console(void)
 	}
 	orig_kmsg = kmsg_redirect;
 	kmsg_redirect = SUSPEND_CONSOLE;
-#endif
 	return 0;
 }
 
 void pm_restore_console(void)
 {
-	console_loglevel = orig_loglevel;
-#ifdef SUSPEND_CONSOLE
 	acquire_console_sem();
 	set_console(orig_fgconsole);
 	release_console_sem();
 	kmsg_redirect = orig_kmsg;
-#endif
 	return;
 }
+#endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e24446f8d8cd..0b43847dc980 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -53,7 +53,7 @@ static void power_down(suspend_disk_method_t mode)
 
 	switch(mode) {
 	case PM_DISK_PLATFORM:
-		kernel_power_off_prepare();
+		kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
 		error = pm_ops->enter(PM_SUSPEND_DISK);
 		break;
 	case PM_DISK_SHUTDOWN:
@@ -95,13 +95,6 @@ static int prepare_processes(void)
 		goto thaw;
 	}
 
-	if (pm_disk_mode == PM_DISK_PLATFORM) {
-		if (pm_ops && pm_ops->prepare) {
-			if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
-				goto thaw;
-		}
-	}
-
 	/* Free memory before shutting down devices. */
 	if (!(error = swsusp_shrink_memory()))
 		return 0;
@@ -367,14 +360,14 @@ power_attr(resume);
 
 static ssize_t image_size_show(struct subsystem * subsys, char *buf)
 {
-	return sprintf(buf, "%u\n", image_size);
+	return sprintf(buf, "%lu\n", image_size);
 }
 
 static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n)
 {
-	unsigned int size;
+	unsigned long size;
 
-	if (sscanf(buf, "%u", &size) == 1) {
+	if (sscanf(buf, "%lu", &size) == 1) {
 		image_size = size;
 		return n;
 	}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d253f3ae2fa5..9cb235cba4a9 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -133,10 +133,10 @@ static int suspend_enter(suspend_state_t state)
 static void suspend_finish(suspend_state_t state)
 {
 	device_resume();
-	if (pm_ops && pm_ops->finish)
-		pm_ops->finish(state);
 	thaw_processes();
 	enable_nonboot_cpus();
+	if (pm_ops && pm_ops->finish)
+		pm_ops->finish(state);
 	pm_restore_console();
 }
142 142
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7e8492fd1423..388dba680841 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,14 +1,6 @@
 #include <linux/suspend.h>
 #include <linux/utsname.h>
 
-/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but
-   we probably do not take enough locks for switching consoles, etc,
-   so bad things might happen.
-*/
-#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
-#define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
-#endif
-
 struct swsusp_info {
 	struct new_utsname	uts;
 	u32			version_code;
@@ -42,17 +34,14 @@ static struct subsys_attribute _name##_attr = { \
 
 extern struct subsystem power_subsys;
 
-extern int pm_prepare_console(void);
-extern void pm_restore_console(void);
-
 /* References to section boundaries */
 extern const void __nosave_begin, __nosave_end;
 
 extern unsigned int nr_copy_pages;
 extern struct pbe *pagedir_nosave;
 
-/* Preferred image size in MB (default 500) */
-extern unsigned int image_size;
+/* Preferred image size in bytes (default 500 MB) */
+extern unsigned long image_size;
 
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 41f66365f0d8..8d5a5986d621 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone)
 		 * corrected eventually when the cases giving rise to this
 		 * are better understood.
 		 */
-		if (PageReserved(page)) {
-			printk("highmem reserved page?!\n");
+		if (PageReserved(page))
 			continue;
-		}
 		BUG_ON(PageNosave(page));
 		if (PageNosaveFree(page))
 			continue;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 55a18d26abed..2d9d08f72f76 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -70,12 +70,12 @@
 #include "power.h"
 
 /*
- * Preferred image size in MB (tunable via /sys/power/image_size).
+ * Preferred image size in bytes (tunable via /sys/power/image_size).
  * When it is set to N, swsusp will do its best to ensure the image
- * size will not exceed N MB, but if that is impossible, it will
+ * size will not exceed N bytes, but if that is impossible, it will
  * try to create the smallest image possible.
  */
-unsigned int image_size = 500;
+unsigned long image_size = 500 * 1024 * 1024;
 
 #ifdef CONFIG_HIGHMEM
 unsigned int count_highmem_pages(void);
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
 {
 	int i;
 
-	if (!swsusp_resume_device)
-		return -ENODEV;
 	spin_lock(&swap_lock);
 	for (i = 0; i < MAX_SWAPFILES; i++) {
 		if (!(swap_info[i].flags & SWP_WRITEOK))
 			continue;
-		if (is_resume_device(swap_info + i)) {
+		if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
 			spin_unlock(&swap_lock);
 			root_swap = i;
 			return 0;
@@ -590,7 +588,7 @@ int swsusp_shrink_memory(void)
 		if (!tmp)
 			return -ENOMEM;
 		pages += tmp;
-	} else if (size > (image_size * 1024 * 1024) / PAGE_SIZE) {
+	} else if (size > image_size / PAGE_SIZE) {
 		tmp = shrink_all_memory(SHRINK_BITE);
 		pages += tmp;
 	}
@@ -743,7 +741,6 @@ static int submit(int rw, pgoff_t page_off, void *page)
 	if (!bio)
 		return -ENOMEM;
 	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
-	bio_get(bio);
 	bio->bi_bdev = resume_bdev;
 	bio->bi_end_io = end_io;
 
@@ -753,14 +750,13 @@ static int submit(int rw, pgoff_t page_off, void *page)
 		goto Done;
 	}
 
-	if (rw == WRITE)
-		bio_set_pages_dirty(bio);
 
 	atomic_set(&io_done, 1);
 	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
 	while (atomic_read(&io_done))
 		yield();
-
+	if (rw == READ)
+		bio_set_pages_dirty(bio);
  Done:
 	bio_put(bio);
 	return error;
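
Editor's note: the image_size changes above alter the unit of /sys/power/image_size from megabytes to bytes, so any tool that writes that file needs updating. The sketch below is a minimal, illustrative userspace helper (not part of the patch) showing the new convention:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/power/image_size", "w");

	if (!f) {
		perror("/sys/power/image_size");
		return 1;
	}
	/* Cap the image at roughly 400 MB; the old interface took "400" (MB). */
	fprintf(f, "%lu\n", 400UL * 1024 * 1024);
	fclose(f);
	return 0;
}
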
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5f33cdb6fff5..d95a72c9279d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
  */
 void __ptrace_unlink(task_t *child)
 {
-	if (!child->ptrace)
-		BUG();
+	BUG_ON(!child->ptrace);
+
 	child->ptrace = 0;
 	if (!list_empty(&child->ptrace_list)) {
 		list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
 	return retval;
 }
 
+void __ptrace_detach(struct task_struct *child, unsigned int data)
+{
+	child->exit_code = data;
+	/* .. re-parent .. */
+	__ptrace_unlink(child);
+	/* .. and wake it up. */
+	if (child->exit_state != EXIT_ZOMBIE)
+		wake_up_process(child);
+}
+
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
 	if (!valid_signal(data))
 		return -EIO;
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
 
-	/* .. re-parent .. */
-	child->exit_code = data;
-
 	write_lock_irq(&tasklist_lock);
-	__ptrace_unlink(child);
-	/* .. and wake it up. */
-	if (child->exit_state != EXIT_ZOMBIE)
-		wake_up_process(child);
+	if (child->ptrace)
+		__ptrace_detach(child, data);
 	write_unlock_irq(&tasklist_lock);
 
 	return 0;
@@ -242,8 +247,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 		if (write) {
 			copy_to_user_page(vma, page, addr,
 					  maddr + offset, buf, bytes);
-			if (!PageCompound(page))
-				set_page_dirty_lock(page);
+			set_page_dirty_lock(page);
 		} else {
 			copy_from_user_page(vma, page, addr,
 					    buf, maddr + offset, bytes);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 773219907dd8..7712912dbc84 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -114,16 +114,16 @@ rcu_torture_alloc(void)
 {
 	struct list_head *p;
 
-	spin_lock(&rcu_torture_lock);
+	spin_lock_bh(&rcu_torture_lock);
 	if (list_empty(&rcu_torture_freelist)) {
 		atomic_inc(&n_rcu_torture_alloc_fail);
-		spin_unlock(&rcu_torture_lock);
+		spin_unlock_bh(&rcu_torture_lock);
 		return NULL;
 	}
 	atomic_inc(&n_rcu_torture_alloc);
 	p = rcu_torture_freelist.next;
 	list_del_init(p);
-	spin_unlock(&rcu_torture_lock);
+	spin_unlock_bh(&rcu_torture_lock);
 	return container_of(p, struct rcu_torture, rtort_free);
 }
 
@@ -134,9 +134,9 @@ static void
 rcu_torture_free(struct rcu_torture *p)
 {
 	atomic_inc(&n_rcu_torture_free);
-	spin_lock(&rcu_torture_lock);
+	spin_lock_bh(&rcu_torture_lock);
 	list_add_tail(&p->rtort_free, &rcu_torture_freelist);
-	spin_unlock(&rcu_torture_lock);
+	spin_unlock_bh(&rcu_torture_lock);
 }
 
 static void
diff --git a/kernel/sched.c b/kernel/sched.c
index 3ee2ae45125f..12d291bf3379 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -215,7 +215,6 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
-	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -669,68 +668,13 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			/*
-			 * The migration thread does the actual balancing. Do
-			 * not bias by its priority as the ultra high priority
-			 * will skew balancing adversely.
-			 */
-			inc_prio_bias(rq, p->prio);
-	} else
-		inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			dec_prio_bias(rq, p->prio);
-	} else
-		dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 /*
@@ -739,7 +683,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +807,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	dec_nr_running(p, rq);
+	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -1007,61 +951,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		source_load = load_now;
-	else
-		source_load = min(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		/*
-		 * If we are busy rebalancing the load is biased by
-		 * priority to create 'nice' support across cpus. When
-		 * idle rebalancing we should only bias the source_load if
-		 * there is more than one task running on that queue to
-		 * prevent idle rebalance from trying to pull tasks from a
-		 * queue with only one running task.
-		 */
-		source_load = source_load * rq->prio_bias / running;
+		return load_now;
 
-	return source_load;
-}
-
-static inline unsigned long source_load(int cpu, int type)
-{
-	return __source_load(cpu, type, NOT_IDLE);
+	return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
 * Return a high guess at the load of a migration-target cpu
 */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
1047 unsigned long running = rq->nr_running; 970 unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
1048 unsigned long target_load, cpu_load = rq->cpu_load[type-1],
1049 load_now = running * SCHED_LOAD_SCALE;
1050
1051 if (type == 0) 971 if (type == 0)
1052 target_load = load_now; 972 return load_now;
1053 else
1054 target_load = max(cpu_load, load_now);
1055
1056 if (running > 1 || (idle == NOT_IDLE && running))
1057 target_load = target_load * rq->prio_bias / running;
1058 973
1059 return target_load; 974 return max(rq->cpu_load[type-1], load_now);
1060}
1061
1062static inline unsigned long target_load(int cpu, int type)
1063{
1064 return __target_load(cpu, type, NOT_IDLE);
1065} 975}
1066 976
1067/* 977/*
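[Note on the hunk above: with the prio_bias scaling removed, source_load() and target_load() reduce to comparing the instantaneous load against the decayed cpu_load[] history: take the smaller value for a migration source (conservative) and the larger for a target (pessimistic). A stand-alone user-space rendering of that logic, with LOAD_SCALE standing in for the kernel's SCHED_LOAD_SCALE:

    #include <stdio.h>

    #define LOAD_SCALE 128UL   /* illustrative stand-in for SCHED_LOAD_SCALE */

    struct rq { unsigned long nr_running, cpu_load[3]; };

    static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
    static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

    /* Under-estimate the load of a migration source. */
    static unsigned long source_load(const struct rq *rq, int type)
    {
            unsigned long load_now = rq->nr_running * LOAD_SCALE;

            if (type == 0)
                    return load_now;                        /* raw instantaneous load */
            return min_ul(rq->cpu_load[type - 1], load_now);
    }

    /* Over-estimate the load of a migration target. */
    static unsigned long target_load(const struct rq *rq, int type)
    {
            unsigned long load_now = rq->nr_running * LOAD_SCALE;

            if (type == 0)
                    return load_now;
            return max_ul(rq->cpu_load[type - 1], load_now);
    }

    int main(void)
    {
            struct rq rq = { .nr_running = 2, .cpu_load = { 300, 280, 260 } };

            printf("source=%lu target=%lu\n",
                   source_load(&rq, 1), target_load(&rq, 1));
            return 0;
    }
]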
@@ -1294,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
1294 } 1204 }
1295 } 1205 }
1296 1206
1297 if (p->last_waker_cpu != this_cpu)
1298 goto out_set_cpu;
1299
1300 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) 1207 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
1301 goto out_set_cpu; 1208 goto out_set_cpu;
1302 1209
@@ -1367,8 +1274,6 @@ out_set_cpu:
1367 cpu = task_cpu(p); 1274 cpu = task_cpu(p);
1368 } 1275 }
1369 1276
1370 p->last_waker_cpu = this_cpu;
1371
1372out_activate: 1277out_activate:
1373#endif /* CONFIG_SMP */ 1278#endif /* CONFIG_SMP */
1374 if (old_state == TASK_UNINTERRUPTIBLE) { 1279 if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1450#ifdef CONFIG_SCHEDSTATS 1355#ifdef CONFIG_SCHEDSTATS
1451 memset(&p->sched_info, 0, sizeof(p->sched_info)); 1356 memset(&p->sched_info, 0, sizeof(p->sched_info));
1452#endif 1357#endif
1453#if defined(CONFIG_SMP) 1358#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
1454 p->last_waker_cpu = cpu;
1455#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
1456 p->oncpu = 0; 1359 p->oncpu = 0;
1457#endif 1360#endif
1458#endif
1459#ifdef CONFIG_PREEMPT 1361#ifdef CONFIG_PREEMPT
1460 /* Want to start with kernel preemption disabled. */ 1362 /* Want to start with kernel preemption disabled. */
1461 task_thread_info(p)->preempt_count = 1; 1363 task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1432,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1530 list_add_tail(&p->run_list, &current->run_list); 1432 list_add_tail(&p->run_list, &current->run_list);
1531 p->array = current->array; 1433 p->array = current->array;
1532 p->array->nr_active++; 1434 p->array->nr_active++;
1533 inc_nr_running(p, rq); 1435 rq->nr_running++;
1534 } 1436 }
1535 set_need_resched(); 1437 set_need_resched();
1536 } else 1438 } else
@@ -1875,9 +1777,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1875 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1777 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
1876{ 1778{
1877 dequeue_task(p, src_array); 1779 dequeue_task(p, src_array);
1878 dec_nr_running(p, src_rq); 1780 src_rq->nr_running--;
1879 set_task_cpu(p, this_cpu); 1781 set_task_cpu(p, this_cpu);
1880 inc_nr_running(p, this_rq); 1782 this_rq->nr_running++;
1881 enqueue_task(p, this_array); 1783 enqueue_task(p, this_array);
1882 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) 1784 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
1883 + this_rq->timestamp_last_tick; 1785 + this_rq->timestamp_last_tick;
@@ -2056,9 +1958,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2056 1958
2057 /* Bias balancing toward cpus of our domain */ 1959 /* Bias balancing toward cpus of our domain */
2058 if (local_group) 1960 if (local_group)
2059 load = __target_load(i, load_idx, idle); 1961 load = target_load(i, load_idx);
2060 else 1962 else
2061 load = __source_load(i, load_idx, idle); 1963 load = source_load(i, load_idx);
2062 1964
2063 avg_load += load; 1965 avg_load += load;
2064 } 1966 }
@@ -2171,7 +2073,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2171 int i; 2073 int i;
2172 2074
2173 for_each_cpu_mask(i, group->cpumask) { 2075 for_each_cpu_mask(i, group->cpumask) {
2174 load = __source_load(i, 0, idle); 2076 load = source_load(i, 0);
2175 2077
2176 if (load > max_load) { 2078 if (load > max_load) {
2177 max_load = load; 2079 max_load = load;
@@ -3571,10 +3473,8 @@ void set_user_nice(task_t *p, long nice)
3571 goto out_unlock; 3473 goto out_unlock;
3572 } 3474 }
3573 array = p->array; 3475 array = p->array;
3574 if (array) { 3476 if (array)
3575 dequeue_task(p, array); 3477 dequeue_task(p, array);
3576 dec_prio_bias(rq, p->static_prio);
3577 }
3578 3478
3579 old_prio = p->prio; 3479 old_prio = p->prio;
3580 new_prio = NICE_TO_PRIO(nice); 3480 new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3484,6 @@ void set_user_nice(task_t *p, long nice)
3584 3484
3585 if (array) { 3485 if (array) {
3586 enqueue_task(p, array); 3486 enqueue_task(p, array);
3587 inc_prio_bias(rq, p->static_prio);
3588 /* 3487 /*
3589 * If the task increased its priority or is running and 3488 * If the task increased its priority or is running and
3590 * lowered its priority, then reschedule its CPU: 3489 * lowered its priority, then reschedule its CPU:
@@ -4031,7 +3930,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
4031 goto out_unlock; 3930 goto out_unlock;
4032 3931
4033 retval = 0; 3932 retval = 0;
4034 cpus_and(*mask, p->cpus_allowed, cpu_possible_map); 3933 cpus_and(*mask, p->cpus_allowed, cpu_online_map);
4035 3934
4036out_unlock: 3935out_unlock:
4037 read_unlock(&tasklist_lock); 3936 read_unlock(&tasklist_lock);
@@ -5141,7 +5040,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5141#define SEARCH_SCOPE 2 5040#define SEARCH_SCOPE 2
5142#define MIN_CACHE_SIZE (64*1024U) 5041#define MIN_CACHE_SIZE (64*1024U)
5143#define DEFAULT_CACHE_SIZE (5*1024*1024U) 5042#define DEFAULT_CACHE_SIZE (5*1024*1024U)
5144#define ITERATIONS 2 5043#define ITERATIONS 1
5145#define SIZE_THRESH 130 5044#define SIZE_THRESH 130
5146#define COST_THRESH 130 5045#define COST_THRESH 130
5147 5046
@@ -5159,7 +5058,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5159#define MAX_DOMAIN_DISTANCE 32 5058#define MAX_DOMAIN_DISTANCE 32
5160 5059
5161static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = 5060static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
5162 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL }; 5061 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
5062/*
5063 * Architectures may override the migration cost and thus avoid
5064 * boot-time calibration. Unit is nanoseconds. Mostly useful for
5065 * virtualized hardware:
5066 */
5067#ifdef CONFIG_DEFAULT_MIGRATION_COST
5068 CONFIG_DEFAULT_MIGRATION_COST
5069#else
5070 -1LL
5071#endif
5072};
5163 5073
5164/* 5074/*
5165 * Allow override of migration cost - in units of microseconds. 5075 * Allow override of migration cost - in units of microseconds.
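[Note on the migration_cost hunk: the replacement initializer uses GCC's range-designator syntax so every slot defaults either to an architecture-supplied CONFIG_DEFAULT_MIGRATION_COST or to -1LL, which means "calibrate at boot". A small stand-alone illustration of the construct; the array name, size, and DEFAULT_COST macro are made up:

    /* Compile with e.g. -DDEFAULT_COST=500000 to mimic an architecture
     * overriding the boot-time calibration. */
    #include <stdio.h>

    #define NSLOTS 8

    static long long cost[NSLOTS] = {
            [0 ... NSLOTS - 1] =
    #ifdef DEFAULT_COST
            DEFAULT_COST
    #else
            -1LL                    /* -1: measure at boot */
    #endif
    };

    int main(void)
    {
            printf("cost[3] = %lld\n", cost[3]);
            return 0;
    }
]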
@@ -5480,9 +5390,9 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
5480 break; 5390 break;
5481 } 5391 }
5482 /* 5392 /*
5483 * Increase the cachesize in 5% steps: 5393 * Increase the cachesize in 10% steps:
5484 */ 5394 */
5485 size = size * 20 / 19; 5395 size = size * 10 / 9;
5486 } 5396 }
5487 5397
5488 if (migration_debug) 5398 if (migration_debug)
@@ -5551,13 +5461,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
5551 -1 5461 -1
5552#endif 5462#endif
5553 ); 5463 );
5554 printk("migration_cost="); 5464 if (system_state == SYSTEM_BOOTING) {
5555 for (distance = 0; distance <= max_distance; distance++) { 5465 printk("migration_cost=");
5556 if (distance) 5466 for (distance = 0; distance <= max_distance; distance++) {
5557 printk(","); 5467 if (distance)
5558 printk("%ld", (long)migration_cost[distance] / 1000); 5468 printk(",");
5469 printk("%ld", (long)migration_cost[distance] / 1000);
5470 }
5471 printk("\n");
5559 } 5472 }
5560 printk("\n");
5561 j1 = jiffies; 5473 j1 = jiffies;
5562 if (migration_debug) 5474 if (migration_debug)
5563 printk("migration: %ld seconds\n", (j1-j0)/HZ); 5475 printk("migration: %ld seconds\n", (j1-j0)/HZ);
@@ -6109,7 +6021,7 @@ void __init sched_init(void)
6109 runqueue_t *rq; 6021 runqueue_t *rq;
6110 int i, j, k; 6022 int i, j, k;
6111 6023
6112 for (i = 0; i < NR_CPUS; i++) { 6024 for_each_cpu(i) {
6113 prio_array_t *array; 6025 prio_array_t *array;
6114 6026
6115 rq = cpu_rq(i); 6027 rq = cpu_rq(i);
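[Note on the sched_init() hunk: the loop now walks only CPUs present in the possible-CPU map via for_each_cpu() instead of iterating blindly over all NR_CPUS slots. A sketch of the two loop shapes, with a hypothetical init_one_rq() standing in for the body of the real loop:

    /* Old shape: visits every slot up to NR_CPUS, possible or not. */
    for (i = 0; i < NR_CPUS; i++)
            init_one_rq(cpu_rq(i));         /* init_one_rq() is illustrative */

    /* New shape: only CPUs that can ever come online are initialised. */
    for_each_cpu(i)
            init_one_rq(cpu_rq(i));
]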
diff --git a/kernel/signal.c b/kernel/signal.c
index d3efafd8109a..ea154104a00b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -283,7 +283,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
283 return(q); 283 return(q);
284} 284}
285 285
286static inline void __sigqueue_free(struct sigqueue *q) 286static void __sigqueue_free(struct sigqueue *q)
287{ 287{
288 if (q->flags & SIGQUEUE_PREALLOC) 288 if (q->flags & SIGQUEUE_PREALLOC)
289 return; 289 return;
@@ -2430,7 +2430,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
2430} 2430}
2431 2431
2432int 2432int
2433do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) 2433do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2434{ 2434{
2435 struct k_sigaction *k; 2435 struct k_sigaction *k;
2436 sigset_t mask; 2436 sigset_t mask;
@@ -2454,6 +2454,8 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2454 *oact = *k; 2454 *oact = *k;
2455 2455
2456 if (act) { 2456 if (act) {
2457 sigdelsetmask(&act->sa.sa_mask,
2458 sigmask(SIGKILL) | sigmask(SIGSTOP));
2457 /* 2459 /*
2458 * POSIX 3.3.1.3: 2460 * POSIX 3.3.1.3:
2459 * "Setting a signal action to SIG_IGN for a signal that is 2461 * "Setting a signal action to SIG_IGN for a signal that is
@@ -2479,8 +2481,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2479 read_lock(&tasklist_lock); 2481 read_lock(&tasklist_lock);
2480 spin_lock_irq(&t->sighand->siglock); 2482 spin_lock_irq(&t->sighand->siglock);
2481 *k = *act; 2483 *k = *act;
2482 sigdelsetmask(&k->sa.sa_mask,
2483 sigmask(SIGKILL) | sigmask(SIGSTOP));
2484 sigemptyset(&mask); 2484 sigemptyset(&mask);
2485 sigaddset(&mask, sig); 2485 sigaddset(&mask, sig);
2486 rm_from_queue_full(&mask, &t->signal->shared_pending); 2486 rm_from_queue_full(&mask, &t->signal->shared_pending);
@@ -2495,8 +2495,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2495 } 2495 }
2496 2496
2497 *k = *act; 2497 *k = *act;
2498 sigdelsetmask(&k->sa.sa_mask,
2499 sigmask(SIGKILL) | sigmask(SIGSTOP));
2500 } 2498 }
2501 2499
2502 spin_unlock_irq(&current->sighand->siglock); 2500 spin_unlock_irq(&current->sighand->siglock);
@@ -2702,6 +2700,7 @@ sys_signal(int sig, __sighandler_t handler)
2702 2700
2703 new_sa.sa.sa_handler = handler; 2701 new_sa.sa.sa_handler = handler;
2704 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; 2702 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2703 sigemptyset(&new_sa.sa.sa_mask);
2705 2704
2706 ret = do_sigaction(sig, &new_sa, &old_sa); 2705 ret = do_sigaction(sig, &new_sa, &old_sa);
2707 2706
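[Note on the signal.c hunks: sigdelsetmask() moves to the top of do_sigaction(), so SIGKILL and SIGSTOP are stripped from the requested sa_mask once, before the action is copied under the siglock, and sys_signal() now starts from an empty mask instead of uninitialised stack contents. A minimal sketch of preparing an action the way sys_signal() now does; the handler name is hypothetical:

    struct k_sigaction new_sa, old_sa;
    int ret;

    new_sa.sa.sa_handler = my_handler;              /* hypothetical handler */
    new_sa.sa.sa_flags   = SA_ONESHOT | SA_NOMASK;
    sigemptyset(&new_sa.sa.sa_mask);                /* no uninitialised bits */

    /* do_sigaction() itself now removes SIGKILL/SIGSTOP from sa_mask
     * before installing the action. */
    ret = do_sigaction(sig, &new_sa, &old_sa);
]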
diff --git a/kernel/sys.c b/kernel/sys.c
index d09cac23fdfd..f91218a5463e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -428,7 +428,7 @@ void kernel_kexec(void)
428{ 428{
429#ifdef CONFIG_KEXEC 429#ifdef CONFIG_KEXEC
430 struct kimage *image; 430 struct kimage *image;
431 image = xchg(&kexec_image, 0); 431 image = xchg(&kexec_image, NULL);
432 if (!image) { 432 if (!image) {
433 return; 433 return;
434 } 434 }
@@ -440,23 +440,25 @@ void kernel_kexec(void)
440} 440}
441EXPORT_SYMBOL_GPL(kernel_kexec); 441EXPORT_SYMBOL_GPL(kernel_kexec);
442 442
443void kernel_shutdown_prepare(enum system_states state)
444{
445 notifier_call_chain(&reboot_notifier_list,
446 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
447 system_state = state;
448 device_shutdown();
449}
443/** 450/**
444 * kernel_halt - halt the system 451 * kernel_halt - halt the system
445 * 452 *
446 * Shutdown everything and perform a clean system halt. 453 * Shutdown everything and perform a clean system halt.
447 */ 454 */
448void kernel_halt_prepare(void)
449{
450 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
451 system_state = SYSTEM_HALT;
452 device_shutdown();
453}
454void kernel_halt(void) 455void kernel_halt(void)
455{ 456{
456 kernel_halt_prepare(); 457 kernel_shutdown_prepare(SYSTEM_HALT);
457 printk(KERN_EMERG "System halted.\n"); 458 printk(KERN_EMERG "System halted.\n");
458 machine_halt(); 459 machine_halt();
459} 460}
461
460EXPORT_SYMBOL_GPL(kernel_halt); 462EXPORT_SYMBOL_GPL(kernel_halt);
461 463
462/** 464/**
@@ -464,20 +466,13 @@ EXPORT_SYMBOL_GPL(kernel_halt);
464 * 466 *
465 * Shutdown everything and perform a clean system power_off. 467 * Shutdown everything and perform a clean system power_off.
466 */ 468 */
467void kernel_power_off_prepare(void)
468{
469 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
470 system_state = SYSTEM_POWER_OFF;
471 device_shutdown();
472}
473void kernel_power_off(void) 469void kernel_power_off(void)
474{ 470{
475 kernel_power_off_prepare(); 471 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
476 printk(KERN_EMERG "Power down.\n"); 472 printk(KERN_EMERG "Power down.\n");
477 machine_power_off(); 473 machine_power_off();
478} 474}
479EXPORT_SYMBOL_GPL(kernel_power_off); 475EXPORT_SYMBOL_GPL(kernel_power_off);
480
481/* 476/*
482 * Reboot system call: for obvious reasons only root may call it, 477 * Reboot system call: for obvious reasons only root may call it,
483 * and even root needs to set up some magic numbers in the registers 478 * and even root needs to set up some magic numbers in the registers
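[Note on the sys.c hunks: kernel_halt_prepare() and kernel_power_off_prepare() collapse into one kernel_shutdown_prepare() helper parameterised by the target system state, so both shutdown flavours share the reboot-notifier call and device_shutdown(). Any path that previously open-coded that preparation would now go through the helper; a hypothetical emergency power-off handler, for instance:

    static void emergency_power_off(void)
    {
            /* Same preparation for both flavours, selected by state. */
            kernel_shutdown_prepare(SYSTEM_POWER_OFF);
            printk(KERN_EMERG "Emergency power down.\n");
            machine_power_off();
    }
]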
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 17313b99e53d..1067090db6b1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16);
104cond_syscall(sys_setuid16); 104cond_syscall(sys_setuid16);
105cond_syscall(sys_vm86old); 105cond_syscall(sys_vm86old);
106cond_syscall(sys_vm86); 106cond_syscall(sys_vm86);
107cond_syscall(compat_sys_ipc);
108cond_syscall(compat_sys_sysctl);
107 109
108/* arch-specific weak syscall entries */ 110/* arch-specific weak syscall entries */
109cond_syscall(sys_pciconfig_read); 111cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cb99a42f8b37..c05a2b7125e1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -44,14 +44,12 @@
44#include <linux/limits.h> 44#include <linux/limits.h>
45#include <linux/dcache.h> 45#include <linux/dcache.h>
46#include <linux/syscalls.h> 46#include <linux/syscalls.h>
47#include <linux/nfs_fs.h>
48#include <linux/acpi.h>
47 49
48#include <asm/uaccess.h> 50#include <asm/uaccess.h>
49#include <asm/processor.h> 51#include <asm/processor.h>
50 52
51#ifdef CONFIG_ROOT_NFS
52#include <linux/nfs_fs.h>
53#endif
54
55#if defined(CONFIG_SYSCTL) 53#if defined(CONFIG_SYSCTL)
56 54
57/* External variables not in a header file. */ 55/* External variables not in a header file. */
@@ -126,8 +124,6 @@ extern int sysctl_hz_timer;
126extern int acct_parm[]; 124extern int acct_parm[];
127#endif 125#endif
128 126
129int randomize_va_space = 1;
130
131static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, 127static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
132 ctl_table *, void **); 128 ctl_table *, void **);
133static int proc_doutsstring(ctl_table *table, int write, struct file *filp, 129static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
@@ -640,6 +636,7 @@ static ctl_table kern_table[] = {
640 .proc_handler = &proc_dointvec, 636 .proc_handler = &proc_dointvec,
641 }, 637 },
642#endif 638#endif
639#if defined(CONFIG_MMU)
643 { 640 {
644 .ctl_name = KERN_RANDOMIZE, 641 .ctl_name = KERN_RANDOMIZE,
645 .procname = "randomize_va_space", 642 .procname = "randomize_va_space",
@@ -648,6 +645,7 @@ static ctl_table kern_table[] = {
648 .mode = 0644, 645 .mode = 0644,
649 .proc_handler = &proc_dointvec, 646 .proc_handler = &proc_dointvec,
650 }, 647 },
648#endif
651#if defined(CONFIG_S390) && defined(CONFIG_SMP) 649#if defined(CONFIG_S390) && defined(CONFIG_SMP)
652 { 650 {
653 .ctl_name = KERN_SPIN_RETRY, 651 .ctl_name = KERN_SPIN_RETRY,
@@ -658,6 +656,16 @@ static ctl_table kern_table[] = {
658 .proc_handler = &proc_dointvec, 656 .proc_handler = &proc_dointvec,
659 }, 657 },
660#endif 658#endif
659#ifdef CONFIG_ACPI_SLEEP
660 {
661 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
662 .procname = "acpi_video_flags",
663 .data = &acpi_video_flags,
664 .maxlen = sizeof (unsigned long),
665 .mode = 0644,
666 .proc_handler = &proc_dointvec,
667 },
668#endif
661 { .ctl_name = 0 } 669 { .ctl_name = 0 }
662}; 670};
663 671
@@ -878,7 +886,17 @@ static ctl_table vm_table[] = {
878 .maxlen = sizeof(zone_reclaim_mode), 886 .maxlen = sizeof(zone_reclaim_mode),
879 .mode = 0644, 887 .mode = 0644,
880 .proc_handler = &proc_dointvec, 888 .proc_handler = &proc_dointvec,
881 .strategy = &zero, 889 .strategy = &sysctl_intvec,
890 .extra1 = &zero,
891 },
892 {
893 .ctl_name = VM_ZONE_RECLAIM_INTERVAL,
894 .procname = "zone_reclaim_interval",
895 .data = &zone_reclaim_interval,
896 .maxlen = sizeof(zone_reclaim_interval),
897 .mode = 0644,
898 .proc_handler = &proc_dointvec_jiffies,
899 .strategy = &sysctl_jiffies,
882 }, 900 },
883#endif 901#endif
884 { .ctl_name = 0 } 902 { .ctl_name = 0 }
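[Note on the sysctl.c hunks: the zone_reclaim_mode entry gains a real sysctl_intvec strategy with extra1 = &zero (a lower bound for the binary sysctl(2) path), and a jiffies-based zone_reclaim_interval knob is added beside it. A sketch of a minimal entry using the same shape; the variable and the binary ctl_name value are hypothetical, while 'zero' is the same static int already used in kernel/sysctl.c:

    static int example_knob;

    static ctl_table example_table[] = {
            {
                    .ctl_name     = 9999,                 /* hypothetical id */
                    .procname     = "example_knob",
                    .data         = &example_knob,
                    .maxlen       = sizeof(example_knob),
                    .mode         = 0644,
                    .proc_handler = &proc_dointvec,
                    .strategy     = &sysctl_intvec,
                    .extra1       = &zero,                /* lower bound: 0 */
            },
            { .ctl_name = 0 }
    };
]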
diff --git a/kernel/time.c b/kernel/time.c
index 7477b1d2079e..804539165d8b 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -155,7 +155,7 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
155 static int firsttime = 1; 155 static int firsttime = 1;
156 int error = 0; 156 int error = 0;
157 157
158 if (!timespec_valid(tv)) 158 if (tv && !timespec_valid(tv))
159 return -EINVAL; 159 return -EINVAL;
160 160
161 error = security_settime(tv, tz); 161 error = security_settime(tv, tz);
@@ -637,15 +637,16 @@ void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
637 * 637 *
638 * Returns the timespec representation of the nsec parameter. 638 * Returns the timespec representation of the nsec parameter.
639 */ 639 */
640inline struct timespec ns_to_timespec(const nsec_t nsec) 640struct timespec ns_to_timespec(const nsec_t nsec)
641{ 641{
642 struct timespec ts; 642 struct timespec ts;
643 643
644 if (nsec) 644 if (!nsec)
645 ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, 645 return (struct timespec) {0, 0};
646 &ts.tv_nsec); 646
647 else 647 ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);
648 ts.tv_sec = ts.tv_nsec = 0; 648 if (unlikely(nsec < 0))
649 set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
649 650
650 return ts; 651 return ts;
651} 652}
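[Note on the ns_to_timespec() hunk: the rewrite returns early for zero and, for negative inputs, renormalises so tv_nsec lands in [0, NSEC_PER_SEC). A small worked example of that normalisation in plain user-space arithmetic:

    /* -1.5 s in nanoseconds: signed division gives sec = -1, nsec = -5e8;
     * the borrow step then yields { -2, 500000000 }, i.e. -2 s + 0.5 s. */
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000L

    struct ts { long sec; long nsec; };

    static struct ts ns_to_ts(long long nsec)
    {
            struct ts t;

            if (!nsec)
                    return (struct ts){ 0, 0 };

            t.sec  = nsec / NSEC_PER_SEC;
            t.nsec = nsec % NSEC_PER_SEC;
            if (t.nsec < 0) {               /* normalise: 0 <= nsec < 1 s */
                    t.sec  -= 1;
                    t.nsec += NSEC_PER_SEC;
            }
            return t;
    }

    int main(void)
    {
            struct ts t = ns_to_ts(-1500000000LL);
            printf("%ld s, %ld ns\n", t.sec, t.nsec);   /* -2 s, 500000000 ns */
            return 0;
    }
]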
diff --git a/kernel/timer.c b/kernel/timer.c
index 4f1cb0ab5251..fe3a9a9f8328 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -495,7 +495,7 @@ unsigned long next_timer_interrupt(void)
495 base = &__get_cpu_var(tvec_bases); 495 base = &__get_cpu_var(tvec_bases);
496 spin_lock(&base->t_base.lock); 496 spin_lock(&base->t_base.lock);
497 expires = base->timer_jiffies + (LONG_MAX >> 1); 497 expires = base->timer_jiffies + (LONG_MAX >> 1);
498 list = 0; 498 list = NULL;
499 499
500 /* Look for timer events in tv1. */ 500 /* Look for timer events in tv1. */
501 j = base->timer_jiffies & TVR_MASK; 501 j = base->timer_jiffies & TVR_MASK;
@@ -717,12 +717,16 @@ static void second_overflow(void)
717#endif 717#endif
718} 718}
719 719
720/* in the NTP reference this is called "hardclock()" */ 720/*
721static void update_wall_time_one_tick(void) 721 * Returns how many microseconds we need to add to xtime this tick
722 * in doing an adjustment requested with adjtime.
723 */
724static long adjtime_adjustment(void)
722{ 725{
723 long time_adjust_step, delta_nsec; 726 long time_adjust_step;
724 727
725 if ((time_adjust_step = time_adjust) != 0 ) { 728 time_adjust_step = time_adjust;
729 if (time_adjust_step) {
726 /* 730 /*
727 * We are doing an adjtime thing. Prepare time_adjust_step to 731 * We are doing an adjtime thing. Prepare time_adjust_step to
728 * be within bounds. Note that a positive time_adjust means we 732 * be within bounds. Note that a positive time_adjust means we
@@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void)
733 */ 737 */
734 time_adjust_step = min(time_adjust_step, (long)tickadj); 738 time_adjust_step = min(time_adjust_step, (long)tickadj);
735 time_adjust_step = max(time_adjust_step, (long)-tickadj); 739 time_adjust_step = max(time_adjust_step, (long)-tickadj);
740 }
741 return time_adjust_step;
742}
736 743
744/* in the NTP reference this is called "hardclock()" */
745static void update_wall_time_one_tick(void)
746{
747 long time_adjust_step, delta_nsec;
748
749 time_adjust_step = adjtime_adjustment();
750 if (time_adjust_step)
737 /* Reduce by this step the amount of time left */ 751 /* Reduce by this step the amount of time left */
738 time_adjust -= time_adjust_step; 752 time_adjust -= time_adjust_step;
739 }
740 delta_nsec = tick_nsec + time_adjust_step * 1000; 753 delta_nsec = tick_nsec + time_adjust_step * 1000;
741 /* 754 /*
742 * Advance the phase, once it gets to one microsecond, then 755 * Advance the phase, once it gets to one microsecond, then
@@ -759,6 +772,22 @@ static void update_wall_time_one_tick(void)
759} 772}
760 773
761/* 774/*
775 * Return how long ticks are at the moment, that is, how much time
776 * update_wall_time_one_tick will add to xtime next time we call it
777 * (assuming no calls to do_adjtimex in the meantime).
778 * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
779 * bits to the right of the binary point.
780 * This function has no side-effects.
781 */
782u64 current_tick_length(void)
783{
784 long delta_nsec;
785
786 delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
787 return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
788}
789
790/*
762 * Using a loop looks inefficient, but "ticks" is 791 * Using a loop looks inefficient, but "ticks" is
763 * usually just one (we shouldn't be losing ticks, 792 * usually just one (we shouldn't be losing ticks,
764 * we're doing this this way mainly for interrupt 793 * we're doing this this way mainly for interrupt
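[Note on the timer.c hunks: current_tick_length() exposes the per-tick advance as fixed-point nanoseconds with SHIFT_SCALE-10 fractional bits, per the comment added above. A consumer that only needs whole nanoseconds would shift the fraction away; a hedged sketch, where the helper name is invented and SHIFT_SCALE is the constant from <linux/timex.h>:

    /* Truncate the fixed-point tick length to whole nanoseconds. */
    static inline u64 tick_length_ns(void)
    {
            return current_tick_length() >> (SHIFT_SCALE - 10);
    }
]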
diff --git a/kernel/user.c b/kernel/user.c
index 89e562feb1b1..d9deae43a9ab 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/bitops.h> 14#include <linux/bitops.h>
15#include <linux/key.h> 15#include <linux/key.h>
16#include <linux/interrupt.h>
16 17
17/* 18/*
18 * UID task count cache, to get fast user lookup in "alloc_uid" 19 * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -27,6 +28,16 @@
27 28
28static kmem_cache_t *uid_cachep; 29static kmem_cache_t *uid_cachep;
29static struct list_head uidhash_table[UIDHASH_SZ]; 30static struct list_head uidhash_table[UIDHASH_SZ];
31
32/*
33 * The uidhash_lock is mostly taken from process context, but it is
34 * occasionally also taken from softirq/tasklet context, when
35 * task-structs get RCU-freed. Hence all locking must be softirq-safe.
36 * But free_uid() is also called with local interrupts disabled, and running
37 * local_bh_enable() with local interrupts disabled is an error - we'll run
38 * softirq callbacks, and they can unconditionally enable interrupts, and
39 * the caller of free_uid() didn't expect that..
40 */
30static DEFINE_SPINLOCK(uidhash_lock); 41static DEFINE_SPINLOCK(uidhash_lock);
31 42
32struct user_struct root_user = { 43struct user_struct root_user = {
@@ -82,15 +93,19 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has
82struct user_struct *find_user(uid_t uid) 93struct user_struct *find_user(uid_t uid)
83{ 94{
84 struct user_struct *ret; 95 struct user_struct *ret;
96 unsigned long flags;
85 97
86 spin_lock(&uidhash_lock); 98 spin_lock_irqsave(&uidhash_lock, flags);
87 ret = uid_hash_find(uid, uidhashentry(uid)); 99 ret = uid_hash_find(uid, uidhashentry(uid));
88 spin_unlock(&uidhash_lock); 100 spin_unlock_irqrestore(&uidhash_lock, flags);
89 return ret; 101 return ret;
90} 102}
91 103
92void free_uid(struct user_struct *up) 104void free_uid(struct user_struct *up)
93{ 105{
106 unsigned long flags;
107
108 local_irq_save(flags);
94 if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { 109 if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
95 uid_hash_remove(up); 110 uid_hash_remove(up);
96 key_put(up->uid_keyring); 111 key_put(up->uid_keyring);
@@ -98,6 +113,7 @@ void free_uid(struct user_struct *up)
98 kmem_cache_free(uid_cachep, up); 113 kmem_cache_free(uid_cachep, up);
99 spin_unlock(&uidhash_lock); 114 spin_unlock(&uidhash_lock);
100 } 115 }
116 local_irq_restore(flags);
101} 117}
102 118
103struct user_struct * alloc_uid(uid_t uid) 119struct user_struct * alloc_uid(uid_t uid)
@@ -105,9 +121,9 @@ struct user_struct * alloc_uid(uid_t uid)
105 struct list_head *hashent = uidhashentry(uid); 121 struct list_head *hashent = uidhashentry(uid);
106 struct user_struct *up; 122 struct user_struct *up;
107 123
108 spin_lock(&uidhash_lock); 124 spin_lock_irq(&uidhash_lock);
109 up = uid_hash_find(uid, hashent); 125 up = uid_hash_find(uid, hashent);
110 spin_unlock(&uidhash_lock); 126 spin_unlock_irq(&uidhash_lock);
111 127
112 if (!up) { 128 if (!up) {
113 struct user_struct *new; 129 struct user_struct *new;
@@ -137,7 +153,7 @@ struct user_struct * alloc_uid(uid_t uid)
137 * Before adding this, check whether we raced 153 * Before adding this, check whether we raced
138 * on adding the same user already.. 154 * on adding the same user already..
139 */ 155 */
140 spin_lock(&uidhash_lock); 156 spin_lock_irq(&uidhash_lock);
141 up = uid_hash_find(uid, hashent); 157 up = uid_hash_find(uid, hashent);
142 if (up) { 158 if (up) {
143 key_put(new->uid_keyring); 159 key_put(new->uid_keyring);
@@ -147,7 +163,7 @@ struct user_struct * alloc_uid(uid_t uid)
147 uid_hash_insert(new, hashent); 163 uid_hash_insert(new, hashent);
148 up = new; 164 up = new;
149 } 165 }
150 spin_unlock(&uidhash_lock); 166 spin_unlock_irq(&uidhash_lock);
151 167
152 } 168 }
153 return up; 169 return up;
@@ -183,9 +199,9 @@ static int __init uid_cache_init(void)
183 INIT_LIST_HEAD(uidhash_table + n); 199 INIT_LIST_HEAD(uidhash_table + n);
184 200
185 /* Insert the root user immediately (init already runs as root) */ 201 /* Insert the root user immediately (init already runs as root) */
186 spin_lock(&uidhash_lock); 202 spin_lock_irq(&uidhash_lock);
187 uid_hash_insert(&root_user, uidhashentry(0)); 203 uid_hash_insert(&root_user, uidhashentry(0));
188 spin_unlock(&uidhash_lock); 204 spin_unlock_irq(&uidhash_lock);
189 205
190 return 0; 206 return 0;
191} 207}
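[Note on the user.c hunks: this is the classic "refcount drop that may take a softirq-shared lock" pattern. Interrupts are disabled around atomic_dec_and_lock() so uidhash_lock is never taken with softirqs able to run underneath it, and free_uid() stays safe even when its caller already has interrupts off. A stripped-down sketch of the same shape with a hypothetical object type and helpers:

    /* Drop a reference; only on the final put do we take the hash lock,
     * unlink and free.  Interrupts stay off across the whole sequence
     * because the lock is also taken from softirq context elsewhere. */
    void put_object(struct my_obj *obj)             /* hypothetical type */
    {
            unsigned long flags;

            local_irq_save(flags);
            if (obj && atomic_dec_and_lock(&obj->refcount, &hash_lock)) {
                    hash_remove(obj);               /* hypothetical helper */
                    kmem_cache_free(obj_cachep, obj);
                    spin_unlock(&hash_lock);
            }
            local_irq_restore(flags);
    }
]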