author     Adrian Bunk <bunk@r063144.stusta.swh.mhn.de>   2006-03-20 12:30:36 -0500
committer  Adrian Bunk <bunk@r063144.stusta.swh.mhn.de>   2006-03-20 12:30:36 -0500
commit     0f76ee451484d02c7405d92e7bceb39b415abb01 (patch)
tree       9722f84281f786ba48971dde057f5171a49969e4 /kernel
parent     01d206a7c1167639f6ca6dac22140fbdca017558 (diff)
parent     7705a8792b0fc82fd7d4dd923724606bbfd9fb20 (diff)
Merge with git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c        |   6
-rw-r--r--  kernel/compat.c         |   1
-rw-r--r--  kernel/cpuset.c         |  35
-rw-r--r--  kernel/exit.c           |   3
-rw-r--r--  kernel/fork.c           | 411
-rw-r--r--  kernel/hrtimer.c        |  48
-rw-r--r--  kernel/intermodule.c    |   3
-rw-r--r--  kernel/module.c         |   3
-rw-r--r--  kernel/panic.c          |   1
-rw-r--r--  kernel/posix-timers.c   |   1
-rw-r--r--  kernel/power/console.c  |   4
-rw-r--r--  kernel/power/power.h    |  16
-rw-r--r--  kernel/power/snapshot.c |   4
-rw-r--r--  kernel/power/swsusp.c   |  10
-rw-r--r--  kernel/ptrace.c         |  28
-rw-r--r--  kernel/rcupdate.c       |  76
-rw-r--r--  kernel/sched.c          | 178
-rw-r--r--  kernel/signal.c         |   9
-rw-r--r--  kernel/sys.c            |   2
-rw-r--r--  kernel/sys_ni.c         |   2
-rw-r--r--  kernel/sysctl.c         |  35
-rw-r--r--  kernel/timer.c          |  67
22 files changed, 664 insertions(+), 279 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 685c25175d96..d7e7e637b92a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -841,7 +841,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
841 841
842 for (aux = context->aux; aux; aux = aux->next) { 842 for (aux = context->aux; aux; aux = aux->next) {
843 843
844 ab = audit_log_start(context, GFP_KERNEL, aux->type); 844 ab = audit_log_start(context, gfp_mask, aux->type);
845 if (!ab) 845 if (!ab)
846 continue; /* audit_panic has been called */ 846 continue; /* audit_panic has been called */
847 847
@@ -878,14 +878,14 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
878 } 878 }
879 879
880 if (context->pwd && context->pwdmnt) { 880 if (context->pwd && context->pwdmnt) {
881 ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); 881 ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
882 if (ab) { 882 if (ab) {
883 audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); 883 audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
884 audit_log_end(ab); 884 audit_log_end(ab);
885 } 885 }
886 } 886 }
887 for (i = 0; i < context->name_count; i++) { 887 for (i = 0; i < context->name_count; i++) {
888 ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); 888 ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
889 if (!ab) 889 if (!ab)
890 continue; /* audit_panic has been called */ 890 continue; /* audit_panic has been called */
891 891
diff --git a/kernel/compat.c b/kernel/compat.c
index 1867290c37e3..8c9cd88b6785 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,7 +23,6 @@
23#include <linux/security.h> 23#include <linux/security.h>
24 24
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/bug.h>
27 26
28int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) 27int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
29{ 28{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba42b0a76961..12815d3f1a05 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
1977 * We don't need to task_lock() this reference to tsk->cpuset, 1977 * We don't need to task_lock() this reference to tsk->cpuset,
1978 * because tsk is already marked PF_EXITING, so attach_task() won't 1978 * because tsk is already marked PF_EXITING, so attach_task() won't
1979 * mess with it, or task is a failed fork, never visible to attach_task. 1979 * mess with it, or task is a failed fork, never visible to attach_task.
1980 *
1981 * Hack:
1982 *
1983 * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
1984 *
1985 * Don't leave a task unable to allocate memory, as that is an
1986 * accident waiting to happen should someone add a callout in
1987 * do_exit() after the cpuset_exit() call that might allocate.
1988 * If a task tries to allocate memory with an invalid cpuset,
1989 * it will oops in cpuset_update_task_memory_state().
1990 *
1991 * We call cpuset_exit() while the task is still competent to
1992 * handle notify_on_release(), then leave the task attached to
1993 * the root cpuset (top_cpuset) for the remainder of its exit.
1994 *
1995 * To do this properly, we would increment the reference count on
1996 * top_cpuset, and near the very end of the kernel/exit.c do_exit()
1997 * code we would add a second cpuset function call, to drop that
1998 * reference. This would just create an unnecessary hot spot on
1999 * the top_cpuset reference count, to no avail.
2000 *
2001 * Normally, holding a reference to a cpuset without bumping its
2002 * count is unsafe. The cpuset could go away, or someone could
2003 * attach us to a different cpuset, decrementing the count on
2004 * the first cpuset that we never incremented. But in this case,
2005 * top_cpuset isn't going away, and either task has PF_EXITING set,
2006 * which wards off any attach_task() attempts, or task is a failed
2007 * fork, never visible to attach_task.
2008 *
2009 * Another way to do this would be to set the cpuset pointer
2010 * to NULL here, and check in cpuset_update_task_memory_state()
2011 * for a NULL pointer. This hack avoids that NULL check, for no
2012 * cost (other than this way too long comment ;).
1980 **/ 2013 **/
1981 2014
1982void cpuset_exit(struct task_struct *tsk) 2015void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
1984 struct cpuset *cs; 2017 struct cpuset *cs;
1985 2018
1986 cs = tsk->cpuset; 2019 cs = tsk->cpuset;
1987 tsk->cpuset = NULL; 2020 tsk->cpuset = &top_cpuset; /* Hack - see comment above */
1988 2021
1989 if (notify_on_release(cs)) { 2022 if (notify_on_release(cs)) {
1990 char *pathbuf = NULL; 2023 char *pathbuf = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 93cee3671332..531aadca5530 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -360,6 +360,9 @@ void daemonize(const char *name, ...)
360 fs = init_task.fs; 360 fs = init_task.fs;
361 current->fs = fs; 361 current->fs = fs;
362 atomic_inc(&fs->count); 362 atomic_inc(&fs->count);
363 exit_namespace(current);
364 current->namespace = init_task.namespace;
365 get_namespace(current->namespace);
363 exit_files(current); 366 exit_files(current);
364 current->files = init_task.files; 367 current->files = init_task.files;
365 atomic_inc(&current->files->count); 368 atomic_inc(&current->files->count);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7f0ab5ee948c..b373322ca497 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,8 +108,10 @@ void free_task(struct task_struct *tsk)
108} 108}
109EXPORT_SYMBOL(free_task); 109EXPORT_SYMBOL(free_task);
110 110
111void __put_task_struct(struct task_struct *tsk) 111void __put_task_struct_cb(struct rcu_head *rhp)
112{ 112{
113 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
114
113 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 115 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
114 WARN_ON(atomic_read(&tsk->usage)); 116 WARN_ON(atomic_read(&tsk->usage));
115 WARN_ON(tsk == current); 117 WARN_ON(tsk == current);
@@ -446,6 +448,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
446 } 448 }
447} 449}
448 450
451/*
452 * Allocate a new mm structure and copy contents from the
453 * mm structure of the passed in task structure.
454 */
455static struct mm_struct *dup_mm(struct task_struct *tsk)
456{
457 struct mm_struct *mm, *oldmm = current->mm;
458 int err;
459
460 if (!oldmm)
461 return NULL;
462
463 mm = allocate_mm();
464 if (!mm)
465 goto fail_nomem;
466
467 memcpy(mm, oldmm, sizeof(*mm));
468
469 if (!mm_init(mm))
470 goto fail_nomem;
471
472 if (init_new_context(tsk, mm))
473 goto fail_nocontext;
474
475 err = dup_mmap(mm, oldmm);
476 if (err)
477 goto free_pt;
478
479 mm->hiwater_rss = get_mm_rss(mm);
480 mm->hiwater_vm = mm->total_vm;
481
482 return mm;
483
484free_pt:
485 mmput(mm);
486
487fail_nomem:
488 return NULL;
489
490fail_nocontext:
491 /*
492 * If init_new_context() failed, we cannot use mmput() to free the mm
493 * because it calls destroy_context()
494 */
495 mm_free_pgd(mm);
496 free_mm(mm);
497 return NULL;
498}
499
449static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 500static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
450{ 501{
451 struct mm_struct * mm, *oldmm; 502 struct mm_struct * mm, *oldmm;
@@ -473,43 +524,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
473 } 524 }
474 525
475 retval = -ENOMEM; 526 retval = -ENOMEM;
476 mm = allocate_mm(); 527 mm = dup_mm(tsk);
477 if (!mm) 528 if (!mm)
478 goto fail_nomem; 529 goto fail_nomem;
479 530
480 /* Copy the current MM stuff.. */
481 memcpy(mm, oldmm, sizeof(*mm));
482 if (!mm_init(mm))
483 goto fail_nomem;
484
485 if (init_new_context(tsk,mm))
486 goto fail_nocontext;
487
488 retval = dup_mmap(mm, oldmm);
489 if (retval)
490 goto free_pt;
491
492 mm->hiwater_rss = get_mm_rss(mm);
493 mm->hiwater_vm = mm->total_vm;
494
495good_mm: 531good_mm:
496 tsk->mm = mm; 532 tsk->mm = mm;
497 tsk->active_mm = mm; 533 tsk->active_mm = mm;
498 return 0; 534 return 0;
499 535
500free_pt:
501 mmput(mm);
502fail_nomem: 536fail_nomem:
503 return retval; 537 return retval;
504
505fail_nocontext:
506 /*
507 * If init_new_context() failed, we cannot use mmput() to free the mm
508 * because it calls destroy_context()
509 */
510 mm_free_pgd(mm);
511 free_mm(mm);
512 return retval;
513} 538}
514 539
515static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 540static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +622,17 @@ out:
597 return newf; 622 return newf;
598} 623}
599 624
600static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 625/*
626 * Allocate a new files structure and copy contents from the
627 * passed in files structure.
628 */
629static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
601{ 630{
602 struct files_struct *oldf, *newf; 631 struct files_struct *newf;
603 struct file **old_fds, **new_fds; 632 struct file **old_fds, **new_fds;
604 int open_files, size, i, error = 0, expand; 633 int open_files, size, i, expand;
605 struct fdtable *old_fdt, *new_fdt; 634 struct fdtable *old_fdt, *new_fdt;
606 635
607 /*
608 * A background process may not have any files ...
609 */
610 oldf = current->files;
611 if (!oldf)
612 goto out;
613
614 if (clone_flags & CLONE_FILES) {
615 atomic_inc(&oldf->count);
616 goto out;
617 }
618
619 /*
620 * Note: we may be using current for both targets (See exec.c)
621 * This works because we cache current->files (old) as oldf. Don't
622 * break this.
623 */
624 tsk->files = NULL;
625 error = -ENOMEM;
626 newf = alloc_files(); 636 newf = alloc_files();
627 if (!newf) 637 if (!newf)
628 goto out; 638 goto out;
@@ -651,9 +661,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
651 if (expand) { 661 if (expand) {
652 spin_unlock(&oldf->file_lock); 662 spin_unlock(&oldf->file_lock);
653 spin_lock(&newf->file_lock); 663 spin_lock(&newf->file_lock);
654 error = expand_files(newf, open_files-1); 664 *errorp = expand_files(newf, open_files-1);
655 spin_unlock(&newf->file_lock); 665 spin_unlock(&newf->file_lock);
656 if (error < 0) 666 if (*errorp < 0)
657 goto out_release; 667 goto out_release;
658 new_fdt = files_fdtable(newf); 668 new_fdt = files_fdtable(newf);
659 /* 669 /*
@@ -702,10 +712,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
702 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 712 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
703 } 713 }
704 714
705 tsk->files = newf;
706 error = 0;
707out: 715out:
708 return error; 716 return newf;
709 717
710out_release: 718out_release:
711 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); 719 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +723,40 @@ out_release:
715 goto out; 723 goto out;
716} 724}
717 725
726static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
727{
728 struct files_struct *oldf, *newf;
729 int error = 0;
730
731 /*
732 * A background process may not have any files ...
733 */
734 oldf = current->files;
735 if (!oldf)
736 goto out;
737
738 if (clone_flags & CLONE_FILES) {
739 atomic_inc(&oldf->count);
740 goto out;
741 }
742
743 /*
744 * Note: we may be using current for both targets (See exec.c)
745 * This works because we cache current->files (old) as oldf. Don't
746 * break this.
747 */
748 tsk->files = NULL;
749 error = -ENOMEM;
750 newf = dup_fd(oldf, &error);
751 if (!newf)
752 goto out;
753
754 tsk->files = newf;
755 error = 0;
756out:
757 return error;
758}
759
718/* 760/*
719 * Helper to unshare the files of the current task. 761 * Helper to unshare the files of the current task.
720 * We don't want to expose copy_files internals to 762 * We don't want to expose copy_files internals to
@@ -1020,6 +1062,12 @@ static task_t *copy_process(unsigned long clone_flags,
1020 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1062 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1021 1063
1022 /* 1064 /*
1065 * sigaltstack should be cleared when sharing the same VM
1066 */
1067 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
1068 p->sas_ss_sp = p->sas_ss_size = 0;
1069
1070 /*
1023 * Syscall tracing should be turned off in the child regardless 1071 * Syscall tracing should be turned off in the child regardless
1024 * of CLONE_PTRACE. 1072 * of CLONE_PTRACE.
1025 */ 1073 */
@@ -1083,8 +1131,8 @@ static task_t *copy_process(unsigned long clone_flags,
1083 p->real_parent = current; 1131 p->real_parent = current;
1084 p->parent = p->real_parent; 1132 p->parent = p->real_parent;
1085 1133
1134 spin_lock(&current->sighand->siglock);
1086 if (clone_flags & CLONE_THREAD) { 1135 if (clone_flags & CLONE_THREAD) {
1087 spin_lock(&current->sighand->siglock);
1088 /* 1136 /*
1089 * Important: if an exit-all has been started then 1137 * Important: if an exit-all has been started then
1090 * do not create this new thread - the whole thread 1138 * do not create this new thread - the whole thread
@@ -1122,8 +1170,6 @@ static task_t *copy_process(unsigned long clone_flags,
1122 */ 1170 */
1123 p->it_prof_expires = jiffies_to_cputime(1); 1171 p->it_prof_expires = jiffies_to_cputime(1);
1124 } 1172 }
1125
1126 spin_unlock(&current->sighand->siglock);
1127 } 1173 }
1128 1174
1129 /* 1175 /*
@@ -1135,8 +1181,6 @@ static task_t *copy_process(unsigned long clone_flags,
1135 if (unlikely(p->ptrace & PT_PTRACED)) 1181 if (unlikely(p->ptrace & PT_PTRACED))
1136 __ptrace_link(p, current->parent); 1182 __ptrace_link(p, current->parent);
1137 1183
1138 attach_pid(p, PIDTYPE_PID, p->pid);
1139 attach_pid(p, PIDTYPE_TGID, p->tgid);
1140 if (thread_group_leader(p)) { 1184 if (thread_group_leader(p)) {
1141 p->signal->tty = current->signal->tty; 1185 p->signal->tty = current->signal->tty;
1142 p->signal->pgrp = process_group(current); 1186 p->signal->pgrp = process_group(current);
@@ -1146,9 +1190,12 @@ static task_t *copy_process(unsigned long clone_flags,
1146 if (p->pid) 1190 if (p->pid)
1147 __get_cpu_var(process_counts)++; 1191 __get_cpu_var(process_counts)++;
1148 } 1192 }
1193 attach_pid(p, PIDTYPE_TGID, p->tgid);
1194 attach_pid(p, PIDTYPE_PID, p->pid);
1149 1195
1150 nr_threads++; 1196 nr_threads++;
1151 total_forks++; 1197 total_forks++;
1198 spin_unlock(&current->sighand->siglock);
1152 write_unlock_irq(&tasklist_lock); 1199 write_unlock_irq(&tasklist_lock);
1153 proc_fork_connector(p); 1200 proc_fork_connector(p);
1154 return p; 1201 return p;
@@ -1323,3 +1370,247 @@ void __init proc_caches_init(void)
1323 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1370 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1324 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1371 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1325} 1372}
1373
1374
1375/*
1376 * Check constraints on flags passed to the unshare system call and
1377 * force unsharing of additional process context as appropriate.
1378 */
1379static inline void check_unshare_flags(unsigned long *flags_ptr)
1380{
1381 /*
1382 * If unsharing a thread from a thread group, must also
1383 * unshare vm.
1384 */
1385 if (*flags_ptr & CLONE_THREAD)
1386 *flags_ptr |= CLONE_VM;
1387
1388 /*
1389 * If unsharing vm, must also unshare signal handlers.
1390 */
1391 if (*flags_ptr & CLONE_VM)
1392 *flags_ptr |= CLONE_SIGHAND;
1393
1394 /*
1395 * If unsharing signal handlers and the task was created
1396 * using CLONE_THREAD, then must unshare the thread
1397 */
1398 if ((*flags_ptr & CLONE_SIGHAND) &&
1399 (atomic_read(&current->signal->count) > 1))
1400 *flags_ptr |= CLONE_THREAD;
1401
1402 /*
1403 * If unsharing namespace, must also unshare filesystem information.
1404 */
1405 if (*flags_ptr & CLONE_NEWNS)
1406 *flags_ptr |= CLONE_FS;
1407}
1408
1409/*
1410 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1411 */
1412static int unshare_thread(unsigned long unshare_flags)
1413{
1414 if (unshare_flags & CLONE_THREAD)
1415 return -EINVAL;
1416
1417 return 0;
1418}
1419
1420/*
1421 * Unshare the filesystem structure if it is being shared
1422 */
1423static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1424{
1425 struct fs_struct *fs = current->fs;
1426
1427 if ((unshare_flags & CLONE_FS) &&
1428 (fs && atomic_read(&fs->count) > 1)) {
1429 *new_fsp = __copy_fs_struct(current->fs);
1430 if (!*new_fsp)
1431 return -ENOMEM;
1432 }
1433
1434 return 0;
1435}
1436
1437/*
1438 * Unshare the namespace structure if it is being shared
1439 */
1440static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1441{
1442 struct namespace *ns = current->namespace;
1443
1444 if ((unshare_flags & CLONE_NEWNS) &&
1445 (ns && atomic_read(&ns->count) > 1)) {
1446 if (!capable(CAP_SYS_ADMIN))
1447 return -EPERM;
1448
1449 *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
1450 if (!*new_nsp)
1451 return -ENOMEM;
1452 }
1453
1454 return 0;
1455}
1456
1457/*
1458 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
1459 * supported yet
1460 */
1461static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1462{
1463 struct sighand_struct *sigh = current->sighand;
1464
1465 if ((unshare_flags & CLONE_SIGHAND) &&
1466 (sigh && atomic_read(&sigh->count) > 1))
1467 return -EINVAL;
1468 else
1469 return 0;
1470}
1471
1472/*
1473 * Unshare vm if it is being shared
1474 */
1475static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1476{
1477 struct mm_struct *mm = current->mm;
1478
1479 if ((unshare_flags & CLONE_VM) &&
1480 (mm && atomic_read(&mm->mm_users) > 1)) {
1481 return -EINVAL;
1482 }
1483
1484 return 0;
1485}
1486
1487/*
1488 * Unshare file descriptor table if it is being shared
1489 */
1490static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1491{
1492 struct files_struct *fd = current->files;
1493 int error = 0;
1494
1495 if ((unshare_flags & CLONE_FILES) &&
1496 (fd && atomic_read(&fd->count) > 1)) {
1497 *new_fdp = dup_fd(fd, &error);
1498 if (!*new_fdp)
1499 return error;
1500 }
1501
1502 return 0;
1503}
1504
1505/*
1506 * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
1507 * supported yet
1508 */
1509static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
1510{
1511 if (unshare_flags & CLONE_SYSVSEM)
1512 return -EINVAL;
1513
1514 return 0;
1515}
1516
1517/*
1518 * unshare allows a process to 'unshare' part of the process
1519 * context which was originally shared using clone. copy_*
1520 * functions used by do_fork() cannot be used here directly
1521 * because they modify an inactive task_struct that is being
1522 * constructed. Here we are modifying the current, active,
1523 * task_struct.
1524 */
1525asmlinkage long sys_unshare(unsigned long unshare_flags)
1526{
1527 int err = 0;
1528 struct fs_struct *fs, *new_fs = NULL;
1529 struct namespace *ns, *new_ns = NULL;
1530 struct sighand_struct *sigh, *new_sigh = NULL;
1531 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1532 struct files_struct *fd, *new_fd = NULL;
1533 struct sem_undo_list *new_ulist = NULL;
1534
1535 check_unshare_flags(&unshare_flags);
1536
1537 if ((err = unshare_thread(unshare_flags)))
1538 goto bad_unshare_out;
1539 if ((err = unshare_fs(unshare_flags, &new_fs)))
1540 goto bad_unshare_cleanup_thread;
1541 if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
1542 goto bad_unshare_cleanup_fs;
1543 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1544 goto bad_unshare_cleanup_ns;
1545 if ((err = unshare_vm(unshare_flags, &new_mm)))
1546 goto bad_unshare_cleanup_sigh;
1547 if ((err = unshare_fd(unshare_flags, &new_fd)))
1548 goto bad_unshare_cleanup_vm;
1549 if ((err = unshare_semundo(unshare_flags, &new_ulist)))
1550 goto bad_unshare_cleanup_fd;
1551
1552 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
1553
1554 task_lock(current);
1555
1556 if (new_fs) {
1557 fs = current->fs;
1558 current->fs = new_fs;
1559 new_fs = fs;
1560 }
1561
1562 if (new_ns) {
1563 ns = current->namespace;
1564 current->namespace = new_ns;
1565 new_ns = ns;
1566 }
1567
1568 if (new_sigh) {
1569 sigh = current->sighand;
1570 rcu_assign_pointer(current->sighand, new_sigh);
1571 new_sigh = sigh;
1572 }
1573
1574 if (new_mm) {
1575 mm = current->mm;
1576 active_mm = current->active_mm;
1577 current->mm = new_mm;
1578 current->active_mm = new_mm;
1579 activate_mm(active_mm, new_mm);
1580 new_mm = mm;
1581 }
1582
1583 if (new_fd) {
1584 fd = current->files;
1585 current->files = new_fd;
1586 new_fd = fd;
1587 }
1588
1589 task_unlock(current);
1590 }
1591
1592bad_unshare_cleanup_fd:
1593 if (new_fd)
1594 put_files_struct(new_fd);
1595
1596bad_unshare_cleanup_vm:
1597 if (new_mm)
1598 mmput(new_mm);
1599
1600bad_unshare_cleanup_sigh:
1601 if (new_sigh)
1602 if (atomic_dec_and_test(&new_sigh->count))
1603 kmem_cache_free(sighand_cachep, new_sigh);
1604
1605bad_unshare_cleanup_ns:
1606 if (new_ns)
1607 put_namespace(new_ns);
1608
1609bad_unshare_cleanup_fs:
1610 if (new_fs)
1611 put_fs_struct(new_fs);
1612
1613bad_unshare_cleanup_thread:
1614bad_unshare_out:
1615 return err;
1616}
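
For context, a minimal userspace sketch of exercising the sys_unshare() entry point added above. The __NR_unshare constant and the header choices are assumptions about how the syscall ends up being wired on a given architecture; they are not part of this patch.

	/* Hypothetical example: give this process a private mount namespace. */
	#include <sched.h>         /* CLONE_NEWNS, CLONE_FS */
	#include <sys/syscall.h>   /* __NR_unshare -- assumed to be defined here */
	#include <unistd.h>
	#include <stdio.h>

	int main(void)
	{
		/* check_unshare_flags() above forces CLONE_FS on when CLONE_NEWNS is set. */
		if (syscall(__NR_unshare, CLONE_NEWNS) == -1) {
			perror("unshare");   /* e.g. EPERM without CAP_SYS_ADMIN */
			return 1;
		}
		/* From here on, mounts made by this process are no longer shared. */
		return 0;
	}

In this version, CLONE_THREAD and CLONE_SYSVSEM requests are rejected outright, and CLONE_SIGHAND or CLONE_VM requests fail with -EINVAL whenever that context is actually shared, so only the CLONE_FS, CLONE_NEWNS and CLONE_FILES cases can take effect.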
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b6e1757aedd..14bc9cfa6399 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -418,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
418 /* Switch the timer base, if necessary: */ 418 /* Switch the timer base, if necessary: */
419 new_base = switch_hrtimer_base(timer, base); 419 new_base = switch_hrtimer_base(timer, base);
420 420
421 if (mode == HRTIMER_REL) 421 if (mode == HRTIMER_REL) {
422 tim = ktime_add(tim, new_base->get_time()); 422 tim = ktime_add(tim, new_base->get_time());
423 /*
424 * CONFIG_TIME_LOW_RES is a temporary way for architectures
425 * to signal that they simply return xtime in
426 * do_gettimeoffset(). In this case we want to round up by
427 * resolution when starting a relative timer, to avoid short
428 * timeouts. This will go away with the GTOD framework.
429 */
430#ifdef CONFIG_TIME_LOW_RES
431 tim = ktime_add(tim, base->resolution);
432#endif
433 }
423 timer->expires = tim; 434 timer->expires = tim;
424 435
425 enqueue_hrtimer(timer, new_base); 436 enqueue_hrtimer(timer, new_base);
@@ -494,6 +505,41 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
494 return rem; 505 return rem;
495} 506}
496 507
508#ifdef CONFIG_NO_IDLE_HZ
509/**
510 * hrtimer_get_next_event - get the time until next expiry event
511 *
512 * Returns the delta to the next expiry event or KTIME_MAX if no timer
513 * is pending.
514 */
515ktime_t hrtimer_get_next_event(void)
516{
517 struct hrtimer_base *base = __get_cpu_var(hrtimer_bases);
518 ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
519 unsigned long flags;
520 int i;
521
522 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
523 struct hrtimer *timer;
524
525 spin_lock_irqsave(&base->lock, flags);
526 if (!base->first) {
527 spin_unlock_irqrestore(&base->lock, flags);
528 continue;
529 }
530 timer = rb_entry(base->first, struct hrtimer, node);
531 delta.tv64 = timer->expires.tv64;
532 spin_unlock_irqrestore(&base->lock, flags);
533 delta = ktime_sub(delta, base->get_time());
534 if (delta.tv64 < mindelta.tv64)
535 mindelta.tv64 = delta.tv64;
536 }
537 if (mindelta.tv64 < 0)
538 mindelta.tv64 = 0;
539 return mindelta;
540}
541#endif
542
497/** 543/**
498 * hrtimer_init - initialize a timer to the given clock 544 * hrtimer_init - initialize a timer to the given clock
499 * 545 *
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 0cbe633420fb..55b1e5b85db9 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -179,3 +179,6 @@ EXPORT_SYMBOL(inter_module_register);
179EXPORT_SYMBOL(inter_module_unregister); 179EXPORT_SYMBOL(inter_module_unregister);
180EXPORT_SYMBOL(inter_module_get_request); 180EXPORT_SYMBOL(inter_module_get_request);
181EXPORT_SYMBOL(inter_module_put); 181EXPORT_SYMBOL(inter_module_put);
182
183MODULE_LICENSE("GPL");
184
diff --git a/kernel/module.c b/kernel/module.c
index e058aedf6b93..5aad477ddc79 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1670,6 +1670,9 @@ static struct module *load_module(void __user *umod,
1670 goto free_mod; 1670 goto free_mod;
1671 } 1671 }
1672 1672
1673 /* Userspace could have altered the string after the strlen_user() */
1674 args[arglen - 1] = '\0';
1675
1673 if (find_module(mod->name)) { 1676 if (find_module(mod->name)) {
1674 err = -EEXIST; 1677 err = -EEXIST;
1675 goto free_mod; 1678 goto free_mod;
diff --git a/kernel/panic.c b/kernel/panic.c
index c5c4ab255834..126dc43f1c74 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...)
130#endif 130#endif
131 local_irq_enable(); 131 local_irq_enable();
132 for (i = 0;;) { 132 for (i = 0;;) {
133 touch_softlockup_watchdog();
133 i += panic_blink(i); 134 i += panic_blink(i);
134 mdelay(1); 135 mdelay(1);
135 i++; 136 i++;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 216f574b5ffb..fa895fc2ecf5 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -353,6 +353,7 @@ static int posix_timer_fn(void *data)
353 hrtimer_forward(&timr->it.real.timer, 353 hrtimer_forward(&timr->it.real.timer,
354 timr->it.real.interval); 354 timr->it.real.interval);
355 ret = HRTIMER_RESTART; 355 ret = HRTIMER_RESTART;
356 ++timr->it_requeue_pending;
356 } 357 }
357 } 358 }
358 359
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 579d239d129f..623786d44159 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,7 +9,9 @@
9#include <linux/console.h> 9#include <linux/console.h>
10#include "power.h" 10#include "power.h"
11 11
12#ifdef SUSPEND_CONSOLE 12#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
13#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
14
13static int orig_fgconsole, orig_kmsg; 15static int orig_fgconsole, orig_kmsg;
14 16
15int pm_prepare_console(void) 17int pm_prepare_console(void)
diff --git a/kernel/power/power.h b/kernel/power/power.h
index d8f0d1a76bae..388dba680841 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,14 +1,6 @@
1#include <linux/suspend.h> 1#include <linux/suspend.h>
2#include <linux/utsname.h> 2#include <linux/utsname.h>
3 3
4/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but
5 we probably do not take enough locks for switching consoles, etc,
6 so bad things might happen.
7*/
8#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
9#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
10#endif
11
12struct swsusp_info { 4struct swsusp_info {
13 struct new_utsname uts; 5 struct new_utsname uts;
14 u32 version_code; 6 u32 version_code;
@@ -42,14 +34,6 @@ static struct subsys_attribute _name##_attr = { \
42 34
43extern struct subsystem power_subsys; 35extern struct subsystem power_subsys;
44 36
45#ifdef SUSPEND_CONSOLE
46extern int pm_prepare_console(void);
47extern void pm_restore_console(void);
48#else
49static int pm_prepare_console(void) { return 0; }
50static void pm_restore_console(void) {}
51#endif
52
53/* References to section boundaries */ 37/* References to section boundaries */
54extern const void __nosave_begin, __nosave_end; 38extern const void __nosave_begin, __nosave_end;
55 39
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 41f66365f0d8..8d5a5986d621 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone)
91 * corrected eventually when the cases giving rise to this 91 * corrected eventually when the cases giving rise to this
92 * are better understood. 92 * are better understood.
93 */ 93 */
94 if (PageReserved(page)) { 94 if (PageReserved(page))
95 printk("highmem reserved page?!\n");
96 continue; 95 continue;
97 }
98 BUG_ON(PageNosave(page)); 96 BUG_ON(PageNosave(page));
99 if (PageNosaveFree(page)) 97 if (PageNosaveFree(page))
100 continue; 98 continue;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 59c91c148e82..2d9d08f72f76 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
153{ 153{
154 int i; 154 int i;
155 155
156 if (!swsusp_resume_device)
157 return -ENODEV;
158 spin_lock(&swap_lock); 156 spin_lock(&swap_lock);
159 for (i = 0; i < MAX_SWAPFILES; i++) { 157 for (i = 0; i < MAX_SWAPFILES; i++) {
160 if (!(swap_info[i].flags & SWP_WRITEOK)) 158 if (!(swap_info[i].flags & SWP_WRITEOK))
161 continue; 159 continue;
162 if (is_resume_device(swap_info + i)) { 160 if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
163 spin_unlock(&swap_lock); 161 spin_unlock(&swap_lock);
164 root_swap = i; 162 root_swap = i;
165 return 0; 163 return 0;
@@ -743,7 +741,6 @@ static int submit(int rw, pgoff_t page_off, void *page)
743 if (!bio) 741 if (!bio)
744 return -ENOMEM; 742 return -ENOMEM;
745 bio->bi_sector = page_off * (PAGE_SIZE >> 9); 743 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
746 bio_get(bio);
747 bio->bi_bdev = resume_bdev; 744 bio->bi_bdev = resume_bdev;
748 bio->bi_end_io = end_io; 745 bio->bi_end_io = end_io;
749 746
@@ -753,14 +750,13 @@ static int submit(int rw, pgoff_t page_off, void *page)
753 goto Done; 750 goto Done;
754 } 751 }
755 752
756 if (rw == WRITE)
757 bio_set_pages_dirty(bio);
758 753
759 atomic_set(&io_done, 1); 754 atomic_set(&io_done, 1);
760 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 755 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
761 while (atomic_read(&io_done)) 756 while (atomic_read(&io_done))
762 yield(); 757 yield();
763 758 if (rw == READ)
759 bio_set_pages_dirty(bio);
764 Done: 760 Done:
765 bio_put(bio); 761 bio_put(bio);
766 return error; 762 return error;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5f33cdb6fff5..d95a72c9279d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
72 */ 72 */
73void __ptrace_unlink(task_t *child) 73void __ptrace_unlink(task_t *child)
74{ 74{
75 if (!child->ptrace) 75 BUG_ON(!child->ptrace);
76 BUG(); 76
77 child->ptrace = 0; 77 child->ptrace = 0;
78 if (!list_empty(&child->ptrace_list)) { 78 if (!list_empty(&child->ptrace_list)) {
79 list_del_init(&child->ptrace_list); 79 list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
184 return retval; 184 return retval;
185} 185}
186 186
187void __ptrace_detach(struct task_struct *child, unsigned int data)
188{
189 child->exit_code = data;
190 /* .. re-parent .. */
191 __ptrace_unlink(child);
192 /* .. and wake it up. */
193 if (child->exit_state != EXIT_ZOMBIE)
194 wake_up_process(child);
195}
196
187int ptrace_detach(struct task_struct *child, unsigned int data) 197int ptrace_detach(struct task_struct *child, unsigned int data)
188{ 198{
189 if (!valid_signal(data)) 199 if (!valid_signal(data))
190 return -EIO; 200 return -EIO;
191 201
192 /* Architecture-specific hardware disable .. */ 202 /* Architecture-specific hardware disable .. */
193 ptrace_disable(child); 203 ptrace_disable(child);
194 204
195 /* .. re-parent .. */
196 child->exit_code = data;
197
198 write_lock_irq(&tasklist_lock); 205 write_lock_irq(&tasklist_lock);
199 __ptrace_unlink(child); 206 if (child->ptrace)
200 /* .. and wake it up. */ 207 __ptrace_detach(child, data);
201 if (child->exit_state != EXIT_ZOMBIE)
202 wake_up_process(child);
203 write_unlock_irq(&tasklist_lock); 208 write_unlock_irq(&tasklist_lock);
204 209
205 return 0; 210 return 0;
@@ -242,8 +247,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
242 if (write) { 247 if (write) {
243 copy_to_user_page(vma, page, addr, 248 copy_to_user_page(vma, page, addr,
244 maddr + offset, buf, bytes); 249 maddr + offset, buf, bytes);
245 if (!PageCompound(page)) 250 set_page_dirty_lock(page);
246 set_page_dirty_lock(page);
247 } else { 251 } else {
248 copy_from_user_page(vma, page, addr, 252 copy_from_user_page(vma, page, addr,
249 buf, maddr + offset, bytes); 253 buf, maddr + offset, bytes);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 0cf8146bd585..8cf15a569fcd 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
67 67
68/* Fake initialization required by compiler */ 68/* Fake initialization required by compiler */
69static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; 69static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
70static int maxbatch = 10000; 70static int blimit = 10;
71static int qhimark = 10000;
72static int qlowmark = 100;
73#ifdef CONFIG_SMP
74static int rsinterval = 1000;
75#endif
76
77static atomic_t rcu_barrier_cpu_count;
78static struct semaphore rcu_barrier_sema;
79static struct completion rcu_barrier_completion;
80
81#ifdef CONFIG_SMP
82static void force_quiescent_state(struct rcu_data *rdp,
83 struct rcu_ctrlblk *rcp)
84{
85 int cpu;
86 cpumask_t cpumask;
87 set_need_resched();
88 if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
89 rdp->last_rs_qlen = rdp->qlen;
90 /*
91 * Don't send IPI to itself. With irqs disabled,
92 * rdp->cpu is the current cpu.
93 */
94 cpumask = rcp->cpumask;
95 cpu_clear(rdp->cpu, cpumask);
96 for_each_cpu_mask(cpu, cpumask)
97 smp_send_reschedule(cpu);
98 }
99}
100#else
101static inline void force_quiescent_state(struct rcu_data *rdp,
102 struct rcu_ctrlblk *rcp)
103{
104 set_need_resched();
105}
106#endif
71 107
72/** 108/**
73 * call_rcu - Queue an RCU callback for invocation after a grace period. 109 * call_rcu - Queue an RCU callback for invocation after a grace period.
@@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head,
92 rdp = &__get_cpu_var(rcu_data); 128 rdp = &__get_cpu_var(rcu_data);
93 *rdp->nxttail = head; 129 *rdp->nxttail = head;
94 rdp->nxttail = &head->next; 130 rdp->nxttail = &head->next;
95 131 if (unlikely(++rdp->qlen > qhimark)) {
96 if (unlikely(++rdp->count > 10000)) 132 rdp->blimit = INT_MAX;
97 set_need_resched(); 133 force_quiescent_state(rdp, &rcu_ctrlblk);
98 134 }
99 local_irq_restore(flags); 135 local_irq_restore(flags);
100} 136}
101 137
102static atomic_t rcu_barrier_cpu_count;
103static struct semaphore rcu_barrier_sema;
104static struct completion rcu_barrier_completion;
105
106/** 138/**
107 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. 139 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
108 * @head: structure to be used for queueing the RCU updates. 140 * @head: structure to be used for queueing the RCU updates.
@@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head,
131 rdp = &__get_cpu_var(rcu_bh_data); 163 rdp = &__get_cpu_var(rcu_bh_data);
132 *rdp->nxttail = head; 164 *rdp->nxttail = head;
133 rdp->nxttail = &head->next; 165 rdp->nxttail = &head->next;
134 rdp->count++; 166
135/* 167 if (unlikely(++rdp->qlen > qhimark)) {
136 * Should we directly call rcu_do_batch() here ? 168 rdp->blimit = INT_MAX;
137 * if (unlikely(rdp->count > 10000)) 169 force_quiescent_state(rdp, &rcu_bh_ctrlblk);
138 * rcu_do_batch(rdp); 170 }
139 */ 171
140 local_irq_restore(flags); 172 local_irq_restore(flags);
141} 173}
142 174
@@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
199 next = rdp->donelist = list->next; 231 next = rdp->donelist = list->next;
200 list->func(list); 232 list->func(list);
201 list = next; 233 list = next;
202 rdp->count--; 234 rdp->qlen--;
203 if (++count >= maxbatch) 235 if (++count >= rdp->blimit)
204 break; 236 break;
205 } 237 }
238 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
239 rdp->blimit = blimit;
206 if (!rdp->donelist) 240 if (!rdp->donelist)
207 rdp->donetail = &rdp->donelist; 241 rdp->donetail = &rdp->donelist;
208 else 242 else
@@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
473 rdp->quiescbatch = rcp->completed; 507 rdp->quiescbatch = rcp->completed;
474 rdp->qs_pending = 0; 508 rdp->qs_pending = 0;
475 rdp->cpu = cpu; 509 rdp->cpu = cpu;
510 rdp->blimit = blimit;
476} 511}
477 512
478static void __devinit rcu_online_cpu(int cpu) 513static void __devinit rcu_online_cpu(int cpu)
@@ -567,7 +602,12 @@ void synchronize_kernel(void)
567 synchronize_rcu(); 602 synchronize_rcu();
568} 603}
569 604
570module_param(maxbatch, int, 0); 605module_param(blimit, int, 0);
606module_param(qhimark, int, 0);
607module_param(qlowmark, int, 0);
608#ifdef CONFIG_SMP
609module_param(rsinterval, int, 0);
610#endif
571EXPORT_SYMBOL_GPL(rcu_batches_completed); 611EXPORT_SYMBOL_GPL(rcu_batches_completed);
572EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ 612EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */
573EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ 613EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */
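
The hunk above replaces the single maxbatch module parameter with blimit, qhimark and qlowmark (plus rsinterval on SMP). As a sketch of how they could be tuned, assuming rcupdate.c is built in so module_param() exposes them with the usual "rcupdate." prefix on the kernel command line (that prefix is an assumption, not something this patch defines):

	rcupdate.blimit=20 rcupdate.qhimark=20000 rcupdate.qlowmark=200 rcupdate.rsinterval=2000

Raising qhimark delays the point at which call_rcu() switches to unlimited batching and forces a quiescent state, while qlowmark controls when rcu_do_batch() drops the batch limit back to blimit.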
diff --git a/kernel/sched.c b/kernel/sched.c
index f77f23f8f479..4d46e90f59c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p)
178#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ 178#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
179 < (long long) (sd)->cache_hot_time) 179 < (long long) (sd)->cache_hot_time)
180 180
181void __put_task_struct_cb(struct rcu_head *rhp)
182{
183 __put_task_struct(container_of(rhp, struct task_struct, rcu));
184}
185
186EXPORT_SYMBOL_GPL(__put_task_struct_cb);
187
188/* 181/*
189 * These are the runqueue data structures: 182 * These are the runqueue data structures:
190 */ 183 */
@@ -215,7 +208,6 @@ struct runqueue {
215 */ 208 */
216 unsigned long nr_running; 209 unsigned long nr_running;
217#ifdef CONFIG_SMP 210#ifdef CONFIG_SMP
218 unsigned long prio_bias;
219 unsigned long cpu_load[3]; 211 unsigned long cpu_load[3];
220#endif 212#endif
221 unsigned long long nr_switches; 213 unsigned long long nr_switches;
@@ -669,68 +661,13 @@ static int effective_prio(task_t *p)
669 return prio; 661 return prio;
670} 662}
671 663
672#ifdef CONFIG_SMP
673static inline void inc_prio_bias(runqueue_t *rq, int prio)
674{
675 rq->prio_bias += MAX_PRIO - prio;
676}
677
678static inline void dec_prio_bias(runqueue_t *rq, int prio)
679{
680 rq->prio_bias -= MAX_PRIO - prio;
681}
682
683static inline void inc_nr_running(task_t *p, runqueue_t *rq)
684{
685 rq->nr_running++;
686 if (rt_task(p)) {
687 if (p != rq->migration_thread)
688 /*
689 * The migration thread does the actual balancing. Do
690 * not bias by its priority as the ultra high priority
691 * will skew balancing adversely.
692 */
693 inc_prio_bias(rq, p->prio);
694 } else
695 inc_prio_bias(rq, p->static_prio);
696}
697
698static inline void dec_nr_running(task_t *p, runqueue_t *rq)
699{
700 rq->nr_running--;
701 if (rt_task(p)) {
702 if (p != rq->migration_thread)
703 dec_prio_bias(rq, p->prio);
704 } else
705 dec_prio_bias(rq, p->static_prio);
706}
707#else
708static inline void inc_prio_bias(runqueue_t *rq, int prio)
709{
710}
711
712static inline void dec_prio_bias(runqueue_t *rq, int prio)
713{
714}
715
716static inline void inc_nr_running(task_t *p, runqueue_t *rq)
717{
718 rq->nr_running++;
719}
720
721static inline void dec_nr_running(task_t *p, runqueue_t *rq)
722{
723 rq->nr_running--;
724}
725#endif
726
727/* 664/*
728 * __activate_task - move a task to the runqueue. 665 * __activate_task - move a task to the runqueue.
729 */ 666 */
730static inline void __activate_task(task_t *p, runqueue_t *rq) 667static inline void __activate_task(task_t *p, runqueue_t *rq)
731{ 668{
732 enqueue_task(p, rq->active); 669 enqueue_task(p, rq->active);
733 inc_nr_running(p, rq); 670 rq->nr_running++;
734} 671}
735 672
736/* 673/*
@@ -739,7 +676,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
739static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 676static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
740{ 677{
741 enqueue_task_head(p, rq->active); 678 enqueue_task_head(p, rq->active);
742 inc_nr_running(p, rq); 679 rq->nr_running++;
743} 680}
744 681
745static int recalc_task_prio(task_t *p, unsigned long long now) 682static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +800,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
863 */ 800 */
864static void deactivate_task(struct task_struct *p, runqueue_t *rq) 801static void deactivate_task(struct task_struct *p, runqueue_t *rq)
865{ 802{
866 dec_nr_running(p, rq); 803 rq->nr_running--;
867 dequeue_task(p, p->array); 804 dequeue_task(p, p->array);
868 p->array = NULL; 805 p->array = NULL;
869} 806}
@@ -1007,61 +944,27 @@ void kick_process(task_t *p)
1007 * We want to under-estimate the load of migration sources, to 944 * We want to under-estimate the load of migration sources, to
1008 * balance conservatively. 945 * balance conservatively.
1009 */ 946 */
1010static unsigned long __source_load(int cpu, int type, enum idle_type idle) 947static inline unsigned long source_load(int cpu, int type)
1011{ 948{
1012 runqueue_t *rq = cpu_rq(cpu); 949 runqueue_t *rq = cpu_rq(cpu);
1013 unsigned long running = rq->nr_running; 950 unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
1014 unsigned long source_load, cpu_load = rq->cpu_load[type-1],
1015 load_now = running * SCHED_LOAD_SCALE;
1016
1017 if (type == 0) 951 if (type == 0)
1018 source_load = load_now; 952 return load_now;
1019 else
1020 source_load = min(cpu_load, load_now);
1021
1022 if (running > 1 || (idle == NOT_IDLE && running))
1023 /*
1024 * If we are busy rebalancing the load is biased by
1025 * priority to create 'nice' support across cpus. When
1026 * idle rebalancing we should only bias the source_load if
1027 * there is more than one task running on that queue to
1028 * prevent idle rebalance from trying to pull tasks from a
1029 * queue with only one running task.
1030 */
1031 source_load = source_load * rq->prio_bias / running;
1032
1033 return source_load;
1034}
1035 953
1036static inline unsigned long source_load(int cpu, int type) 954 return min(rq->cpu_load[type-1], load_now);
1037{
1038 return __source_load(cpu, type, NOT_IDLE);
1039} 955}
1040 956
1041/* 957/*
1042 * Return a high guess at the load of a migration-target cpu 958 * Return a high guess at the load of a migration-target cpu
1043 */ 959 */
1044static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) 960static inline unsigned long target_load(int cpu, int type)
1045{ 961{
1046 runqueue_t *rq = cpu_rq(cpu); 962 runqueue_t *rq = cpu_rq(cpu);
1047 unsigned long running = rq->nr_running; 963 unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
1048 unsigned long target_load, cpu_load = rq->cpu_load[type-1],
1049 load_now = running * SCHED_LOAD_SCALE;
1050
1051 if (type == 0) 964 if (type == 0)
1052 target_load = load_now; 965 return load_now;
1053 else
1054 target_load = max(cpu_load, load_now);
1055
1056 if (running > 1 || (idle == NOT_IDLE && running))
1057 target_load = target_load * rq->prio_bias / running;
1058 966
1059 return target_load; 967 return max(rq->cpu_load[type-1], load_now);
1060}
1061
1062static inline unsigned long target_load(int cpu, int type)
1063{
1064 return __target_load(cpu, type, NOT_IDLE);
1065} 968}
1066 969
1067/* 970/*
@@ -1294,9 +1197,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
1294 } 1197 }
1295 } 1198 }
1296 1199
1297 if (p->last_waker_cpu != this_cpu)
1298 goto out_set_cpu;
1299
1300 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) 1200 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
1301 goto out_set_cpu; 1201 goto out_set_cpu;
1302 1202
@@ -1367,8 +1267,6 @@ out_set_cpu:
1367 cpu = task_cpu(p); 1267 cpu = task_cpu(p);
1368 } 1268 }
1369 1269
1370 p->last_waker_cpu = this_cpu;
1371
1372out_activate: 1270out_activate:
1373#endif /* CONFIG_SMP */ 1271#endif /* CONFIG_SMP */
1374 if (old_state == TASK_UNINTERRUPTIBLE) { 1272 if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1348,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1450#ifdef CONFIG_SCHEDSTATS 1348#ifdef CONFIG_SCHEDSTATS
1451 memset(&p->sched_info, 0, sizeof(p->sched_info)); 1349 memset(&p->sched_info, 0, sizeof(p->sched_info));
1452#endif 1350#endif
1453#if defined(CONFIG_SMP) 1351#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
1454 p->last_waker_cpu = cpu;
1455#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
1456 p->oncpu = 0; 1352 p->oncpu = 0;
1457#endif 1353#endif
1458#endif
1459#ifdef CONFIG_PREEMPT 1354#ifdef CONFIG_PREEMPT
1460 /* Want to start with kernel preemption disabled. */ 1355 /* Want to start with kernel preemption disabled. */
1461 task_thread_info(p)->preempt_count = 1; 1356 task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1425,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1530 list_add_tail(&p->run_list, &current->run_list); 1425 list_add_tail(&p->run_list, &current->run_list);
1531 p->array = current->array; 1426 p->array = current->array;
1532 p->array->nr_active++; 1427 p->array->nr_active++;
1533 inc_nr_running(p, rq); 1428 rq->nr_running++;
1534 } 1429 }
1535 set_need_resched(); 1430 set_need_resched();
1536 } else 1431 } else
@@ -1875,9 +1770,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1875 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1770 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
1876{ 1771{
1877 dequeue_task(p, src_array); 1772 dequeue_task(p, src_array);
1878 dec_nr_running(p, src_rq); 1773 src_rq->nr_running--;
1879 set_task_cpu(p, this_cpu); 1774 set_task_cpu(p, this_cpu);
1880 inc_nr_running(p, this_rq); 1775 this_rq->nr_running++;
1881 enqueue_task(p, this_array); 1776 enqueue_task(p, this_array);
1882 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) 1777 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
1883 + this_rq->timestamp_last_tick; 1778 + this_rq->timestamp_last_tick;
@@ -2056,9 +1951,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2056 1951
2057 /* Bias balancing toward cpus of our domain */ 1952 /* Bias balancing toward cpus of our domain */
2058 if (local_group) 1953 if (local_group)
2059 load = __target_load(i, load_idx, idle); 1954 load = target_load(i, load_idx);
2060 else 1955 else
2061 load = __source_load(i, load_idx, idle); 1956 load = source_load(i, load_idx);
2062 1957
2063 avg_load += load; 1958 avg_load += load;
2064 } 1959 }
@@ -2171,7 +2066,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2171 int i; 2066 int i;
2172 2067
2173 for_each_cpu_mask(i, group->cpumask) { 2068 for_each_cpu_mask(i, group->cpumask) {
2174 load = __source_load(i, 0, idle); 2069 load = source_load(i, 0);
2175 2070
2176 if (load > max_load) { 2071 if (load > max_load) {
2177 max_load = load; 2072 max_load = load;
@@ -3571,10 +3466,8 @@ void set_user_nice(task_t *p, long nice)
3571 goto out_unlock; 3466 goto out_unlock;
3572 } 3467 }
3573 array = p->array; 3468 array = p->array;
3574 if (array) { 3469 if (array)
3575 dequeue_task(p, array); 3470 dequeue_task(p, array);
3576 dec_prio_bias(rq, p->static_prio);
3577 }
3578 3471
3579 old_prio = p->prio; 3472 old_prio = p->prio;
3580 new_prio = NICE_TO_PRIO(nice); 3473 new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3477,6 @@ void set_user_nice(task_t *p, long nice)
3584 3477
3585 if (array) { 3478 if (array) {
3586 enqueue_task(p, array); 3479 enqueue_task(p, array);
3587 inc_prio_bias(rq, p->static_prio);
3588 /* 3480 /*
3589 * If the task increased its priority or is running and 3481 * If the task increased its priority or is running and
3590 * lowered its priority, then reschedule its CPU: 3482 * lowered its priority, then reschedule its CPU:
@@ -4129,6 +4021,8 @@ static inline void __cond_resched(void)
4129 */ 4021 */
4130 if (unlikely(preempt_count())) 4022 if (unlikely(preempt_count()))
4131 return; 4023 return;
4024 if (unlikely(system_state != SYSTEM_RUNNING))
4025 return;
4132 do { 4026 do {
4133 add_preempt_count(PREEMPT_ACTIVE); 4027 add_preempt_count(PREEMPT_ACTIVE);
4134 schedule(); 4028 schedule();
@@ -4434,6 +4328,7 @@ void __devinit init_idle(task_t *idle, int cpu)
4434 runqueue_t *rq = cpu_rq(cpu); 4328 runqueue_t *rq = cpu_rq(cpu);
4435 unsigned long flags; 4329 unsigned long flags;
4436 4330
4331 idle->timestamp = sched_clock();
4437 idle->sleep_avg = 0; 4332 idle->sleep_avg = 0;
4438 idle->array = NULL; 4333 idle->array = NULL;
4439 idle->prio = MAX_PRIO; 4334 idle->prio = MAX_PRIO;
@@ -5159,7 +5054,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5159#define MAX_DOMAIN_DISTANCE 32 5054#define MAX_DOMAIN_DISTANCE 32
5160 5055
5161static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = 5056static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
5162 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL }; 5057 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
5058/*
5059 * Architectures may override the migration cost and thus avoid
5060 * boot-time calibration. Unit is nanoseconds. Mostly useful for
5061 * virtualized hardware:
5062 */
5063#ifdef CONFIG_DEFAULT_MIGRATION_COST
5064 CONFIG_DEFAULT_MIGRATION_COST
5065#else
5066 -1LL
5067#endif
5068};
5163 5069
5164/* 5070/*
5165 * Allow override of migration cost - in units of microseconds. 5071 * Allow override of migration cost - in units of microseconds.
@@ -5551,13 +5457,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
5551 -1 5457 -1
5552#endif 5458#endif
5553 ); 5459 );
5554 printk("migration_cost="); 5460 if (system_state == SYSTEM_BOOTING) {
5555 for (distance = 0; distance <= max_distance; distance++) { 5461 printk("migration_cost=");
5556 if (distance) 5462 for (distance = 0; distance <= max_distance; distance++) {
5557 printk(","); 5463 if (distance)
5558 printk("%ld", (long)migration_cost[distance] / 1000); 5464 printk(",");
5465 printk("%ld", (long)migration_cost[distance] / 1000);
5466 }
5467 printk("\n");
5559 } 5468 }
5560 printk("\n");
5561 j1 = jiffies; 5469 j1 = jiffies;
5562 if (migration_debug) 5470 if (migration_debug)
5563 printk("migration: %ld seconds\n", (j1-j0)/HZ); 5471 printk("migration: %ld seconds\n", (j1-j0)/HZ);
@@ -6109,7 +6017,7 @@ void __init sched_init(void)
6109 runqueue_t *rq; 6017 runqueue_t *rq;
6110 int i, j, k; 6018 int i, j, k;
6111 6019
6112 for (i = 0; i < NR_CPUS; i++) { 6020 for_each_cpu(i) {
6113 prio_array_t *array; 6021 prio_array_t *array;
6114 6022
6115 rq = cpu_rq(i); 6023 rq = cpu_rq(i);
diff --git a/kernel/signal.c b/kernel/signal.c
index b373fc2420da..ea154104a00b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2430,7 +2430,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
2430} 2430}
2431 2431
2432int 2432int
2433do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) 2433do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2434{ 2434{
2435 struct k_sigaction *k; 2435 struct k_sigaction *k;
2436 sigset_t mask; 2436 sigset_t mask;
@@ -2454,6 +2454,8 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2454 *oact = *k; 2454 *oact = *k;
2455 2455
2456 if (act) { 2456 if (act) {
2457 sigdelsetmask(&act->sa.sa_mask,
2458 sigmask(SIGKILL) | sigmask(SIGSTOP));
2457 /* 2459 /*
2458 * POSIX 3.3.1.3: 2460 * POSIX 3.3.1.3:
2459 * "Setting a signal action to SIG_IGN for a signal that is 2461 * "Setting a signal action to SIG_IGN for a signal that is
@@ -2479,8 +2481,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2479 read_lock(&tasklist_lock); 2481 read_lock(&tasklist_lock);
2480 spin_lock_irq(&t->sighand->siglock); 2482 spin_lock_irq(&t->sighand->siglock);
2481 *k = *act; 2483 *k = *act;
2482 sigdelsetmask(&k->sa.sa_mask,
2483 sigmask(SIGKILL) | sigmask(SIGSTOP));
2484 sigemptyset(&mask); 2484 sigemptyset(&mask);
2485 sigaddset(&mask, sig); 2485 sigaddset(&mask, sig);
2486 rm_from_queue_full(&mask, &t->signal->shared_pending); 2486 rm_from_queue_full(&mask, &t->signal->shared_pending);
@@ -2495,8 +2495,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2495 } 2495 }
2496 2496
2497 *k = *act; 2497 *k = *act;
2498 sigdelsetmask(&k->sa.sa_mask,
2499 sigmask(SIGKILL) | sigmask(SIGSTOP));
2500 } 2498 }
2501 2499
2502 spin_unlock_irq(&current->sighand->siglock); 2500 spin_unlock_irq(&current->sighand->siglock);
@@ -2702,6 +2700,7 @@ sys_signal(int sig, __sighandler_t handler)
2702 2700
2703 new_sa.sa.sa_handler = handler; 2701 new_sa.sa.sa_handler = handler;
2704 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; 2702 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2703 sigemptyset(&new_sa.sa.sa_mask);
2705 2704
2706 ret = do_sigaction(sig, &new_sa, &old_sa); 2705 ret = do_sigaction(sig, &new_sa, &old_sa);
2707 2706
diff --git a/kernel/sys.c b/kernel/sys.c
index 0929c698affc..f91218a5463e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -428,7 +428,7 @@ void kernel_kexec(void)
428{ 428{
429#ifdef CONFIG_KEXEC 429#ifdef CONFIG_KEXEC
430 struct kimage *image; 430 struct kimage *image;
431 image = xchg(&kexec_image, 0); 431 image = xchg(&kexec_image, NULL);
432 if (!image) { 432 if (!image) {
433 return; 433 return;
434 } 434 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 17313b99e53d..1067090db6b1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16);
104cond_syscall(sys_setuid16); 104cond_syscall(sys_setuid16);
105cond_syscall(sys_vm86old); 105cond_syscall(sys_vm86old);
106cond_syscall(sys_vm86); 106cond_syscall(sys_vm86);
107cond_syscall(compat_sys_ipc);
108cond_syscall(compat_sys_sysctl);
107 109
108/* arch-specific weak syscall entries */ 110/* arch-specific weak syscall entries */
109cond_syscall(sys_pciconfig_read); 111cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71dd6f62efec..32b48e8ee36e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -44,13 +44,14 @@
44#include <linux/limits.h> 44#include <linux/limits.h>
45#include <linux/dcache.h> 45#include <linux/dcache.h>
46#include <linux/syscalls.h> 46#include <linux/syscalls.h>
47#include <linux/nfs_fs.h>
48#include <linux/acpi.h>
47 49
48#include <asm/uaccess.h> 50#include <asm/uaccess.h>
49#include <asm/processor.h> 51#include <asm/processor.h>
50 52
51#ifdef CONFIG_ROOT_NFS 53extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
52#include <linux/nfs_fs.h> 54 void __user *buffer, size_t *lenp, loff_t *ppos);
53#endif
54 55
55#if defined(CONFIG_SYSCTL) 56#if defined(CONFIG_SYSCTL)
56 57
@@ -126,7 +127,9 @@ extern int sysctl_hz_timer;
126extern int acct_parm[]; 127extern int acct_parm[];
127#endif 128#endif
128 129
129int randomize_va_space = 1; 130#ifdef CONFIG_IA64
131extern int no_unaligned_warning;
132#endif
130 133
131static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, 134static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
132 ctl_table *, void **); 135 ctl_table *, void **);
@@ -640,6 +643,7 @@ static ctl_table kern_table[] = {
640 .proc_handler = &proc_dointvec, 643 .proc_handler = &proc_dointvec,
641 }, 644 },
642#endif 645#endif
646#if defined(CONFIG_MMU)
643 { 647 {
644 .ctl_name = KERN_RANDOMIZE, 648 .ctl_name = KERN_RANDOMIZE,
645 .procname = "randomize_va_space", 649 .procname = "randomize_va_space",
@@ -648,6 +652,7 @@ static ctl_table kern_table[] = {
648 .mode = 0644, 652 .mode = 0644,
649 .proc_handler = &proc_dointvec, 653 .proc_handler = &proc_dointvec,
650 }, 654 },
655#endif
651#if defined(CONFIG_S390) && defined(CONFIG_SMP) 656#if defined(CONFIG_S390) && defined(CONFIG_SMP)
652 { 657 {
653 .ctl_name = KERN_SPIN_RETRY, 658 .ctl_name = KERN_SPIN_RETRY,
@@ -658,6 +663,26 @@ static ctl_table kern_table[] = {
658 .proc_handler = &proc_dointvec, 663 .proc_handler = &proc_dointvec,
659 }, 664 },
660#endif 665#endif
666#ifdef CONFIG_ACPI_SLEEP
667 {
668 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
669 .procname = "acpi_video_flags",
670 .data = &acpi_video_flags,
671 .maxlen = sizeof (unsigned long),
672 .mode = 0644,
673 .proc_handler = &proc_doulongvec_minmax,
674 },
675#endif
676#ifdef CONFIG_IA64
677 {
678 .ctl_name = KERN_IA64_UNALIGNED,
679 .procname = "ignore-unaligned-usertrap",
680 .data = &no_unaligned_warning,
681 .maxlen = sizeof (int),
682 .mode = 0644,
683 .proc_handler = &proc_dointvec,
684 },
685#endif
661 { .ctl_name = 0 } 686 { .ctl_name = 0 }
662}; 687};
663 688
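The new acpi_video_flags and ignore-unaligned-usertrap entries follow the usual ctl_table pattern: point .data at the variable, size .maxlen to match, and choose a proc handler of the same type. For kern_table the registration already exists; a hedged sketch of wiring up a comparable entry with the two-argument register_sysctl_table() used in this kernel series (example_flags and the numeric ctl_name are illustrative only):

/* sketch: exposing a variable under /proc/sys/kernel, 2.6.16-era API */
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/errno.h>

static unsigned long example_flags;

static ctl_table example_entries[] = {
	{
		.ctl_name	= 9999,		/* binary-sysctl id, illustrative */
		.procname	= "example_flags",
		.data		= &example_flags,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_doulongvec_minmax,
	},
	{ .ctl_name = 0 }
};

static ctl_table example_root[] = {
	{
		.ctl_name	= CTL_KERN,
		.procname	= "kernel",
		.mode		= 0555,
		.child		= example_entries,
	},
	{ .ctl_name = 0 }
};

static struct ctl_table_header *example_header;

static int __init example_sysctl_init(void)
{
	/* second argument: insert_at_head, still present in this series */
	example_header = register_sysctl_table(example_root, 0);
	return example_header ? 0 : -ENOMEM;
}
__initcall(example_sysctl_init);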
@@ -921,7 +946,7 @@ static ctl_table fs_table[] = {
921 .data = &files_stat, 946 .data = &files_stat,
922 .maxlen = 3*sizeof(int), 947 .maxlen = 3*sizeof(int),
923 .mode = 0444, 948 .mode = 0444,
924 .proc_handler = &proc_dointvec, 949 .proc_handler = &proc_nr_files,
925 }, 950 },
926 { 951 {
927 .ctl_name = FS_MAXFILE, 952 .ctl_name = FS_MAXFILE,
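Switching the nr_files entry from proc_dointvec to proc_nr_files lets the kernel refresh files_stat.nr_files (now maintained lazily) before the value is copied to user space; the table stays read-only (mode 0444). The handler itself lives in fs/file_table.c; a plausible shape, matching the extern declaration added earlier in this diff (get_nr_files() stands in for however the count is recomputed):

/* plausible sketch of proc_nr_files(); get_nr_files() is a stand-in */
#include <linux/fs.h>
#include <linux/sysctl.h>

extern int get_nr_files(void);		/* assumed helper, not in this diff */

int proc_nr_files(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	files_stat.nr_files = get_nr_files();	/* refresh before reporting */
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}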
diff --git a/kernel/timer.c b/kernel/timer.c
index 4f1cb0ab5251..2410c18dbeb1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -489,13 +489,25 @@ unsigned long next_timer_interrupt(void)
489 struct list_head *list; 489 struct list_head *list;
490 struct timer_list *nte; 490 struct timer_list *nte;
491 unsigned long expires; 491 unsigned long expires;
492 unsigned long hr_expires = MAX_JIFFY_OFFSET;
493 ktime_t hr_delta;
492 tvec_t *varray[4]; 494 tvec_t *varray[4];
493 int i, j; 495 int i, j;
494 496
497 hr_delta = hrtimer_get_next_event();
498 if (hr_delta.tv64 != KTIME_MAX) {
499 struct timespec tsdelta;
500 tsdelta = ktime_to_timespec(hr_delta);
501 hr_expires = timespec_to_jiffies(&tsdelta);
502 if (hr_expires < 3)
503 return hr_expires + jiffies;
504 }
505 hr_expires += jiffies;
506
495 base = &__get_cpu_var(tvec_bases); 507 base = &__get_cpu_var(tvec_bases);
496 spin_lock(&base->t_base.lock); 508 spin_lock(&base->t_base.lock);
497 expires = base->timer_jiffies + (LONG_MAX >> 1); 509 expires = base->timer_jiffies + (LONG_MAX >> 1);
498 list = 0; 510 list = NULL;
499 511
500 /* Look for timer events in tv1. */ 512 /* Look for timer events in tv1. */
501 j = base->timer_jiffies & TVR_MASK; 513 j = base->timer_jiffies & TVR_MASK;
@@ -542,6 +554,10 @@ found:
542 } 554 }
543 } 555 }
544 spin_unlock(&base->t_base.lock); 556 spin_unlock(&base->t_base.lock);
557
558 if (time_before(hr_expires, expires))
559 return hr_expires;
560
545 return expires; 561 return expires;
546} 562}
547#endif 563#endif
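With these two hunks next_timer_interrupt() also consults the high-resolution timer queue: the next hrtimer expiry is converted from a ktime_t delta into jiffies, an event due within a couple of ticks short-circuits the timer-wheel scan, and otherwise the earlier of the two deadlines is returned. A reduced sketch of that selection logic (the function and parameter names are illustrative, not kernel symbols):

/* sketch: choose the earlier of the timer-wheel and hrtimer deadlines */
unsigned long next_event(unsigned long now,		/* current jiffies  */
			 unsigned long wheel_expires,	/* absolute jiffies */
			 long hr_delta_ns,		/* <0: no hrtimer   */
			 long nsec_per_tick)
{
	unsigned long hr_expires;

	if (hr_delta_ns < 0)
		return wheel_expires;

	/* round the delta up to whole ticks, like timespec_to_jiffies() */
	hr_expires = (hr_delta_ns + nsec_per_tick - 1) / nsec_per_tick;
	if (hr_expires < 3)			/* due almost immediately */
		return now + hr_expires;

	hr_expires += now;
	/* wrap-safe "earlier of the two", as time_before() does */
	return (long)(wheel_expires - hr_expires) > 0 ? hr_expires : wheel_expires;
}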
@@ -717,12 +733,16 @@ static void second_overflow(void)
717#endif 733#endif
718} 734}
719 735
720/* in the NTP reference this is called "hardclock()" */ 736/*
721static void update_wall_time_one_tick(void) 737 * Returns how many microseconds we need to add to xtime this tick
738 * in doing an adjustment requested with adjtime.
739 */
740static long adjtime_adjustment(void)
722{ 741{
723 long time_adjust_step, delta_nsec; 742 long time_adjust_step;
724 743
725 if ((time_adjust_step = time_adjust) != 0 ) { 744 time_adjust_step = time_adjust;
745 if (time_adjust_step) {
726 /* 746 /*
727 * We are doing an adjtime thing. Prepare time_adjust_step to 747 * We are doing an adjtime thing. Prepare time_adjust_step to
728 * be within bounds. Note that a positive time_adjust means we 748 * be within bounds. Note that a positive time_adjust means we
@@ -733,10 +753,19 @@ static void update_wall_time_one_tick(void)
733 */ 753 */
734 time_adjust_step = min(time_adjust_step, (long)tickadj); 754 time_adjust_step = min(time_adjust_step, (long)tickadj);
735 time_adjust_step = max(time_adjust_step, (long)-tickadj); 755 time_adjust_step = max(time_adjust_step, (long)-tickadj);
756 }
757 return time_adjust_step;
758}
736 759
760/* in the NTP reference this is called "hardclock()" */
761static void update_wall_time_one_tick(void)
762{
763 long time_adjust_step, delta_nsec;
764
765 time_adjust_step = adjtime_adjustment();
766 if (time_adjust_step)
737 /* Reduce by this step the amount of time left */ 767 /* Reduce by this step the amount of time left */
738 time_adjust -= time_adjust_step; 768 time_adjust -= time_adjust_step;
739 }
740 delta_nsec = tick_nsec + time_adjust_step * 1000; 769 delta_nsec = tick_nsec + time_adjust_step * 1000;
741 /* 770 /*
742 * Advance the phase, once it gets to one microsecond, then 771 * Advance the phase, once it gets to one microsecond, then
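The refactoring above pulls the adjtime step calculation into adjtime_adjustment(): the outstanding time_adjust (in microseconds) is consumed at most tickadj microseconds per tick, so a slew requested via adjtime() is spread across many ticks rather than applied at once. A small sketch of the clamp with worked numbers (the tickadj value is illustrative):

/* sketch: clamp the per-tick slew the way adjtime_adjustment() does */
static long adjtime_step(long time_adjust_us, long tickadj_us)
{
	long step = time_adjust_us;

	if (step > tickadj_us)
		step = tickadj_us;
	if (step < -tickadj_us)
		step = -tickadj_us;
	return step;
}

/* e.g. a pending adjustment of +5000 us with an (illustrative) tickadj of
 * 500 us yields 500 us per tick, so the slew completes after 10 ticks */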
@@ -759,6 +788,22 @@ static void update_wall_time_one_tick(void)
759} 788}
760 789
761/* 790/*
791 * Return how long ticks are at the moment, that is, how much time
792 * update_wall_time_one_tick will add to xtime next time we call it
793 * (assuming no calls to do_adjtimex in the meantime).
794 * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
795 * bits to the right of the binary point.
796 * This function has no side-effects.
797 */
798u64 current_tick_length(void)
799{
800 long delta_nsec;
801
802 delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
803 return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
804}
805
806/*
762 * Using a loop looks inefficient, but "ticks" is 807 * Using a loop looks inefficient, but "ticks" is
763 * usually just one (we shouldn't be losing ticks, 808 * usually just one (we shouldn't be losing ticks,
764 * we're doing this this way mainly for interrupt 809 * we're doing this this way mainly for interrupt
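current_tick_length() reports the length of the current tick as fixed-point nanoseconds with SHIFT_SCALE-10 fractional bits, so the already-scaled NTP phase term time_adj can be added directly; it deliberately has no side effects, unlike update_wall_time_one_tick(). A sketch of the arithmetic, assuming the <linux/timex.h> value SHIFT_SCALE == 22 of this era (i.e. 12 fractional bits):

/* sketch: fixed-point tick length with SHIFT_SCALE - 10 fractional bits */
#include <stdint.h>

#define SHIFT_SCALE 22		/* assumed <linux/timex.h> value of this era */

uint64_t tick_length_fp(long tick_nsec, long adj_step_us, int64_t time_adj)
{
	long delta_nsec = tick_nsec + adj_step_us * 1000;

	return ((uint64_t)delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
}

/* e.g. tick_nsec = 1000000 (HZ=1000) and no adjustment gives
 * 1000000 << 12 = 4096000000, i.e. exactly 1 ms with 12 fraction bits */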
@@ -896,6 +941,8 @@ static inline void update_times(void)
896void do_timer(struct pt_regs *regs) 941void do_timer(struct pt_regs *regs)
897{ 942{
898 jiffies_64++; 943 jiffies_64++;
944 /* prevent loading jiffies before storing new jiffies_64 value. */
945 barrier();
899 update_times(); 946 update_times();
900 softlockup_tick(regs); 947 softlockup_tick(regs);
901} 948}
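The barrier() added to do_timer() is a compiler-only barrier: jiffies aliases the low word of jiffies_64 via the linker script, and without it the compiler could hoist a read of jiffies in update_times() above the 64-bit increment. A sketch of the idea (the barrier definition mirrors the kernel's GCC version; the aliasing is emulated here with a cast and assumes little-endian layout):

/* sketch: a GCC compiler barrier - it emits no instructions, but the
 * "memory" clobber stops the compiler from caching or reordering memory
 * accesses across it */
#define compiler_barrier() __asm__ __volatile__("" : : : "memory")

unsigned long long counter64;
/* stand-in for the linker-script aliasing of jiffies to jiffies_64 */
#define counter_lo (*(volatile unsigned long *)&counter64)

unsigned long tick(void)
{
	counter64++;
	compiler_barrier();	/* keep the load below after the increment */
	return counter_lo;
}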
@@ -1307,8 +1354,8 @@ void __init init_timers(void)
1307 1354
1308#ifdef CONFIG_TIME_INTERPOLATION 1355#ifdef CONFIG_TIME_INTERPOLATION
1309 1356
1310struct time_interpolator *time_interpolator; 1357struct time_interpolator *time_interpolator __read_mostly;
1311static struct time_interpolator *time_interpolator_list; 1358static struct time_interpolator *time_interpolator_list __read_mostly;
1312static DEFINE_SPINLOCK(time_interpolator_lock); 1359static DEFINE_SPINLOCK(time_interpolator_lock);
1313 1360
1314static inline u64 time_interpolator_get_cycles(unsigned int src) 1361static inline u64 time_interpolator_get_cycles(unsigned int src)
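__read_mostly only changes placement: rarely-written variables are grouped into a dedicated data section so they do not share cache lines with frequently-written data, avoiding false sharing on SMP. A sketch of the kind of definition behind it (the section name matches this era's x86 headers; other architectures may define the annotation as empty):

/* sketch: group a rarely-written variable into its own data section */
#define my_read_mostly __attribute__((__section__(".data.read_mostly")))

struct time_interpolator;	/* opaque in this sketch */
static struct time_interpolator *interp_list my_read_mostly;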
@@ -1322,10 +1369,10 @@ static inline u64 time_interpolator_get_cycles(unsigned int src)
1322 return x(); 1369 return x();
1323 1370
1324 case TIME_SOURCE_MMIO64 : 1371 case TIME_SOURCE_MMIO64 :
1325 return readq((void __iomem *) time_interpolator->addr); 1372 return readq_relaxed((void __iomem *)time_interpolator->addr);
1326 1373
1327 case TIME_SOURCE_MMIO32 : 1374 case TIME_SOURCE_MMIO32 :
1328 return readl((void __iomem *) time_interpolator->addr); 1375 return readl_relaxed((void __iomem *)time_interpolator->addr);
1329 1376
1330 default: return get_cycles(); 1377 default: return get_cycles();
1331 } 1378 }
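The _relaxed MMIO readers return the raw device value without the ordering that the plain readl()/readq() imply on architectures such as ia64; that is acceptable here because the interpolator merely samples a free-running counter and does not need the load ordered against other I/O. A conceptual sketch of the relationship, not the ia64 implementation:

/* conceptual sketch: an ordered MMIO read behaves like a relaxed (raw)
 * read followed by a fence */
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/system.h>		/* rmb() in this kernel series */

static inline u32 sample_readl_relaxed(const volatile void __iomem *addr)
{
	return *(const volatile u32 __force *)addr;	/* raw load, no fence */
}

static inline u32 sample_readl(const volatile void __iomem *addr)
{
	u32 val = sample_readl_relaxed(addr);

	rmb();			/* order the load before later accesses */
	return val;
}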