Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c        |   6
-rw-r--r--  kernel/compat.c         |   1
-rw-r--r--  kernel/cpuset.c         |  37
-rw-r--r--  kernel/exit.c           |   3
-rw-r--r--  kernel/fork.c           | 415
-rw-r--r--  kernel/hrtimer.c        | 106
-rw-r--r--  kernel/intermodule.c    |   3
-rw-r--r--  kernel/itimer.c         |  11
-rw-r--r--  kernel/kprobes.c        |  36
-rw-r--r--  kernel/module.c         |   6
-rw-r--r--  kernel/panic.c          |   1
-rw-r--r--  kernel/posix-timers.c   |  53
-rw-r--r--  kernel/power/console.c  |  16
-rw-r--r--  kernel/power/disk.c     |  15
-rw-r--r--  kernel/power/main.c     |   4
-rw-r--r--  kernel/power/power.h    |  15
-rw-r--r--  kernel/power/snapshot.c |   4
-rw-r--r--  kernel/power/swsusp.c   |  18
-rw-r--r--  kernel/ptrace.c         |  28
-rw-r--r--  kernel/rcupdate.c       |  76
-rw-r--r--  kernel/rcutorture.c     |  10
-rw-r--r--  kernel/sched.c          | 186
-rw-r--r--  kernel/signal.c         |  11
-rw-r--r--  kernel/sys.c            |  27
-rw-r--r--  kernel/sys_ni.c         |   2
-rw-r--r--  kernel/sysctl.c         |  47
-rw-r--r--  kernel/time.c           |  15
-rw-r--r--  kernel/timer.c          |  63
-rw-r--r--  kernel/user.c           |  32
29 files changed, 817 insertions, 430 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 685c25175d..d7e7e637b9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -841,7 +841,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 
 	for (aux = context->aux; aux; aux = aux->next) {
 
-		ab = audit_log_start(context, GFP_KERNEL, aux->type);
+		ab = audit_log_start(context, gfp_mask, aux->type);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
@@ -878,14 +878,14 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 	}
 
 	if (context->pwd && context->pwdmnt) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
+		ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
 		if (ab) {
 			audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
 			audit_log_end(ab);
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+		ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
diff --git a/kernel/compat.c b/kernel/compat.c
index 1867290c37..8c9cd88b67 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,7 +23,6 @@
 #include <linux/security.h>
 
 #include <asm/uaccess.h>
-#include <asm/bug.h>
 
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fe2f71f92a..12815d3f1a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -641,7 +641,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * task has been modifying its cpuset.
  */
 
-void cpuset_update_task_memory_state()
+void cpuset_update_task_memory_state(void)
 {
 	int my_cpusets_mem_gen;
 	struct task_struct *tsk = current;
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
  * We don't need to task_lock() this reference to tsk->cpuset,
  * because tsk is already marked PF_EXITING, so attach_task() won't
  * mess with it, or task is a failed fork, never visible to attach_task.
+ *
+ * Hack:
+ *
+ * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+ *
+ * Don't leave a task unable to allocate memory, as that is an
+ * accident waiting to happen should someone add a callout in
+ * do_exit() after the cpuset_exit() call that might allocate.
+ * If a task tries to allocate memory with an invalid cpuset,
+ * it will oops in cpuset_update_task_memory_state().
+ *
+ * We call cpuset_exit() while the task is still competent to
+ * handle notify_on_release(), then leave the task attached to
+ * the root cpuset (top_cpuset) for the remainder of its exit.
+ *
+ * To do this properly, we would increment the reference count on
+ * top_cpuset, and near the very end of the kernel/exit.c do_exit()
+ * code we would add a second cpuset function call, to drop that
+ * reference.  This would just create an unnecessary hot spot on
+ * the top_cpuset reference count, to no avail.
+ *
+ * Normally, holding a reference to a cpuset without bumping its
+ * count is unsafe.  The cpuset could go away, or someone could
+ * attach us to a different cpuset, decrementing the count on
+ * the first cpuset that we never incremented.  But in this case,
+ * top_cpuset isn't going away, and either task has PF_EXITING set,
+ * which wards off any attach_task() attempts, or task is a failed
+ * fork, never visible to attach_task.
+ *
+ * Another way to do this would be to set the cpuset pointer
+ * to NULL here, and check in cpuset_update_task_memory_state()
+ * for a NULL pointer.  This hack avoids that NULL check, for no
+ * cost (other than this way too long comment ;).
 **/
 
 void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
 	struct cpuset *cs;
 
 	cs = tsk->cpuset;
-	tsk->cpuset = NULL;
+	tsk->cpuset = &top_cpuset;	/* Hack - see comment above */
 
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 93cee36713..531aadca55 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -360,6 +360,9 @@ void daemonize(const char *name, ...)
 	fs = init_task.fs;
 	current->fs = fs;
 	atomic_inc(&fs->count);
+	exit_namespace(current);
+	current->namespace = init_task.namespace;
+	get_namespace(current->namespace);
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4ae8cfc1c8..ccdfbb16c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,8 +108,10 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
-void __put_task_struct(struct task_struct *tsk)
+void __put_task_struct_cb(struct rcu_head *rhp)
 {
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
 	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
@@ -446,6 +448,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	}
 }
 
+/*
+ * Allocate a new mm structure and copy contents from the
+ * mm structure of the passed in task structure.
+ */
+static struct mm_struct *dup_mm(struct task_struct *tsk)
+{
+	struct mm_struct *mm, *oldmm = current->mm;
+	int err;
+
+	if (!oldmm)
+		return NULL;
+
+	mm = allocate_mm();
+	if (!mm)
+		goto fail_nomem;
+
+	memcpy(mm, oldmm, sizeof(*mm));
+
+	if (!mm_init(mm))
+		goto fail_nomem;
+
+	if (init_new_context(tsk, mm))
+		goto fail_nocontext;
+
+	err = dup_mmap(mm, oldmm);
+	if (err)
+		goto free_pt;
+
+	mm->hiwater_rss = get_mm_rss(mm);
+	mm->hiwater_vm = mm->total_vm;
+
+	return mm;
+
+free_pt:
+	mmput(mm);
+
+fail_nomem:
+	return NULL;
+
+fail_nocontext:
+	/*
+	 * If init_new_context() failed, we cannot use mmput() to free the mm
+	 * because it calls destroy_context()
+	 */
+	mm_free_pgd(mm);
+	free_mm(mm);
+	return NULL;
+}
+
 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct mm_struct * mm, *oldmm;
@@ -473,43 +524,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 	}
 
 	retval = -ENOMEM;
-	mm = allocate_mm();
+	mm = dup_mm(tsk);
 	if (!mm)
 		goto fail_nomem;
 
-	/* Copy the current MM stuff.. */
-	memcpy(mm, oldmm, sizeof(*mm));
-	if (!mm_init(mm))
-		goto fail_nomem;
-
-	if (init_new_context(tsk,mm))
-		goto fail_nocontext;
-
-	retval = dup_mmap(mm, oldmm);
-	if (retval)
-		goto free_pt;
-
-	mm->hiwater_rss = get_mm_rss(mm);
-	mm->hiwater_vm = mm->total_vm;
-
 good_mm:
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
 
-free_pt:
-	mmput(mm);
 fail_nomem:
 	return retval;
-
-fail_nocontext:
-	/*
-	 * If init_new_context() failed, we cannot use mmput() to free the mm
-	 * because it calls destroy_context()
-	 */
-	mm_free_pgd(mm);
-	free_mm(mm);
-	return retval;
 }
 
 static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +622,17 @@ out:
 	return newf;
 }
 
-static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ */
+static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
-	struct files_struct *oldf, *newf;
+	struct files_struct *newf;
 	struct file **old_fds, **new_fds;
-	int open_files, size, i, error = 0, expand;
+	int open_files, size, i, expand;
 	struct fdtable *old_fdt, *new_fdt;
 
-	/*
-	 * A background process may not have any files ...
-	 */
-	oldf = current->files;
-	if (!oldf)
-		goto out;
-
-	if (clone_flags & CLONE_FILES) {
-		atomic_inc(&oldf->count);
-		goto out;
-	}
-
-	/*
-	 * Note: we may be using current for both targets (See exec.c)
-	 * This works because we cache current->files (old) as oldf. Don't
-	 * break this.
-	 */
-	tsk->files = NULL;
-	error = -ENOMEM;
 	newf = alloc_files();
 	if (!newf)
 		goto out;
@@ -651,9 +661,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 	if (expand) {
 		spin_unlock(&oldf->file_lock);
 		spin_lock(&newf->file_lock);
-		error = expand_files(newf, open_files-1);
+		*errorp = expand_files(newf, open_files-1);
 		spin_unlock(&newf->file_lock);
-		if (error < 0)
+		if (*errorp < 0)
 			goto out_release;
 		new_fdt = files_fdtable(newf);
 		/*
@@ -702,10 +712,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
 	}
 
-	tsk->files = newf;
-	error = 0;
 out:
-	return error;
+	return newf;
 
 out_release:
 	free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +723,40 @@ out_release:
 	goto out;
 }
 
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+	struct files_struct *oldf, *newf;
+	int error = 0;
+
+	/*
+	 * A background process may not have any files ...
+	 */
+	oldf = current->files;
+	if (!oldf)
+		goto out;
+
+	if (clone_flags & CLONE_FILES) {
+		atomic_inc(&oldf->count);
+		goto out;
+	}
+
+	/*
+	 * Note: we may be using current for both targets (See exec.c)
+	 * This works because we cache current->files (old) as oldf. Don't
+	 * break this.
+	 */
+	tsk->files = NULL;
+	error = -ENOMEM;
+	newf = dup_fd(oldf, &error);
+	if (!newf)
+		goto out;
+
+	tsk->files = newf;
+	error = 0;
+out:
+	return error;
+}
+
 /*
  * Helper to unshare the files of the current task.
  * We don't want to expose copy_files internals to
@@ -802,7 +844,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
-	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC);
+	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 	sig->real_timer.data = tsk;
@@ -1020,6 +1062,12 @@ static task_t *copy_process(unsigned long clone_flags,
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
 
 	/*
+	 * sigaltstack should be cleared when sharing the same VM
+	 */
+	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
+		p->sas_ss_sp = p->sas_ss_size = 0;
+
+	/*
 	 * Syscall tracing should be turned off in the child regardless
 	 * of CLONE_PTRACE.
 	 */
@@ -1083,8 +1131,8 @@ static task_t *copy_process(unsigned long clone_flags,
 	p->real_parent = current;
 	p->parent = p->real_parent;
 
+	spin_lock(&current->sighand->siglock);
 	if (clone_flags & CLONE_THREAD) {
-		spin_lock(&current->sighand->siglock);
 		/*
 		 * Important: if an exit-all has been started then
 		 * do not create this new thread - the whole thread
@@ -1122,8 +1170,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			 */
 			p->it_prof_expires = jiffies_to_cputime(1);
 		}
-
-		spin_unlock(&current->sighand->siglock);
 	}
 
 	/*
@@ -1135,8 +1181,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
-	attach_pid(p, PIDTYPE_PID, p->pid);
-	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
 		p->signal->tty = current->signal->tty;
 		p->signal->pgrp = process_group(current);
@@ -1146,9 +1190,12 @@ static task_t *copy_process(unsigned long clone_flags,
 		if (p->pid)
 			__get_cpu_var(process_counts)++;
 	}
+	attach_pid(p, PIDTYPE_TGID, p->tgid);
+	attach_pid(p, PIDTYPE_PID, p->pid);
 
 	nr_threads++;
 	total_forks++;
+	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	return p;
@@ -1323,3 +1370,249 @@ void __init proc_caches_init(void)
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 }
+
+
+/*
+ * Check constraints on flags passed to the unshare system call and
+ * force unsharing of additional process context as appropriate.
+ */
+static inline void check_unshare_flags(unsigned long *flags_ptr)
+{
+	/*
+	 * If unsharing a thread from a thread group, must also
+	 * unshare vm.
+	 */
+	if (*flags_ptr & CLONE_THREAD)
+		*flags_ptr |= CLONE_VM;
+
+	/*
+	 * If unsharing vm, must also unshare signal handlers.
+	 */
+	if (*flags_ptr & CLONE_VM)
+		*flags_ptr |= CLONE_SIGHAND;
+
+	/*
+	 * If unsharing signal handlers and the task was created
+	 * using CLONE_THREAD, then must unshare the thread
+	 */
+	if ((*flags_ptr & CLONE_SIGHAND) &&
+	    (atomic_read(&current->signal->count) > 1))
+		*flags_ptr |= CLONE_THREAD;
+
+	/*
+	 * If unsharing namespace, must also unshare filesystem information.
+	 */
+	if (*flags_ptr & CLONE_NEWNS)
+		*flags_ptr |= CLONE_FS;
+}
+
+/*
+ * Unsharing of tasks created with CLONE_THREAD is not supported yet
+ */
+static int unshare_thread(unsigned long unshare_flags)
+{
+	if (unshare_flags & CLONE_THREAD)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Unshare the filesystem structure if it is being shared
+ */
+static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+{
+	struct fs_struct *fs = current->fs;
+
+	if ((unshare_flags & CLONE_FS) &&
+	    (fs && atomic_read(&fs->count) > 1)) {
+		*new_fsp = __copy_fs_struct(current->fs);
+		if (!*new_fsp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unshare the namespace structure if it is being shared
+ */
+static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
+{
+	struct namespace *ns = current->namespace;
+
+	if ((unshare_flags & CLONE_NEWNS) &&
+	    (ns && atomic_read(&ns->count) > 1)) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		*new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+		if (!*new_nsp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
+ * supported yet
+ */
+static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
+{
+	struct sighand_struct *sigh = current->sighand;
+
+	if ((unshare_flags & CLONE_SIGHAND) &&
+	    (sigh && atomic_read(&sigh->count) > 1))
+		return -EINVAL;
+	else
+		return 0;
+}
+
+/*
+ * Unshare vm if it is being shared
+ */
+static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
+{
+	struct mm_struct *mm = current->mm;
+
+	if ((unshare_flags & CLONE_VM) &&
+	    (mm && atomic_read(&mm->mm_users) > 1)) {
+		*new_mmp = dup_mm(current);
+		if (!*new_mmp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unshare file descriptor table if it is being shared
+ */
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+{
+	struct files_struct *fd = current->files;
+	int error = 0;
+
+	if ((unshare_flags & CLONE_FILES) &&
+	    (fd && atomic_read(&fd->count) > 1)) {
+		*new_fdp = dup_fd(fd, &error);
+		if (!*new_fdp)
+			return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
+ * supported yet
+ */
+static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
+{
+	if (unshare_flags & CLONE_SYSVSEM)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * unshare allows a process to 'unshare' part of the process
+ * context which was originally shared using clone.  copy_*
+ * functions used by do_fork() cannot be used here directly
+ * because they modify an inactive task_struct that is being
+ * constructed. Here we are modifying the current, active,
+ * task_struct.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+	int err = 0;
+	struct fs_struct *fs, *new_fs = NULL;
+	struct namespace *ns, *new_ns = NULL;
+	struct sighand_struct *sigh, *new_sigh = NULL;
+	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
+	struct files_struct *fd, *new_fd = NULL;
+	struct sem_undo_list *new_ulist = NULL;
+
+	check_unshare_flags(&unshare_flags);
+
+	if ((err = unshare_thread(unshare_flags)))
+		goto bad_unshare_out;
+	if ((err = unshare_fs(unshare_flags, &new_fs)))
+		goto bad_unshare_cleanup_thread;
+	if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
+		goto bad_unshare_cleanup_fs;
+	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+		goto bad_unshare_cleanup_ns;
+	if ((err = unshare_vm(unshare_flags, &new_mm)))
+		goto bad_unshare_cleanup_sigh;
+	if ((err = unshare_fd(unshare_flags, &new_fd)))
+		goto bad_unshare_cleanup_vm;
+	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
+		goto bad_unshare_cleanup_fd;
+
+	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+
+		task_lock(current);
+
+		if (new_fs) {
+			fs = current->fs;
+			current->fs = new_fs;
+			new_fs = fs;
+		}
+
+		if (new_ns) {
+			ns = current->namespace;
+			current->namespace = new_ns;
+			new_ns = ns;
+		}
+
+		if (new_sigh) {
+			sigh = current->sighand;
+			current->sighand = new_sigh;
+			new_sigh = sigh;
+		}
+
+		if (new_mm) {
+			mm = current->mm;
+			active_mm = current->active_mm;
+			current->mm = new_mm;
+			current->active_mm = new_mm;
+			activate_mm(active_mm, new_mm);
+			new_mm = mm;
+		}
+
+		if (new_fd) {
+			fd = current->files;
+			current->files = new_fd;
+			new_fd = fd;
+		}
+
+		task_unlock(current);
+	}
+
+bad_unshare_cleanup_fd:
+	if (new_fd)
+		put_files_struct(new_fd);
+
+bad_unshare_cleanup_vm:
+	if (new_mm)
+		mmput(new_mm);
+
+bad_unshare_cleanup_sigh:
+	if (new_sigh)
+		if (atomic_dec_and_test(&new_sigh->count))
+			kmem_cache_free(sighand_cachep, new_sigh);
+
+bad_unshare_cleanup_ns:
+	if (new_ns)
+		put_namespace(new_ns);
+
+bad_unshare_cleanup_fs:
+	if (new_fs)
+		put_fs_struct(new_fs);
+
+bad_unshare_cleanup_thread:
+bad_unshare_out:
+	return err;
+}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f1c4155b49..14bc9cfa63 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -21,6 +21,12 @@
  * Credits:
  *	based on kernel/timer.c
  *
+ *	Help, testing, suggestions, bugfixes, improvements were
+ *	provided by:
+ *
+ *	George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
+ *	et. al.
+ *
  * For licencing details see kernel-base/COPYING
  */
 
@@ -66,6 +72,12 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
 
 /*
  * The timer bases:
+ *
+ * Note: If we want to add new timer bases, we have to skip the two
+ * clock ids captured by the cpu-timers. We do this by holding empty
+ * entries rather than doing math adjustment of the clock ids.
+ * This ensures that we capture erroneous accesses to these clock ids
+ * rather than moving them into the range of valid clock id's.
  */
 
 #define MAX_HRTIMER_BASES 2
@@ -406,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base);
 
-	if (mode == HRTIMER_REL)
+	if (mode == HRTIMER_REL) {
 		tim = ktime_add(tim, new_base->get_time());
+		/*
+		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
+		 * to signal that they simply return xtime in
+		 * do_gettimeoffset(). In this case we want to round up by
+		 * resolution when starting a relative timer, to avoid short
+		 * timeouts. This will go away with the GTOD framework.
+		 */
+#ifdef CONFIG_TIME_LOW_RES
+		tim = ktime_add(tim, base->resolution);
+#endif
+	}
 	timer->expires = tim;
 
 	enqueue_hrtimer(timer, new_base);
@@ -482,30 +505,61 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 	return rem;
 }
 
+#ifdef CONFIG_NO_IDLE_HZ
 /**
- * hrtimer_rebase - rebase an initialized hrtimer to a different base
+ * hrtimer_get_next_event - get the time until next expiry event
  *
- * @timer:	the timer to be rebased
- * @clock_id:	the clock to be used
+ * Returns the delta to the next expiry event or KTIME_MAX if no timer
+ * is pending.
  */
-void hrtimer_rebase(struct hrtimer *timer, const clockid_t clock_id)
+ktime_t hrtimer_get_next_event(void)
 {
-	struct hrtimer_base *bases;
+	struct hrtimer_base *base = __get_cpu_var(hrtimer_bases);
+	ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
+	unsigned long flags;
+	int i;
 
-	bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
-	timer->base = &bases[clock_id];
+	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
+		struct hrtimer *timer;
+
+		spin_lock_irqsave(&base->lock, flags);
+		if (!base->first) {
+			spin_unlock_irqrestore(&base->lock, flags);
+			continue;
+		}
+		timer = rb_entry(base->first, struct hrtimer, node);
+		delta.tv64 = timer->expires.tv64;
+		spin_unlock_irqrestore(&base->lock, flags);
+		delta = ktime_sub(delta, base->get_time());
+		if (delta.tv64 < mindelta.tv64)
+			mindelta.tv64 = delta.tv64;
+	}
+	if (mindelta.tv64 < 0)
+		mindelta.tv64 = 0;
+	return mindelta;
 }
+#endif
 
 /**
  * hrtimer_init - initialize a timer to the given clock
  *
  * @timer:	the timer to be initialized
  * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
  */
-void hrtimer_init(struct hrtimer *timer, const clockid_t clock_id)
+void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
+		  enum hrtimer_mode mode)
 {
+	struct hrtimer_base *bases;
+
 	memset(timer, 0, sizeof(struct hrtimer));
-	hrtimer_rebase(timer, clock_id);
+
+	bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
+
+	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
+		clock_id = CLOCK_MONOTONIC;
+
+	timer->base = &bases[clock_id];
 }
 
 /**
@@ -550,6 +604,7 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 		fn = timer->function;
 		data = timer->data;
 		set_curr_timer(base, timer);
+		timer->state = HRTIMER_RUNNING;
 		__remove_hrtimer(timer, base);
 		spin_unlock_irq(&base->lock);
 
@@ -565,6 +620,10 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 
 		spin_lock_irq(&base->lock);
 
+		/* Another CPU has added back the timer */
+		if (timer->state != HRTIMER_RUNNING)
+			continue;
+
 		if (restart == HRTIMER_RESTART)
 			enqueue_hrtimer(timer, base);
 		else
@@ -638,8 +697,7 @@ schedule_hrtimer_interruptible(struct hrtimer *timer,
 	return schedule_hrtimer(timer, mode);
 }
 
-static long __sched
-nanosleep_restart(struct restart_block *restart, clockid_t clockid)
+static long __sched nanosleep_restart(struct restart_block *restart)
 {
 	struct timespec __user *rmtp;
 	struct timespec tu;
@@ -649,7 +707,7 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
 
 	restart->fn = do_no_restart_syscall;
 
-	hrtimer_init(&timer, clockid);
+	hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
 
 	timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
 
@@ -669,16 +727,6 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
 	return -ERESTART_RESTARTBLOCK;
 }
 
-static long __sched nanosleep_restart_mono(struct restart_block *restart)
-{
-	return nanosleep_restart(restart, CLOCK_MONOTONIC);
-}
-
-static long __sched nanosleep_restart_real(struct restart_block *restart)
-{
-	return nanosleep_restart(restart, CLOCK_REALTIME);
-}
-
 long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
 {
@@ -687,7 +735,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	struct timespec tu;
 	ktime_t rem;
 
-	hrtimer_init(&timer, clockid);
+	hrtimer_init(&timer, clockid, mode);
 
 	timer.expires = timespec_to_ktime(*rqtp);
 
@@ -695,7 +743,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	if (rem.tv64 <= 0)
 		return 0;
 
-	/* Absolute timers do not update the rmtp value: */
+	/* Absolute timers do not update the rmtp value and restart: */
 	if (mode == HRTIMER_ABS)
 		return -ERESTARTNOHAND;
 
@@ -705,11 +753,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		return -EFAULT;
 
 	restart = &current_thread_info()->restart_block;
-	restart->fn = (clockid == CLOCK_MONOTONIC) ?
-		nanosleep_restart_mono : nanosleep_restart_real;
+	restart->fn = nanosleep_restart;
 	restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
 	restart->arg1 = timer.expires.tv64 >> 32;
 	restart->arg2 = (unsigned long) rmtp;
+	restart->arg3 = (unsigned long) timer.base->index;
 
 	return -ERESTART_RESTARTBLOCK;
 }
@@ -736,10 +784,8 @@ static void __devinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	for (i = 0; i < MAX_HRTIMER_BASES; i++) {
+	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++)
 		spin_lock_init(&base->lock);
-		base++;
-	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
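[Editorial note: a sketch of the per-timer setup implied by the new three-argument hrtimer_init() (clock id plus HRTIMER_ABS/HRTIMER_REL mode), modelled on the itimer and copy_signal() hunks in this patch. The int/void* callback convention is assumed from the timer->function/timer->data fields and the HRTIMER_RESTART comparison visible in run_hrtimer_queue() above, not taken from a header; the function and variable names are hypothetical.]

#include <linux/hrtimer.h>
#include <linux/ktime.h>

/* Hypothetical expiry handler; prototype assumed from the
 * fn(data)/HRTIMER_RESTART usage in run_hrtimer_queue() above. */
static int example_timeout(void *data)
{
	/* ... react to the expiry ... */
	return HRTIMER_NORESTART;	/* do not re-arm */
}

static struct hrtimer example_timer;

static void arm_example_timer(void)
{
	/* Clock id and abs/rel mode are now both fixed at init time. */
	hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_REL);
	example_timer.function = example_timeout;
	example_timer.data = NULL;

	/* Fire roughly 100 ms from now (relative mode). */
	hrtimer_start(&example_timer, ktime_set(0, 100000000), HRTIMER_REL);
}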
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 0cbe633420..55b1e5b85d 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -179,3 +179,6 @@ EXPORT_SYMBOL(inter_module_register);
 EXPORT_SYMBOL(inter_module_unregister);
 EXPORT_SYMBOL(inter_module_get_request);
 EXPORT_SYMBOL(inter_module_put);
+
+MODULE_LICENSE("GPL");
+
diff --git a/kernel/itimer.c b/kernel/itimer.c
index c2c05c4ff2..379be2f8c8 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -49,9 +49,11 @@ int do_getitimer(int which, struct itimerval *value)
 
 	switch (which) {
 	case ITIMER_REAL:
+		spin_lock_irq(&tsk->sighand->siglock);
 		value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
 		value->it_interval =
 			ktime_to_timeval(tsk->signal->it_real_incr);
+		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
 		read_lock(&tasklist_lock);
@@ -150,18 +152,25 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
 
 	switch (which) {
 	case ITIMER_REAL:
+again:
+		spin_lock_irq(&tsk->sighand->siglock);
 		timer = &tsk->signal->real_timer;
-		hrtimer_cancel(timer);
 		if (ovalue) {
 			ovalue->it_value = itimer_get_remtime(timer);
 			ovalue->it_interval
 				= ktime_to_timeval(tsk->signal->it_real_incr);
 		}
+		/* We are sharing ->siglock with it_real_fn() */
+		if (hrtimer_try_to_cancel(timer) < 0) {
+			spin_unlock_irq(&tsk->sighand->siglock);
+			goto again;
+		}
 		tsk->signal->it_real_incr =
 			timeval_to_ktime(value->it_interval);
 		expires = timeval_to_ktime(value->it_value);
 		if (expires.tv64 != 0)
 			hrtimer_start(timer, expires, HRTIMER_REL);
+		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
 		nval = timeval_to_cputime(&value->it_value);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3ea6325228..fef1af8a73 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -344,23 +344,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
 }
 
-/*
- * This kprobe pre_handler is registered with every kretprobe. When probe
- * hits it will set up the return probe.
- */
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-					   struct pt_regs *regs)
-{
-	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-	unsigned long flags = 0;
-
-	/*TODO: consider to only swap the RA after the last pre_handler fired */
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	arch_prepare_kretprobe(rp, regs);
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-	return 0;
-}
-
 static inline void free_rp_inst(struct kretprobe *rp)
 {
 	struct kretprobe_instance *ri;
@@ -578,6 +561,23 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
 
 #ifdef ARCH_SUPPORTS_KRETPROBES
 
+/*
+ * This kprobe pre_handler is registered with every kretprobe. When probe
+ * hits it will set up the return probe.
+ */
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+					   struct pt_regs *regs)
+{
+	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
+	unsigned long flags = 0;
+
+	/*TODO: consider to only swap the RA after the last pre_handler fired */
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	arch_prepare_kretprobe(rp, regs);
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	return 0;
+}
+
 int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
@@ -631,12 +631,12 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp)
 	unregister_kprobe(&rp->kp);
 	/* No race here */
 	spin_lock_irqsave(&kretprobe_lock, flags);
-	free_rp_inst(rp);
 	while ((ri = get_used_rp_inst(rp)) != NULL) {
 		ri->rp = NULL;
 		hlist_del(&ri->uflist);
 	}
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	free_rp_inst(rp);
 }
 
 static int __init init_kprobes(void)
diff --git a/kernel/module.c b/kernel/module.c
index 618ed6e23e..5aad477ddc 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1670,6 +1670,9 @@ static struct module *load_module(void __user *umod,
 		goto free_mod;
 	}
 
+	/* Userspace could have altered the string after the strlen_user() */
+	args[arglen - 1] = '\0';
+
 	if (find_module(mod->name)) {
 		err = -EEXIST;
 		goto free_mod;
@@ -2092,7 +2095,8 @@ static unsigned long mod_find_symname(struct module *mod, const char *name)
 	unsigned int i;
 
 	for (i = 0; i < mod->num_symtab; i++)
-		if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0)
+		if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
+		    mod->symtab[i].st_info != 'U')
 			return mod->symtab[i].st_value;
 	return 0;
 }
diff --git a/kernel/panic.c b/kernel/panic.c
index c5c4ab2558..126dc43f1c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 #endif
 	local_irq_enable();
 	for (i = 0;;) {
+		touch_softlockup_watchdog();
 		i += panic_blink(i);
 		mdelay(1);
 		i++;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 197208b3aa..216f574b5f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -194,9 +194,7 @@ static inline int common_clock_set(const clockid_t which_clock,
 
 static int common_timer_create(struct k_itimer *new_timer)
 {
-	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock);
-	new_timer->it.real.timer.data = new_timer;
-	new_timer->it.real.timer.function = posix_timer_fn;
+	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
 	return 0;
 }
 
@@ -290,7 +288,8 @@ void do_schedule_next_timer(struct siginfo *info)
 		info->si_overrun = timr->it_overrun_last;
 	}
 
-	unlock_timer(timr, flags);
+	if (timr)
+		unlock_timer(timr, flags);
 }
 
 int posix_timer_event(struct k_itimer *timr,int si_private)
@@ -692,6 +691,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 		  struct itimerspec *new_setting, struct itimerspec *old_setting)
 {
 	struct hrtimer *timer = &timr->it.real.timer;
+	enum hrtimer_mode mode;
 
 	if (old_setting)
 		common_timer_get(timr, old_setting);
@@ -713,14 +713,10 @@ common_timer_set(struct k_itimer *timr, int flags,
 	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
 		return 0;
 
-	/* Posix madness. Only absolute CLOCK_REALTIME timers
-	 * are affected by clock sets. So we must reiniatilize
-	 * the timer.
-	 */
-	if (timr->it_clock == CLOCK_REALTIME && (flags & TIMER_ABSTIME))
-		hrtimer_rebase(timer, CLOCK_REALTIME);
-	else
-		hrtimer_rebase(timer, CLOCK_MONOTONIC);
+	mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
+	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
+	timr->it.real.timer.data = timr;
+	timr->it.real.timer.function = posix_timer_fn;
 
 	timer->expires = timespec_to_ktime(new_setting->it_value);
 
@@ -728,11 +724,15 @@ common_timer_set(struct k_itimer *timr, int flags,
 	timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
 
 	/* SIGEV_NONE timers are not queued ! See common_timer_get */
-	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
+		/* Setup correct expiry time for relative timers */
+		if (mode == HRTIMER_REL)
+			timer->expires = ktime_add(timer->expires,
+						   timer->base->get_time());
 		return 0;
+	}
 
-	hrtimer_start(timer, timer->expires, (flags & TIMER_ABSTIME) ?
-			HRTIMER_ABS : HRTIMER_REL);
+	hrtimer_start(timer, timer->expires, mode);
 	return 0;
 }
 
@@ -875,12 +875,6 @@ int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
 
-int do_posix_clock_notimer_create(struct k_itimer *timer)
-{
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create);
-
 int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
 			       struct timespec *t, struct timespec __user *r)
 {
@@ -947,21 +941,8 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
 static int common_nsleep(const clockid_t which_clock, int flags,
 			 struct timespec *tsave, struct timespec __user *rmtp)
 {
-	int mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
-	int clockid = which_clock;
-
-	switch (which_clock) {
-	case CLOCK_REALTIME:
-		/* Posix madness. Only absolute timers on clock realtime
-		   are affected by clock set. */
-		if (mode != HRTIMER_ABS)
-			clockid = CLOCK_MONOTONIC;
-	case CLOCK_MONOTONIC:
-		break;
-	default:
-		return -EINVAL;
-	}
-	return hrtimer_nanosleep(tsave, rmtp, mode, clockid);
+	return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
+				 HRTIMER_ABS : HRTIMER_REL, which_clock);
 }
 
 asmlinkage long
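[Editorial note: for reference, a small userspace sketch of the two sleep variants that the simplified common_nsleep() above now passes straight down to hrtimer_nanosleep(); the program is illustrative only and uses the standard clock_nanosleep(2) interface, which may require linking with -lrt on older C libraries.]

#include <time.h>

int main(void)
{
	struct timespec now, rel = { .tv_sec = 1, .tv_nsec = 0 };

	/* Relative sleep: handled internally as HRTIMER_REL. */
	clock_nanosleep(CLOCK_REALTIME, 0, &rel, NULL);

	/* Absolute sleep until now + 1s: TIMER_ABSTIME maps to HRTIMER_ABS
	 * and is the only variant affected by clock_settime(). */
	clock_gettime(CLOCK_REALTIME, &now);
	now.tv_sec += 1;
	clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &now, NULL);

	return 0;
}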
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 7ff375e7c9..623786d441 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,18 +9,13 @@
 #include <linux/console.h>
 #include "power.h"
 
-static int new_loglevel = 10;
-static int orig_loglevel;
-#ifdef SUSPEND_CONSOLE
+#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
+#define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
+
 static int orig_fgconsole, orig_kmsg;
-#endif
 
 int pm_prepare_console(void)
 {
-	orig_loglevel = console_loglevel;
-	console_loglevel = new_loglevel;
-
-#ifdef SUSPEND_CONSOLE
 	acquire_console_sem();
 
 	orig_fgconsole = fg_console;
@@ -41,18 +36,15 @@ int pm_prepare_console(void)
 	}
 	orig_kmsg = kmsg_redirect;
 	kmsg_redirect = SUSPEND_CONSOLE;
-#endif
 	return 0;
 }
 
 void pm_restore_console(void)
 {
-	console_loglevel = orig_loglevel;
-#ifdef SUSPEND_CONSOLE
 	acquire_console_sem();
 	set_console(orig_fgconsole);
 	release_console_sem();
 	kmsg_redirect = orig_kmsg;
-#endif
 	return;
 }
+#endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e24446f8d8..0b43847dc9 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -53,7 +53,7 @@ static void power_down(suspend_disk_method_t mode)
 
 	switch(mode) {
 	case PM_DISK_PLATFORM:
-		kernel_power_off_prepare();
+		kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
 		error = pm_ops->enter(PM_SUSPEND_DISK);
 		break;
 	case PM_DISK_SHUTDOWN:
@@ -95,13 +95,6 @@ static int prepare_processes(void)
 		goto thaw;
 	}
 
-	if (pm_disk_mode == PM_DISK_PLATFORM) {
-		if (pm_ops && pm_ops->prepare) {
-			if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
-				goto thaw;
-		}
-	}
-
 	/* Free memory before shutting down devices. */
 	if (!(error = swsusp_shrink_memory()))
 		return 0;
@@ -367,14 +360,14 @@ power_attr(resume);
 
 static ssize_t image_size_show(struct subsystem * subsys, char *buf)
 {
-	return sprintf(buf, "%u\n", image_size);
+	return sprintf(buf, "%lu\n", image_size);
 }
 
 static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n)
 {
-	unsigned int size;
+	unsigned long size;
 
-	if (sscanf(buf, "%u", &size) == 1) {
+	if (sscanf(buf, "%lu", &size) == 1) {
 		image_size = size;
 		return n;
 	}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d253f3ae2f..9cb235cba4 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -133,10 +133,10 @@ static int suspend_enter(suspend_state_t state)
 static void suspend_finish(suspend_state_t state)
 {
 	device_resume();
-	if (pm_ops && pm_ops->finish)
-		pm_ops->finish(state);
 	thaw_processes();
 	enable_nonboot_cpus();
+	if (pm_ops && pm_ops->finish)
+		pm_ops->finish(state);
 	pm_restore_console();
 }
 
diff --git a/kernel/power/power.h b/kernel/power/power.h index 7e8492fd14..388dba6808 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -1,14 +1,6 @@ | |||
1 | #include <linux/suspend.h> | 1 | #include <linux/suspend.h> |
2 | #include <linux/utsname.h> | 2 | #include <linux/utsname.h> |
3 | 3 | ||
4 | /* With SUSPEND_CONSOLE defined suspend looks *really* cool, but | ||
5 | we probably do not take enough locks for switching consoles, etc, | ||
6 | so bad things might happen. | ||
7 | */ | ||
8 | #if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) | ||
9 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) | ||
10 | #endif | ||
11 | |||
12 | struct swsusp_info { | 4 | struct swsusp_info { |
13 | struct new_utsname uts; | 5 | struct new_utsname uts; |
14 | u32 version_code; | 6 | u32 version_code; |
@@ -42,17 +34,14 @@ static struct subsys_attribute _name##_attr = { \ | |||
42 | 34 | ||
43 | extern struct subsystem power_subsys; | 35 | extern struct subsystem power_subsys; |
44 | 36 | ||
45 | extern int pm_prepare_console(void); | ||
46 | extern void pm_restore_console(void); | ||
47 | |||
48 | /* References to section boundaries */ | 37 | /* References to section boundaries */ |
49 | extern const void __nosave_begin, __nosave_end; | 38 | extern const void __nosave_begin, __nosave_end; |
50 | 39 | ||
51 | extern unsigned int nr_copy_pages; | 40 | extern unsigned int nr_copy_pages; |
52 | extern struct pbe *pagedir_nosave; | 41 | extern struct pbe *pagedir_nosave; |
53 | 42 | ||
54 | /* Preferred image size in MB (default 500) */ | 43 | /* Preferred image size in bytes (default 500 MB) */ |
55 | extern unsigned int image_size; | 44 | extern unsigned long image_size; |
56 | 45 | ||
57 | extern asmlinkage int swsusp_arch_suspend(void); | 46 | extern asmlinkage int swsusp_arch_suspend(void); |
58 | extern asmlinkage int swsusp_arch_resume(void); | 47 | extern asmlinkage int swsusp_arch_resume(void); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 41f66365f0..8d5a5986d6 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone) | |||
91 | * corrected eventually when the cases giving rise to this | 91 | * corrected eventually when the cases giving rise to this |
92 | * are better understood. | 92 | * are better understood. |
93 | */ | 93 | */ |
94 | if (PageReserved(page)) { | 94 | if (PageReserved(page)) |
95 | printk("highmem reserved page?!\n"); | ||
96 | continue; | 95 | continue; |
97 | } | ||
98 | BUG_ON(PageNosave(page)); | 96 | BUG_ON(PageNosave(page)); |
99 | if (PageNosaveFree(page)) | 97 | if (PageNosaveFree(page)) |
100 | continue; | 98 | continue; |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 55a18d26ab..2d9d08f72f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -70,12 +70,12 @@ | |||
70 | #include "power.h" | 70 | #include "power.h" |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * Preferred image size in MB (tunable via /sys/power/image_size). | 73 | * Preferred image size in bytes (tunable via /sys/power/image_size). |
74 | * When it is set to N, swsusp will do its best to ensure the image | 74 | * When it is set to N, swsusp will do its best to ensure the image |
75 | * size will not exceed N MB, but if that is impossible, it will | 75 | * size will not exceed N bytes, but if that is impossible, it will |
76 | * try to create the smallest image possible. | 76 | * try to create the smallest image possible. |
77 | */ | 77 | */ |
78 | unsigned int image_size = 500; | 78 | unsigned long image_size = 500 * 1024 * 1024; |
79 | 79 | ||
80 | #ifdef CONFIG_HIGHMEM | 80 | #ifdef CONFIG_HIGHMEM |
81 | unsigned int count_highmem_pages(void); | 81 | unsigned int count_highmem_pages(void); |
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */ | |||
153 | { | 153 | { |
154 | int i; | 154 | int i; |
155 | 155 | ||
156 | if (!swsusp_resume_device) | ||
157 | return -ENODEV; | ||
158 | spin_lock(&swap_lock); | 156 | spin_lock(&swap_lock); |
159 | for (i = 0; i < MAX_SWAPFILES; i++) { | 157 | for (i = 0; i < MAX_SWAPFILES; i++) { |
160 | if (!(swap_info[i].flags & SWP_WRITEOK)) | 158 | if (!(swap_info[i].flags & SWP_WRITEOK)) |
161 | continue; | 159 | continue; |
162 | if (is_resume_device(swap_info + i)) { | 160 | if (!swsusp_resume_device || is_resume_device(swap_info + i)) { |
163 | spin_unlock(&swap_lock); | 161 | spin_unlock(&swap_lock); |
164 | root_swap = i; | 162 | root_swap = i; |
165 | return 0; | 163 | return 0; |
@@ -590,7 +588,7 @@ int swsusp_shrink_memory(void) | |||
590 | if (!tmp) | 588 | if (!tmp) |
591 | return -ENOMEM; | 589 | return -ENOMEM; |
592 | pages += tmp; | 590 | pages += tmp; |
593 | } else if (size > (image_size * 1024 * 1024) / PAGE_SIZE) { | 591 | } else if (size > image_size / PAGE_SIZE) { |
594 | tmp = shrink_all_memory(SHRINK_BITE); | 592 | tmp = shrink_all_memory(SHRINK_BITE); |
595 | pages += tmp; | 593 | pages += tmp; |
596 | } | 594 | } |
@@ -743,7 +741,6 @@ static int submit(int rw, pgoff_t page_off, void *page) | |||
743 | if (!bio) | 741 | if (!bio) |
744 | return -ENOMEM; | 742 | return -ENOMEM; |
745 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | 743 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); |
746 | bio_get(bio); | ||
747 | bio->bi_bdev = resume_bdev; | 744 | bio->bi_bdev = resume_bdev; |
748 | bio->bi_end_io = end_io; | 745 | bio->bi_end_io = end_io; |
749 | 746 | ||
@@ -753,14 +750,13 @@ static int submit(int rw, pgoff_t page_off, void *page) | |||
753 | goto Done; | 750 | goto Done; |
754 | } | 751 | } |
755 | 752 | ||
756 | if (rw == WRITE) | ||
757 | bio_set_pages_dirty(bio); | ||
758 | 753 | ||
759 | atomic_set(&io_done, 1); | 754 | atomic_set(&io_done, 1); |
760 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | 755 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); |
761 | while (atomic_read(&io_done)) | 756 | while (atomic_read(&io_done)) |
762 | yield(); | 757 | yield(); |
763 | 758 | if (rw == READ) | |
759 | bio_set_pages_dirty(bio); | ||
764 | Done: | 760 | Done: |
765 | bio_put(bio); | 761 | bio_put(bio); |
766 | return error; | 762 | return error; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 5f33cdb6ff..d95a72c927 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child) | |||
72 | */ | 72 | */ |
73 | void __ptrace_unlink(task_t *child) | 73 | void __ptrace_unlink(task_t *child) |
74 | { | 74 | { |
75 | if (!child->ptrace) | 75 | BUG_ON(!child->ptrace); |
76 | BUG(); | 76 | |
77 | child->ptrace = 0; | 77 | child->ptrace = 0; |
78 | if (!list_empty(&child->ptrace_list)) { | 78 | if (!list_empty(&child->ptrace_list)) { |
79 | list_del_init(&child->ptrace_list); | 79 | list_del_init(&child->ptrace_list); |
@@ -184,22 +184,27 @@ bad: | |||
184 | return retval; | 184 | return retval; |
185 | } | 185 | } |
186 | 186 | ||
187 | void __ptrace_detach(struct task_struct *child, unsigned int data) | ||
188 | { | ||
189 | child->exit_code = data; | ||
190 | /* .. re-parent .. */ | ||
191 | __ptrace_unlink(child); | ||
192 | /* .. and wake it up. */ | ||
193 | if (child->exit_state != EXIT_ZOMBIE) | ||
194 | wake_up_process(child); | ||
195 | } | ||
196 | |||
187 | int ptrace_detach(struct task_struct *child, unsigned int data) | 197 | int ptrace_detach(struct task_struct *child, unsigned int data) |
188 | { | 198 | { |
189 | if (!valid_signal(data)) | 199 | if (!valid_signal(data)) |
190 | return -EIO; | 200 | return -EIO; |
191 | 201 | ||
192 | /* Architecture-specific hardware disable .. */ | 202 | /* Architecture-specific hardware disable .. */ |
193 | ptrace_disable(child); | 203 | ptrace_disable(child); |
194 | 204 | ||
195 | /* .. re-parent .. */ | ||
196 | child->exit_code = data; | ||
197 | |||
198 | write_lock_irq(&tasklist_lock); | 205 | write_lock_irq(&tasklist_lock); |
199 | __ptrace_unlink(child); | 206 | if (child->ptrace) |
200 | /* .. and wake it up. */ | 207 | __ptrace_detach(child, data); |
201 | if (child->exit_state != EXIT_ZOMBIE) | ||
202 | wake_up_process(child); | ||
203 | write_unlock_irq(&tasklist_lock); | 208 | write_unlock_irq(&tasklist_lock); |
204 | 209 | ||
205 | return 0; | 210 | return 0; |
@@ -242,8 +247,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
242 | if (write) { | 247 | if (write) { |
243 | copy_to_user_page(vma, page, addr, | 248 | copy_to_user_page(vma, page, addr, |
244 | maddr + offset, buf, bytes); | 249 | maddr + offset, buf, bytes); |
245 | if (!PageCompound(page)) | 250 | set_page_dirty_lock(page); |
246 | set_page_dirty_lock(page); | ||
247 | } else { | 251 | } else { |
248 | copy_from_user_page(vma, page, addr, | 252 | copy_from_user_page(vma, page, addr, |
249 | buf, maddr + offset, bytes); | 253 | buf, maddr + offset, bytes); |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 0cf8146bd5..8cf15a569f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; | |||
67 | 67 | ||
68 | /* Fake initialization required by compiler */ | 68 | /* Fake initialization required by compiler */ |
69 | static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; | 69 | static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; |
70 | static int maxbatch = 10000; | 70 | static int blimit = 10; |
71 | static int qhimark = 10000; | ||
72 | static int qlowmark = 100; | ||
73 | #ifdef CONFIG_SMP | ||
74 | static int rsinterval = 1000; | ||
75 | #endif | ||
76 | |||
77 | static atomic_t rcu_barrier_cpu_count; | ||
78 | static struct semaphore rcu_barrier_sema; | ||
79 | static struct completion rcu_barrier_completion; | ||
80 | |||
81 | #ifdef CONFIG_SMP | ||
82 | static void force_quiescent_state(struct rcu_data *rdp, | ||
83 | struct rcu_ctrlblk *rcp) | ||
84 | { | ||
85 | int cpu; | ||
86 | cpumask_t cpumask; | ||
87 | set_need_resched(); | ||
88 | if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { | ||
89 | rdp->last_rs_qlen = rdp->qlen; | ||
90 | /* | ||
91 | * Don't send IPI to itself. With irqs disabled, | ||
92 | * rdp->cpu is the current cpu. | ||
93 | */ | ||
94 | cpumask = rcp->cpumask; | ||
95 | cpu_clear(rdp->cpu, cpumask); | ||
96 | for_each_cpu_mask(cpu, cpumask) | ||
97 | smp_send_reschedule(cpu); | ||
98 | } | ||
99 | } | ||
100 | #else | ||
101 | static inline void force_quiescent_state(struct rcu_data *rdp, | ||
102 | struct rcu_ctrlblk *rcp) | ||
103 | { | ||
104 | set_need_resched(); | ||
105 | } | ||
106 | #endif | ||
71 | 107 | ||
72 | /** | 108 | /** |
73 | * call_rcu - Queue an RCU callback for invocation after a grace period. | 109 | * call_rcu - Queue an RCU callback for invocation after a grace period. |
@@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head, | |||
92 | rdp = &__get_cpu_var(rcu_data); | 128 | rdp = &__get_cpu_var(rcu_data); |
93 | *rdp->nxttail = head; | 129 | *rdp->nxttail = head; |
94 | rdp->nxttail = &head->next; | 130 | rdp->nxttail = &head->next; |
95 | 131 | if (unlikely(++rdp->qlen > qhimark)) { | |
96 | if (unlikely(++rdp->count > 10000)) | 132 | rdp->blimit = INT_MAX; |
97 | set_need_resched(); | 133 | force_quiescent_state(rdp, &rcu_ctrlblk); |
98 | 134 | } | |
99 | local_irq_restore(flags); | 135 | local_irq_restore(flags); |
100 | } | 136 | } |
101 | 137 | ||
102 | static atomic_t rcu_barrier_cpu_count; | ||
103 | static struct semaphore rcu_barrier_sema; | ||
104 | static struct completion rcu_barrier_completion; | ||
105 | |||
106 | /** | 138 | /** |
107 | * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. | 139 | * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. |
108 | * @head: structure to be used for queueing the RCU updates. | 140 | * @head: structure to be used for queueing the RCU updates. |
@@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, | |||
131 | rdp = &__get_cpu_var(rcu_bh_data); | 163 | rdp = &__get_cpu_var(rcu_bh_data); |
132 | *rdp->nxttail = head; | 164 | *rdp->nxttail = head; |
133 | rdp->nxttail = &head->next; | 165 | rdp->nxttail = &head->next; |
134 | rdp->count++; | 166 | |
135 | /* | 167 | if (unlikely(++rdp->qlen > qhimark)) { |
136 | * Should we directly call rcu_do_batch() here ? | 168 | rdp->blimit = INT_MAX; |
137 | * if (unlikely(rdp->count > 10000)) | 169 | force_quiescent_state(rdp, &rcu_bh_ctrlblk); |
138 | * rcu_do_batch(rdp); | 170 | } |
139 | */ | 171 | |
140 | local_irq_restore(flags); | 172 | local_irq_restore(flags); |
141 | } | 173 | } |
142 | 174 | ||
@@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp) | |||
199 | next = rdp->donelist = list->next; | 231 | next = rdp->donelist = list->next; |
200 | list->func(list); | 232 | list->func(list); |
201 | list = next; | 233 | list = next; |
202 | rdp->count--; | 234 | rdp->qlen--; |
203 | if (++count >= maxbatch) | 235 | if (++count >= rdp->blimit) |
204 | break; | 236 | break; |
205 | } | 237 | } |
238 | if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) | ||
239 | rdp->blimit = blimit; | ||
206 | if (!rdp->donelist) | 240 | if (!rdp->donelist) |
207 | rdp->donetail = &rdp->donelist; | 241 | rdp->donetail = &rdp->donelist; |
208 | else | 242 | else |
@@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, | |||
473 | rdp->quiescbatch = rcp->completed; | 507 | rdp->quiescbatch = rcp->completed; |
474 | rdp->qs_pending = 0; | 508 | rdp->qs_pending = 0; |
475 | rdp->cpu = cpu; | 509 | rdp->cpu = cpu; |
510 | rdp->blimit = blimit; | ||
476 | } | 511 | } |
477 | 512 | ||
478 | static void __devinit rcu_online_cpu(int cpu) | 513 | static void __devinit rcu_online_cpu(int cpu) |
@@ -567,7 +602,12 @@ void synchronize_kernel(void) | |||
567 | synchronize_rcu(); | 602 | synchronize_rcu(); |
568 | } | 603 | } |
569 | 604 | ||
570 | module_param(maxbatch, int, 0); | 605 | module_param(blimit, int, 0); |
606 | module_param(qhimark, int, 0); | ||
607 | module_param(qlowmark, int, 0); | ||
608 | #ifdef CONFIG_SMP | ||
609 | module_param(rsinterval, int, 0); | ||
610 | #endif | ||
571 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 611 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
572 | EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ | 612 | EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ |
573 | EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ | 613 | EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ |
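The rcupdate.c hunks above replace the single maxbatch limit with a per-CPU batching policy: each enqueue bumps rdp->qlen, crossing qhimark lifts the batch limit to INT_MAX and calls force_quiescent_state() (which on SMP pokes the other CPUs at most every rsinterval callbacks), and once the queue drains back below qlowmark the limit returns to blimit. A standalone, single-threaded sketch of just that counter logic, with the IPI side left out; struct fake_rdp and main() are illustrative only:

#include <limits.h>
#include <stdio.h>

static int blimit = 10, qhimark = 10000, qlowmark = 100;

struct fake_rdp { long qlen; int blimit; };

static void enqueue(struct fake_rdp *rdp)
{
        if (++rdp->qlen > qhimark)
                rdp->blimit = INT_MAX;  /* drain without the usual cap */
}

static long do_batch(struct fake_rdp *rdp)
{
        long done = 0;

        while (rdp->qlen && done < rdp->blimit) {
                rdp->qlen--;            /* "invoke" one callback */
                done++;
        }
        if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
                rdp->blimit = blimit;   /* back to normal batching */
        return done;
}

int main(void)
{
        struct fake_rdp rdp = { .qlen = 0, .blimit = blimit };
        int i;

        for (i = 0; i < 20000; i++)
                enqueue(&rdp);
        printf("first batch drained %ld callbacks\n", do_batch(&rdp));
        printf("batch limit is back to %d\n", rdp.blimit);
        return 0;
}
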
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 773219907d..7712912dbc 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -114,16 +114,16 @@ rcu_torture_alloc(void) | |||
114 | { | 114 | { |
115 | struct list_head *p; | 115 | struct list_head *p; |
116 | 116 | ||
117 | spin_lock(&rcu_torture_lock); | 117 | spin_lock_bh(&rcu_torture_lock); |
118 | if (list_empty(&rcu_torture_freelist)) { | 118 | if (list_empty(&rcu_torture_freelist)) { |
119 | atomic_inc(&n_rcu_torture_alloc_fail); | 119 | atomic_inc(&n_rcu_torture_alloc_fail); |
120 | spin_unlock(&rcu_torture_lock); | 120 | spin_unlock_bh(&rcu_torture_lock); |
121 | return NULL; | 121 | return NULL; |
122 | } | 122 | } |
123 | atomic_inc(&n_rcu_torture_alloc); | 123 | atomic_inc(&n_rcu_torture_alloc); |
124 | p = rcu_torture_freelist.next; | 124 | p = rcu_torture_freelist.next; |
125 | list_del_init(p); | 125 | list_del_init(p); |
126 | spin_unlock(&rcu_torture_lock); | 126 | spin_unlock_bh(&rcu_torture_lock); |
127 | return container_of(p, struct rcu_torture, rtort_free); | 127 | return container_of(p, struct rcu_torture, rtort_free); |
128 | } | 128 | } |
129 | 129 | ||
@@ -134,9 +134,9 @@ static void | |||
134 | rcu_torture_free(struct rcu_torture *p) | 134 | rcu_torture_free(struct rcu_torture *p) |
135 | { | 135 | { |
136 | atomic_inc(&n_rcu_torture_free); | 136 | atomic_inc(&n_rcu_torture_free); |
137 | spin_lock(&rcu_torture_lock); | 137 | spin_lock_bh(&rcu_torture_lock); |
138 | list_add_tail(&p->rtort_free, &rcu_torture_freelist); | 138 | list_add_tail(&p->rtort_free, &rcu_torture_freelist); |
139 | spin_unlock(&rcu_torture_lock); | 139 | spin_unlock_bh(&rcu_torture_lock); |
140 | } | 140 | } |
141 | 141 | ||
142 | static void | 142 | static void |
diff --git a/kernel/sched.c b/kernel/sched.c index 3ee2ae4512..4d46e90f59 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p) | |||
178 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ | 178 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ |
179 | < (long long) (sd)->cache_hot_time) | 179 | < (long long) (sd)->cache_hot_time) |
180 | 180 | ||
181 | void __put_task_struct_cb(struct rcu_head *rhp) | ||
182 | { | ||
183 | __put_task_struct(container_of(rhp, struct task_struct, rcu)); | ||
184 | } | ||
185 | |||
186 | EXPORT_SYMBOL_GPL(__put_task_struct_cb); | ||
187 | |||
188 | /* | 181 | /* |
189 | * These are the runqueue data structures: | 182 | * These are the runqueue data structures: |
190 | */ | 183 | */ |
@@ -215,7 +208,6 @@ struct runqueue { | |||
215 | */ | 208 | */ |
216 | unsigned long nr_running; | 209 | unsigned long nr_running; |
217 | #ifdef CONFIG_SMP | 210 | #ifdef CONFIG_SMP |
218 | unsigned long prio_bias; | ||
219 | unsigned long cpu_load[3]; | 211 | unsigned long cpu_load[3]; |
220 | #endif | 212 | #endif |
221 | unsigned long long nr_switches; | 213 | unsigned long long nr_switches; |
@@ -669,68 +661,13 @@ static int effective_prio(task_t *p) | |||
669 | return prio; | 661 | return prio; |
670 | } | 662 | } |
671 | 663 | ||
672 | #ifdef CONFIG_SMP | ||
673 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
674 | { | ||
675 | rq->prio_bias += MAX_PRIO - prio; | ||
676 | } | ||
677 | |||
678 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
679 | { | ||
680 | rq->prio_bias -= MAX_PRIO - prio; | ||
681 | } | ||
682 | |||
683 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
684 | { | ||
685 | rq->nr_running++; | ||
686 | if (rt_task(p)) { | ||
687 | if (p != rq->migration_thread) | ||
688 | /* | ||
689 | * The migration thread does the actual balancing. Do | ||
690 | * not bias by its priority as the ultra high priority | ||
691 | * will skew balancing adversely. | ||
692 | */ | ||
693 | inc_prio_bias(rq, p->prio); | ||
694 | } else | ||
695 | inc_prio_bias(rq, p->static_prio); | ||
696 | } | ||
697 | |||
698 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
699 | { | ||
700 | rq->nr_running--; | ||
701 | if (rt_task(p)) { | ||
702 | if (p != rq->migration_thread) | ||
703 | dec_prio_bias(rq, p->prio); | ||
704 | } else | ||
705 | dec_prio_bias(rq, p->static_prio); | ||
706 | } | ||
707 | #else | ||
708 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
709 | { | ||
710 | } | ||
711 | |||
712 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
713 | { | ||
714 | } | ||
715 | |||
716 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
717 | { | ||
718 | rq->nr_running++; | ||
719 | } | ||
720 | |||
721 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
722 | { | ||
723 | rq->nr_running--; | ||
724 | } | ||
725 | #endif | ||
726 | |||
727 | /* | 664 | /* |
728 | * __activate_task - move a task to the runqueue. | 665 | * __activate_task - move a task to the runqueue. |
729 | */ | 666 | */ |
730 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 667 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
731 | { | 668 | { |
732 | enqueue_task(p, rq->active); | 669 | enqueue_task(p, rq->active); |
733 | inc_nr_running(p, rq); | 670 | rq->nr_running++; |
734 | } | 671 | } |
735 | 672 | ||
736 | /* | 673 | /* |
@@ -739,7 +676,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) | |||
739 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 676 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
740 | { | 677 | { |
741 | enqueue_task_head(p, rq->active); | 678 | enqueue_task_head(p, rq->active); |
742 | inc_nr_running(p, rq); | 679 | rq->nr_running++; |
743 | } | 680 | } |
744 | 681 | ||
745 | static int recalc_task_prio(task_t *p, unsigned long long now) | 682 | static int recalc_task_prio(task_t *p, unsigned long long now) |
@@ -863,7 +800,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
863 | */ | 800 | */ |
864 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 801 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
865 | { | 802 | { |
866 | dec_nr_running(p, rq); | 803 | rq->nr_running--; |
867 | dequeue_task(p, p->array); | 804 | dequeue_task(p, p->array); |
868 | p->array = NULL; | 805 | p->array = NULL; |
869 | } | 806 | } |
@@ -1007,61 +944,27 @@ void kick_process(task_t *p) | |||
1007 | * We want to under-estimate the load of migration sources, to | 944 | * We want to under-estimate the load of migration sources, to |
1008 | * balance conservatively. | 945 | * balance conservatively. |
1009 | */ | 946 | */ |
1010 | static unsigned long __source_load(int cpu, int type, enum idle_type idle) | 947 | static inline unsigned long source_load(int cpu, int type) |
1011 | { | 948 | { |
1012 | runqueue_t *rq = cpu_rq(cpu); | 949 | runqueue_t *rq = cpu_rq(cpu); |
1013 | unsigned long running = rq->nr_running; | 950 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; |
1014 | unsigned long source_load, cpu_load = rq->cpu_load[type-1], | ||
1015 | load_now = running * SCHED_LOAD_SCALE; | ||
1016 | |||
1017 | if (type == 0) | 951 | if (type == 0) |
1018 | source_load = load_now; | 952 | return load_now; |
1019 | else | ||
1020 | source_load = min(cpu_load, load_now); | ||
1021 | |||
1022 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1023 | /* | ||
1024 | * If we are busy rebalancing the load is biased by | ||
1025 | * priority to create 'nice' support across cpus. When | ||
1026 | * idle rebalancing we should only bias the source_load if | ||
1027 | * there is more than one task running on that queue to | ||
1028 | * prevent idle rebalance from trying to pull tasks from a | ||
1029 | * queue with only one running task. | ||
1030 | */ | ||
1031 | source_load = source_load * rq->prio_bias / running; | ||
1032 | |||
1033 | return source_load; | ||
1034 | } | ||
1035 | 953 | ||
1036 | static inline unsigned long source_load(int cpu, int type) | 954 | return min(rq->cpu_load[type-1], load_now); |
1037 | { | ||
1038 | return __source_load(cpu, type, NOT_IDLE); | ||
1039 | } | 955 | } |
1040 | 956 | ||
1041 | /* | 957 | /* |
1042 | * Return a high guess at the load of a migration-target cpu | 958 | * Return a high guess at the load of a migration-target cpu |
1043 | */ | 959 | */ |
1044 | static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) | 960 | static inline unsigned long target_load(int cpu, int type) |
1045 | { | 961 | { |
1046 | runqueue_t *rq = cpu_rq(cpu); | 962 | runqueue_t *rq = cpu_rq(cpu); |
1047 | unsigned long running = rq->nr_running; | 963 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; |
1048 | unsigned long target_load, cpu_load = rq->cpu_load[type-1], | ||
1049 | load_now = running * SCHED_LOAD_SCALE; | ||
1050 | |||
1051 | if (type == 0) | 964 | if (type == 0) |
1052 | target_load = load_now; | 965 | return load_now; |
1053 | else | ||
1054 | target_load = max(cpu_load, load_now); | ||
1055 | |||
1056 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1057 | target_load = target_load * rq->prio_bias / running; | ||
1058 | 966 | ||
1059 | return target_load; | 967 | return max(rq->cpu_load[type-1], load_now); |
1060 | } | ||
1061 | |||
1062 | static inline unsigned long target_load(int cpu, int type) | ||
1063 | { | ||
1064 | return __target_load(cpu, type, NOT_IDLE); | ||
1065 | } | 968 | } |
1066 | 969 | ||
1067 | /* | 970 | /* |
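With the prio_bias logic gone, the hunk above reduces source_load()/target_load() to an under-estimate and an over-estimate built from the same two inputs. A worked example; SCHED_LOAD_SCALE is assumed to be 128 here (its usual value at the time) and the numbers are made up:

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL  /* assumed value, for illustration */

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
        unsigned long nr_running = 3, cpu_load = 500;   /* decayed load average */
        unsigned long load_now = nr_running * SCHED_LOAD_SCALE;  /* 384 */

        /* under-estimate the source, over-estimate the target, so the
         * balancer errs on the side of leaving tasks where they are */
        printf("source_load = %lu\n", min_ul(cpu_load, load_now));  /* 384 */
        printf("target_load = %lu\n", max_ul(cpu_load, load_now));  /* 500 */
        return 0;
}
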
@@ -1294,9 +1197,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync) | |||
1294 | } | 1197 | } |
1295 | } | 1198 | } |
1296 | 1199 | ||
1297 | if (p->last_waker_cpu != this_cpu) | ||
1298 | goto out_set_cpu; | ||
1299 | |||
1300 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) | 1200 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) |
1301 | goto out_set_cpu; | 1201 | goto out_set_cpu; |
1302 | 1202 | ||
@@ -1367,8 +1267,6 @@ out_set_cpu: | |||
1367 | cpu = task_cpu(p); | 1267 | cpu = task_cpu(p); |
1368 | } | 1268 | } |
1369 | 1269 | ||
1370 | p->last_waker_cpu = this_cpu; | ||
1371 | |||
1372 | out_activate: | 1270 | out_activate: |
1373 | #endif /* CONFIG_SMP */ | 1271 | #endif /* CONFIG_SMP */ |
1374 | if (old_state == TASK_UNINTERRUPTIBLE) { | 1272 | if (old_state == TASK_UNINTERRUPTIBLE) { |
@@ -1450,12 +1348,9 @@ void fastcall sched_fork(task_t *p, int clone_flags) | |||
1450 | #ifdef CONFIG_SCHEDSTATS | 1348 | #ifdef CONFIG_SCHEDSTATS |
1451 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 1349 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
1452 | #endif | 1350 | #endif |
1453 | #if defined(CONFIG_SMP) | 1351 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
1454 | p->last_waker_cpu = cpu; | ||
1455 | #if defined(__ARCH_WANT_UNLOCKED_CTXSW) | ||
1456 | p->oncpu = 0; | 1352 | p->oncpu = 0; |
1457 | #endif | 1353 | #endif |
1458 | #endif | ||
1459 | #ifdef CONFIG_PREEMPT | 1354 | #ifdef CONFIG_PREEMPT |
1460 | /* Want to start with kernel preemption disabled. */ | 1355 | /* Want to start with kernel preemption disabled. */ |
1461 | task_thread_info(p)->preempt_count = 1; | 1356 | task_thread_info(p)->preempt_count = 1; |
@@ -1530,7 +1425,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) | |||
1530 | list_add_tail(&p->run_list, ¤t->run_list); | 1425 | list_add_tail(&p->run_list, ¤t->run_list); |
1531 | p->array = current->array; | 1426 | p->array = current->array; |
1532 | p->array->nr_active++; | 1427 | p->array->nr_active++; |
1533 | inc_nr_running(p, rq); | 1428 | rq->nr_running++; |
1534 | } | 1429 | } |
1535 | set_need_resched(); | 1430 | set_need_resched(); |
1536 | } else | 1431 | } else |
@@ -1875,9 +1770,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
1875 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1770 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
1876 | { | 1771 | { |
1877 | dequeue_task(p, src_array); | 1772 | dequeue_task(p, src_array); |
1878 | dec_nr_running(p, src_rq); | 1773 | src_rq->nr_running--; |
1879 | set_task_cpu(p, this_cpu); | 1774 | set_task_cpu(p, this_cpu); |
1880 | inc_nr_running(p, this_rq); | 1775 | this_rq->nr_running++; |
1881 | enqueue_task(p, this_array); | 1776 | enqueue_task(p, this_array); |
1882 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1777 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
1883 | + this_rq->timestamp_last_tick; | 1778 | + this_rq->timestamp_last_tick; |
@@ -2056,9 +1951,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2056 | 1951 | ||
2057 | /* Bias balancing toward cpus of our domain */ | 1952 | /* Bias balancing toward cpus of our domain */ |
2058 | if (local_group) | 1953 | if (local_group) |
2059 | load = __target_load(i, load_idx, idle); | 1954 | load = target_load(i, load_idx); |
2060 | else | 1955 | else |
2061 | load = __source_load(i, load_idx, idle); | 1956 | load = source_load(i, load_idx); |
2062 | 1957 | ||
2063 | avg_load += load; | 1958 | avg_load += load; |
2064 | } | 1959 | } |
@@ -2171,7 +2066,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group, | |||
2171 | int i; | 2066 | int i; |
2172 | 2067 | ||
2173 | for_each_cpu_mask(i, group->cpumask) { | 2068 | for_each_cpu_mask(i, group->cpumask) { |
2174 | load = __source_load(i, 0, idle); | 2069 | load = source_load(i, 0); |
2175 | 2070 | ||
2176 | if (load > max_load) { | 2071 | if (load > max_load) { |
2177 | max_load = load; | 2072 | max_load = load; |
@@ -3571,10 +3466,8 @@ void set_user_nice(task_t *p, long nice) | |||
3571 | goto out_unlock; | 3466 | goto out_unlock; |
3572 | } | 3467 | } |
3573 | array = p->array; | 3468 | array = p->array; |
3574 | if (array) { | 3469 | if (array) |
3575 | dequeue_task(p, array); | 3470 | dequeue_task(p, array); |
3576 | dec_prio_bias(rq, p->static_prio); | ||
3577 | } | ||
3578 | 3471 | ||
3579 | old_prio = p->prio; | 3472 | old_prio = p->prio; |
3580 | new_prio = NICE_TO_PRIO(nice); | 3473 | new_prio = NICE_TO_PRIO(nice); |
@@ -3584,7 +3477,6 @@ void set_user_nice(task_t *p, long nice) | |||
3584 | 3477 | ||
3585 | if (array) { | 3478 | if (array) { |
3586 | enqueue_task(p, array); | 3479 | enqueue_task(p, array); |
3587 | inc_prio_bias(rq, p->static_prio); | ||
3588 | /* | 3480 | /* |
3589 | * If the task increased its priority or is running and | 3481 | * If the task increased its priority or is running and |
3590 | * lowered its priority, then reschedule its CPU: | 3482 | * lowered its priority, then reschedule its CPU: |
@@ -4031,7 +3923,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) | |||
4031 | goto out_unlock; | 3923 | goto out_unlock; |
4032 | 3924 | ||
4033 | retval = 0; | 3925 | retval = 0; |
4034 | cpus_and(*mask, p->cpus_allowed, cpu_possible_map); | 3926 | cpus_and(*mask, p->cpus_allowed, cpu_online_map); |
4035 | 3927 | ||
4036 | out_unlock: | 3928 | out_unlock: |
4037 | read_unlock(&tasklist_lock); | 3929 | read_unlock(&tasklist_lock); |
@@ -4129,6 +4021,8 @@ static inline void __cond_resched(void) | |||
4129 | */ | 4021 | */ |
4130 | if (unlikely(preempt_count())) | 4022 | if (unlikely(preempt_count())) |
4131 | return; | 4023 | return; |
4024 | if (unlikely(system_state != SYSTEM_RUNNING)) | ||
4025 | return; | ||
4132 | do { | 4026 | do { |
4133 | add_preempt_count(PREEMPT_ACTIVE); | 4027 | add_preempt_count(PREEMPT_ACTIVE); |
4134 | schedule(); | 4028 | schedule(); |
@@ -4434,6 +4328,7 @@ void __devinit init_idle(task_t *idle, int cpu) | |||
4434 | runqueue_t *rq = cpu_rq(cpu); | 4328 | runqueue_t *rq = cpu_rq(cpu); |
4435 | unsigned long flags; | 4329 | unsigned long flags; |
4436 | 4330 | ||
4331 | idle->timestamp = sched_clock(); | ||
4437 | idle->sleep_avg = 0; | 4332 | idle->sleep_avg = 0; |
4438 | idle->array = NULL; | 4333 | idle->array = NULL; |
4439 | idle->prio = MAX_PRIO; | 4334 | idle->prio = MAX_PRIO; |
@@ -5141,7 +5036,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, | |||
5141 | #define SEARCH_SCOPE 2 | 5036 | #define SEARCH_SCOPE 2 |
5142 | #define MIN_CACHE_SIZE (64*1024U) | 5037 | #define MIN_CACHE_SIZE (64*1024U) |
5143 | #define DEFAULT_CACHE_SIZE (5*1024*1024U) | 5038 | #define DEFAULT_CACHE_SIZE (5*1024*1024U) |
5144 | #define ITERATIONS 2 | 5039 | #define ITERATIONS 1 |
5145 | #define SIZE_THRESH 130 | 5040 | #define SIZE_THRESH 130 |
5146 | #define COST_THRESH 130 | 5041 | #define COST_THRESH 130 |
5147 | 5042 | ||
@@ -5159,7 +5054,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, | |||
5159 | #define MAX_DOMAIN_DISTANCE 32 | 5054 | #define MAX_DOMAIN_DISTANCE 32 |
5160 | 5055 | ||
5161 | static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = | 5056 | static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = |
5162 | { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL }; | 5057 | { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = |
5058 | /* | ||
5059 | * Architectures may override the migration cost and thus avoid | ||
5060 | * boot-time calibration. Unit is nanoseconds. Mostly useful for | ||
5061 | * virtualized hardware: | ||
5062 | */ | ||
5063 | #ifdef CONFIG_DEFAULT_MIGRATION_COST | ||
5064 | CONFIG_DEFAULT_MIGRATION_COST | ||
5065 | #else | ||
5066 | -1LL | ||
5067 | #endif | ||
5068 | }; | ||
5163 | 5069 | ||
5164 | /* | 5070 | /* |
5165 | * Allow override of migration cost - in units of microseconds. | 5071 | * Allow override of migration cost - in units of microseconds. |
@@ -5480,9 +5386,9 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) | |||
5480 | break; | 5386 | break; |
5481 | } | 5387 | } |
5482 | /* | 5388 | /* |
5483 | * Increase the cachesize in 5% steps: | 5389 | * Increase the cachesize in 10% steps: |
5484 | */ | 5390 | */ |
5485 | size = size * 20 / 19; | 5391 | size = size * 10 / 9; |
5486 | } | 5392 | } |
5487 | 5393 | ||
5488 | if (migration_debug) | 5394 | if (migration_debug) |
@@ -5551,13 +5457,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map) | |||
5551 | -1 | 5457 | -1 |
5552 | #endif | 5458 | #endif |
5553 | ); | 5459 | ); |
5554 | printk("migration_cost="); | 5460 | if (system_state == SYSTEM_BOOTING) { |
5555 | for (distance = 0; distance <= max_distance; distance++) { | 5461 | printk("migration_cost="); |
5556 | if (distance) | 5462 | for (distance = 0; distance <= max_distance; distance++) { |
5557 | printk(","); | 5463 | if (distance) |
5558 | printk("%ld", (long)migration_cost[distance] / 1000); | 5464 | printk(","); |
5465 | printk("%ld", (long)migration_cost[distance] / 1000); | ||
5466 | } | ||
5467 | printk("\n"); | ||
5559 | } | 5468 | } |
5560 | printk("\n"); | ||
5561 | j1 = jiffies; | 5469 | j1 = jiffies; |
5562 | if (migration_debug) | 5470 | if (migration_debug) |
5563 | printk("migration: %ld seconds\n", (j1-j0)/HZ); | 5471 | printk("migration: %ld seconds\n", (j1-j0)/HZ); |
@@ -6109,7 +6017,7 @@ void __init sched_init(void) | |||
6109 | runqueue_t *rq; | 6017 | runqueue_t *rq; |
6110 | int i, j, k; | 6018 | int i, j, k; |
6111 | 6019 | ||
6112 | for (i = 0; i < NR_CPUS; i++) { | 6020 | for_each_cpu(i) { |
6113 | prio_array_t *array; | 6021 | prio_array_t *array; |
6114 | 6022 | ||
6115 | rq = cpu_rq(i); | 6023 | rq = cpu_rq(i); |
diff --git a/kernel/signal.c b/kernel/signal.c index d3efafd810..ea154104a0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -283,7 +283,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, | |||
283 | return(q); | 283 | return(q); |
284 | } | 284 | } |
285 | 285 | ||
286 | static inline void __sigqueue_free(struct sigqueue *q) | 286 | static void __sigqueue_free(struct sigqueue *q) |
287 | { | 287 | { |
288 | if (q->flags & SIGQUEUE_PREALLOC) | 288 | if (q->flags & SIGQUEUE_PREALLOC) |
289 | return; | 289 | return; |
@@ -2430,7 +2430,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) | |||
2430 | } | 2430 | } |
2431 | 2431 | ||
2432 | int | 2432 | int |
2433 | do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) | 2433 | do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) |
2434 | { | 2434 | { |
2435 | struct k_sigaction *k; | 2435 | struct k_sigaction *k; |
2436 | sigset_t mask; | 2436 | sigset_t mask; |
@@ -2454,6 +2454,8 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) | |||
2454 | *oact = *k; | 2454 | *oact = *k; |
2455 | 2455 | ||
2456 | if (act) { | 2456 | if (act) { |
2457 | sigdelsetmask(&act->sa.sa_mask, | ||
2458 | sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
2457 | /* | 2459 | /* |
2458 | * POSIX 3.3.1.3: | 2460 | * POSIX 3.3.1.3: |
2459 | * "Setting a signal action to SIG_IGN for a signal that is | 2461 | * "Setting a signal action to SIG_IGN for a signal that is |
@@ -2479,8 +2481,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) | |||
2479 | read_lock(&tasklist_lock); | 2481 | read_lock(&tasklist_lock); |
2480 | spin_lock_irq(&t->sighand->siglock); | 2482 | spin_lock_irq(&t->sighand->siglock); |
2481 | *k = *act; | 2483 | *k = *act; |
2482 | sigdelsetmask(&k->sa.sa_mask, | ||
2483 | sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
2484 | sigemptyset(&mask); | 2484 | sigemptyset(&mask); |
2485 | sigaddset(&mask, sig); | 2485 | sigaddset(&mask, sig); |
2486 | rm_from_queue_full(&mask, &t->signal->shared_pending); | 2486 | rm_from_queue_full(&mask, &t->signal->shared_pending); |
@@ -2495,8 +2495,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) | |||
2495 | } | 2495 | } |
2496 | 2496 | ||
2497 | *k = *act; | 2497 | *k = *act; |
2498 | sigdelsetmask(&k->sa.sa_mask, | ||
2499 | sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
2500 | } | 2498 | } |
2501 | 2499 | ||
2502 | spin_unlock_irq(¤t->sighand->siglock); | 2500 | spin_unlock_irq(¤t->sighand->siglock); |
@@ -2702,6 +2700,7 @@ sys_signal(int sig, __sighandler_t handler) | |||
2702 | 2700 | ||
2703 | new_sa.sa.sa_handler = handler; | 2701 | new_sa.sa.sa_handler = handler; |
2704 | new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; | 2702 | new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; |
2703 | sigemptyset(&new_sa.sa.sa_mask); | ||
2705 | 2704 | ||
2706 | ret = do_sigaction(sig, &new_sa, &old_sa); | 2705 | ret = do_sigaction(sig, &new_sa, &old_sa); |
2707 | 2706 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index d09cac23fd..f91218a546 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -428,7 +428,7 @@ void kernel_kexec(void) | |||
428 | { | 428 | { |
429 | #ifdef CONFIG_KEXEC | 429 | #ifdef CONFIG_KEXEC |
430 | struct kimage *image; | 430 | struct kimage *image; |
431 | image = xchg(&kexec_image, 0); | 431 | image = xchg(&kexec_image, NULL); |
432 | if (!image) { | 432 | if (!image) { |
433 | return; | 433 | return; |
434 | } | 434 | } |
@@ -440,23 +440,25 @@ void kernel_kexec(void) | |||
440 | } | 440 | } |
441 | EXPORT_SYMBOL_GPL(kernel_kexec); | 441 | EXPORT_SYMBOL_GPL(kernel_kexec); |
442 | 442 | ||
443 | void kernel_shutdown_prepare(enum system_states state) | ||
444 | { | ||
445 | notifier_call_chain(&reboot_notifier_list, | ||
446 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | ||
447 | system_state = state; | ||
448 | device_shutdown(); | ||
449 | } | ||
443 | /** | 450 | /** |
444 | * kernel_halt - halt the system | 451 | * kernel_halt - halt the system |
445 | * | 452 | * |
446 | * Shutdown everything and perform a clean system halt. | 453 | * Shutdown everything and perform a clean system halt. |
447 | */ | 454 | */ |
448 | void kernel_halt_prepare(void) | ||
449 | { | ||
450 | notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); | ||
451 | system_state = SYSTEM_HALT; | ||
452 | device_shutdown(); | ||
453 | } | ||
454 | void kernel_halt(void) | 455 | void kernel_halt(void) |
455 | { | 456 | { |
456 | kernel_halt_prepare(); | 457 | kernel_shutdown_prepare(SYSTEM_HALT); |
457 | printk(KERN_EMERG "System halted.\n"); | 458 | printk(KERN_EMERG "System halted.\n"); |
458 | machine_halt(); | 459 | machine_halt(); |
459 | } | 460 | } |
461 | |||
460 | EXPORT_SYMBOL_GPL(kernel_halt); | 462 | EXPORT_SYMBOL_GPL(kernel_halt); |
461 | 463 | ||
462 | /** | 464 | /** |
@@ -464,20 +466,13 @@ EXPORT_SYMBOL_GPL(kernel_halt); | |||
464 | * | 466 | * |
465 | * Shutdown everything and perform a clean system power_off. | 467 | * Shutdown everything and perform a clean system power_off. |
466 | */ | 468 | */ |
467 | void kernel_power_off_prepare(void) | ||
468 | { | ||
469 | notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); | ||
470 | system_state = SYSTEM_POWER_OFF; | ||
471 | device_shutdown(); | ||
472 | } | ||
473 | void kernel_power_off(void) | 469 | void kernel_power_off(void) |
474 | { | 470 | { |
475 | kernel_power_off_prepare(); | 471 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); |
476 | printk(KERN_EMERG "Power down.\n"); | 472 | printk(KERN_EMERG "Power down.\n"); |
477 | machine_power_off(); | 473 | machine_power_off(); |
478 | } | 474 | } |
479 | EXPORT_SYMBOL_GPL(kernel_power_off); | 475 | EXPORT_SYMBOL_GPL(kernel_power_off); |
480 | |||
481 | /* | 476 | /* |
482 | * Reboot system call: for obvious reasons only root may call it, | 477 | * Reboot system call: for obvious reasons only root may call it, |
483 | * and even root needs to set up some magic numbers in the registers | 478 | * and even root needs to set up some magic numbers in the registers |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 17313b99e5..1067090db6 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16); | |||
104 | cond_syscall(sys_setuid16); | 104 | cond_syscall(sys_setuid16); |
105 | cond_syscall(sys_vm86old); | 105 | cond_syscall(sys_vm86old); |
106 | cond_syscall(sys_vm86); | 106 | cond_syscall(sys_vm86); |
107 | cond_syscall(compat_sys_ipc); | ||
108 | cond_syscall(compat_sys_sysctl); | ||
107 | 109 | ||
108 | /* arch-specific weak syscall entries */ | 110 | /* arch-specific weak syscall entries */ |
109 | cond_syscall(sys_pciconfig_read); | 111 | cond_syscall(sys_pciconfig_read); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index cb99a42f8b..32b48e8ee3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -44,13 +44,14 @@ | |||
44 | #include <linux/limits.h> | 44 | #include <linux/limits.h> |
45 | #include <linux/dcache.h> | 45 | #include <linux/dcache.h> |
46 | #include <linux/syscalls.h> | 46 | #include <linux/syscalls.h> |
47 | #include <linux/nfs_fs.h> | ||
48 | #include <linux/acpi.h> | ||
47 | 49 | ||
48 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
49 | #include <asm/processor.h> | 51 | #include <asm/processor.h> |
50 | 52 | ||
51 | #ifdef CONFIG_ROOT_NFS | 53 | extern int proc_nr_files(ctl_table *table, int write, struct file *filp, |
52 | #include <linux/nfs_fs.h> | 54 | void __user *buffer, size_t *lenp, loff_t *ppos); |
53 | #endif | ||
54 | 55 | ||
55 | #if defined(CONFIG_SYSCTL) | 56 | #if defined(CONFIG_SYSCTL) |
56 | 57 | ||
@@ -126,7 +127,9 @@ extern int sysctl_hz_timer; | |||
126 | extern int acct_parm[]; | 127 | extern int acct_parm[]; |
127 | #endif | 128 | #endif |
128 | 129 | ||
129 | int randomize_va_space = 1; | 130 | #ifdef CONFIG_IA64 |
131 | extern int no_unaligned_warning; | ||
132 | #endif | ||
130 | 133 | ||
131 | static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, | 134 | static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, |
132 | ctl_table *, void **); | 135 | ctl_table *, void **); |
@@ -640,6 +643,7 @@ static ctl_table kern_table[] = { | |||
640 | .proc_handler = &proc_dointvec, | 643 | .proc_handler = &proc_dointvec, |
641 | }, | 644 | }, |
642 | #endif | 645 | #endif |
646 | #if defined(CONFIG_MMU) | ||
643 | { | 647 | { |
644 | .ctl_name = KERN_RANDOMIZE, | 648 | .ctl_name = KERN_RANDOMIZE, |
645 | .procname = "randomize_va_space", | 649 | .procname = "randomize_va_space", |
@@ -648,6 +652,7 @@ static ctl_table kern_table[] = { | |||
648 | .mode = 0644, | 652 | .mode = 0644, |
649 | .proc_handler = &proc_dointvec, | 653 | .proc_handler = &proc_dointvec, |
650 | }, | 654 | }, |
655 | #endif | ||
651 | #if defined(CONFIG_S390) && defined(CONFIG_SMP) | 656 | #if defined(CONFIG_S390) && defined(CONFIG_SMP) |
652 | { | 657 | { |
653 | .ctl_name = KERN_SPIN_RETRY, | 658 | .ctl_name = KERN_SPIN_RETRY, |
@@ -658,6 +663,26 @@ static ctl_table kern_table[] = { | |||
658 | .proc_handler = &proc_dointvec, | 663 | .proc_handler = &proc_dointvec, |
659 | }, | 664 | }, |
660 | #endif | 665 | #endif |
666 | #ifdef CONFIG_ACPI_SLEEP | ||
667 | { | ||
668 | .ctl_name = KERN_ACPI_VIDEO_FLAGS, | ||
669 | .procname = "acpi_video_flags", | ||
670 | .data = &acpi_video_flags, | ||
671 | .maxlen = sizeof (unsigned long), | ||
672 | .mode = 0644, | ||
673 | .proc_handler = &proc_doulongvec_minmax, | ||
674 | }, | ||
675 | #endif | ||
676 | #ifdef CONFIG_IA64 | ||
677 | { | ||
678 | .ctl_name = KERN_IA64_UNALIGNED, | ||
679 | .procname = "ignore-unaligned-usertrap", | ||
680 | .data = &no_unaligned_warning, | ||
681 | .maxlen = sizeof (int), | ||
682 | .mode = 0644, | ||
683 | .proc_handler = &proc_dointvec, | ||
684 | }, | ||
685 | #endif | ||
661 | { .ctl_name = 0 } | 686 | { .ctl_name = 0 } |
662 | }; | 687 | }; |
663 | 688 | ||
@@ -878,7 +903,17 @@ static ctl_table vm_table[] = { | |||
878 | .maxlen = sizeof(zone_reclaim_mode), | 903 | .maxlen = sizeof(zone_reclaim_mode), |
879 | .mode = 0644, | 904 | .mode = 0644, |
880 | .proc_handler = &proc_dointvec, | 905 | .proc_handler = &proc_dointvec, |
881 | .strategy = &zero, | 906 | .strategy = &sysctl_intvec, |
907 | .extra1 = &zero, | ||
908 | }, | ||
909 | { | ||
910 | .ctl_name = VM_ZONE_RECLAIM_INTERVAL, | ||
911 | .procname = "zone_reclaim_interval", | ||
912 | .data = &zone_reclaim_interval, | ||
913 | .maxlen = sizeof(zone_reclaim_interval), | ||
914 | .mode = 0644, | ||
915 | .proc_handler = &proc_dointvec_jiffies, | ||
916 | .strategy = &sysctl_jiffies, | ||
882 | }, | 917 | }, |
883 | #endif | 918 | #endif |
884 | { .ctl_name = 0 } | 919 | { .ctl_name = 0 } |
@@ -911,7 +946,7 @@ static ctl_table fs_table[] = { | |||
911 | .data = &files_stat, | 946 | .data = &files_stat, |
912 | .maxlen = 3*sizeof(int), | 947 | .maxlen = 3*sizeof(int), |
913 | .mode = 0444, | 948 | .mode = 0444, |
914 | .proc_handler = &proc_dointvec, | 949 | .proc_handler = &proc_nr_files, |
915 | }, | 950 | }, |
916 | { | 951 | { |
917 | .ctl_name = FS_MAXFILE, | 952 | .ctl_name = FS_MAXFILE, |
diff --git a/kernel/time.c b/kernel/time.c index 7477b1d207..804539165d 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -155,7 +155,7 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz) | |||
155 | static int firsttime = 1; | 155 | static int firsttime = 1; |
156 | int error = 0; | 156 | int error = 0; |
157 | 157 | ||
158 | if (!timespec_valid(tv)) | 158 | if (tv && !timespec_valid(tv)) |
159 | return -EINVAL; | 159 | return -EINVAL; |
160 | 160 | ||
161 | error = security_settime(tv, tz); | 161 | error = security_settime(tv, tz); |
@@ -637,15 +637,16 @@ void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) | |||
637 | * | 637 | * |
638 | * Returns the timespec representation of the nsec parameter. | 638 | * Returns the timespec representation of the nsec parameter. |
639 | */ | 639 | */ |
640 | inline struct timespec ns_to_timespec(const nsec_t nsec) | 640 | struct timespec ns_to_timespec(const nsec_t nsec) |
641 | { | 641 | { |
642 | struct timespec ts; | 642 | struct timespec ts; |
643 | 643 | ||
644 | if (nsec) | 644 | if (!nsec) |
645 | ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, | 645 | return (struct timespec) {0, 0}; |
646 | &ts.tv_nsec); | 646 | |
647 | else | 647 | ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec); |
648 | ts.tv_sec = ts.tv_nsec = 0; | 648 | if (unlikely(nsec < 0)) |
649 | set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec); | ||
649 | 650 | ||
650 | return ts; | 651 | return ts; |
651 | } | 652 | } |
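The rewritten ns_to_timespec() above adds a normalization step so that negative nanosecond counts come back with 0 <= tv_nsec < NSEC_PER_SEC. A user-space sketch of the intended behaviour; the local struct and single-step normalization stand in for the kernel's timespec and set_normalized_timespec():

#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

struct ts { long tv_sec; long tv_nsec; };

static struct ts ns_to_ts(long long nsec)
{
        struct ts t;

        t.tv_sec  = nsec / NSEC_PER_SEC;
        t.tv_nsec = nsec % NSEC_PER_SEC;
        /* keep 0 <= tv_nsec < NSEC_PER_SEC, borrowing from tv_sec */
        if (t.tv_nsec < 0) {
                t.tv_nsec += NSEC_PER_SEC;
                t.tv_sec  -= 1;
        }
        return t;
}

int main(void)
{
        struct ts t = ns_to_ts(-1500000000LL);  /* -1.5 s */

        printf("%ld s, %ld ns\n", t.tv_sec, t.tv_nsec);  /* -2 s, 500000000 ns */
        return 0;
}
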
diff --git a/kernel/timer.c b/kernel/timer.c index 4f1cb0ab52..bf7c4193b9 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -489,13 +489,25 @@ unsigned long next_timer_interrupt(void) | |||
489 | struct list_head *list; | 489 | struct list_head *list; |
490 | struct timer_list *nte; | 490 | struct timer_list *nte; |
491 | unsigned long expires; | 491 | unsigned long expires; |
492 | unsigned long hr_expires = MAX_JIFFY_OFFSET; | ||
493 | ktime_t hr_delta; | ||
492 | tvec_t *varray[4]; | 494 | tvec_t *varray[4]; |
493 | int i, j; | 495 | int i, j; |
494 | 496 | ||
497 | hr_delta = hrtimer_get_next_event(); | ||
498 | if (hr_delta.tv64 != KTIME_MAX) { | ||
499 | struct timespec tsdelta; | ||
500 | tsdelta = ktime_to_timespec(hr_delta); | ||
501 | hr_expires = timespec_to_jiffies(&tsdelta); | ||
502 | if (hr_expires < 3) | ||
503 | return hr_expires + jiffies; | ||
504 | } | ||
505 | hr_expires += jiffies; | ||
506 | |||
495 | base = &__get_cpu_var(tvec_bases); | 507 | base = &__get_cpu_var(tvec_bases); |
496 | spin_lock(&base->t_base.lock); | 508 | spin_lock(&base->t_base.lock); |
497 | expires = base->timer_jiffies + (LONG_MAX >> 1); | 509 | expires = base->timer_jiffies + (LONG_MAX >> 1); |
498 | list = 0; | 510 | list = NULL; |
499 | 511 | ||
500 | /* Look for timer events in tv1. */ | 512 | /* Look for timer events in tv1. */ |
501 | j = base->timer_jiffies & TVR_MASK; | 513 | j = base->timer_jiffies & TVR_MASK; |
@@ -542,6 +554,10 @@ found: | |||
542 | } | 554 | } |
543 | } | 555 | } |
544 | spin_unlock(&base->t_base.lock); | 556 | spin_unlock(&base->t_base.lock); |
557 | |||
558 | if (time_before(hr_expires, expires)) | ||
559 | return hr_expires; | ||
560 | |||
545 | return expires; | 561 | return expires; |
546 | } | 562 | } |
547 | #endif | 563 | #endif |
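next_timer_interrupt() above now folds in the next hrtimer event: the hrtimer delta is converted to jiffies, a very close event (under 3 jiffies away) is returned immediately, and otherwise the earlier of the hrtimer event and the timer-wheel expiry wins. A toy version of just that selection logic; pick_next() and the sample numbers are illustrative only:

#include <stdio.h>

static unsigned long pick_next(unsigned long jiffies_now,
                               unsigned long wheel_expires,
                               unsigned long hr_delta_jiffies)
{
        unsigned long hr_expires = jiffies_now + hr_delta_jiffies;

        /* an imminent hrtimer event short-circuits the wheel scan */
        if (hr_delta_jiffies < 3)
                return hr_expires;
        /* otherwise report whichever comes first (time_before()) */
        return (long)(hr_expires - wheel_expires) < 0 ? hr_expires : wheel_expires;
}

int main(void)
{
        printf("%lu\n", pick_next(1000, 1250, 100));  /* hrtimer first: 1100 */
        printf("%lu\n", pick_next(1000, 1020, 100));  /* wheel first:  1020 */
        printf("%lu\n", pick_next(1000, 1250, 1));    /* shortcut:     1001 */
        return 0;
}
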
@@ -717,12 +733,16 @@ static void second_overflow(void) | |||
717 | #endif | 733 | #endif |
718 | } | 734 | } |
719 | 735 | ||
720 | /* in the NTP reference this is called "hardclock()" */ | 736 | /* |
721 | static void update_wall_time_one_tick(void) | 737 | * Returns how many microseconds we need to add to xtime this tick |
738 | * in doing an adjustment requested with adjtime. | ||
739 | */ | ||
740 | static long adjtime_adjustment(void) | ||
722 | { | 741 | { |
723 | long time_adjust_step, delta_nsec; | 742 | long time_adjust_step; |
724 | 743 | ||
725 | if ((time_adjust_step = time_adjust) != 0 ) { | 744 | time_adjust_step = time_adjust; |
745 | if (time_adjust_step) { | ||
726 | /* | 746 | /* |
727 | * We are doing an adjtime thing. Prepare time_adjust_step to | 747 | * We are doing an adjtime thing. Prepare time_adjust_step to |
728 | * be within bounds. Note that a positive time_adjust means we | 748 | * be within bounds. Note that a positive time_adjust means we |
@@ -733,10 +753,19 @@ static void update_wall_time_one_tick(void) | |||
733 | */ | 753 | */ |
734 | time_adjust_step = min(time_adjust_step, (long)tickadj); | 754 | time_adjust_step = min(time_adjust_step, (long)tickadj); |
735 | time_adjust_step = max(time_adjust_step, (long)-tickadj); | 755 | time_adjust_step = max(time_adjust_step, (long)-tickadj); |
756 | } | ||
757 | return time_adjust_step; | ||
758 | } | ||
736 | 759 | ||
760 | /* in the NTP reference this is called "hardclock()" */ | ||
761 | static void update_wall_time_one_tick(void) | ||
762 | { | ||
763 | long time_adjust_step, delta_nsec; | ||
764 | |||
765 | time_adjust_step = adjtime_adjustment(); | ||
766 | if (time_adjust_step) | ||
737 | /* Reduce by this step the amount of time left */ | 767 | /* Reduce by this step the amount of time left */ |
738 | time_adjust -= time_adjust_step; | 768 | time_adjust -= time_adjust_step; |
739 | } | ||
740 | delta_nsec = tick_nsec + time_adjust_step * 1000; | 769 | delta_nsec = tick_nsec + time_adjust_step * 1000; |
741 | /* | 770 | /* |
742 | * Advance the phase, once it gets to one microsecond, then | 771 | * Advance the phase, once it gets to one microsecond, then |
@@ -759,6 +788,22 @@ static void update_wall_time_one_tick(void) | |||
759 | } | 788 | } |
760 | 789 | ||
761 | /* | 790 | /* |
791 | * Return how long ticks are at the moment, that is, how much time | ||
792 | * update_wall_time_one_tick will add to xtime next time we call it | ||
793 | * (assuming no calls to do_adjtimex in the meantime). | ||
794 | * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 | ||
795 | * bits to the right of the binary point. | ||
796 | * This function has no side-effects. | ||
797 | */ | ||
798 | u64 current_tick_length(void) | ||
799 | { | ||
800 | long delta_nsec; | ||
801 | |||
802 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; | ||
803 | return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; | ||
804 | } | ||
805 | |||
806 | /* | ||
762 | * Using a loop looks inefficient, but "ticks" is | 807 | * Using a loop looks inefficient, but "ticks" is |
763 | * usually just one (we shouldn't be losing ticks, | 808 | * usually just one (we shouldn't be losing ticks, |
764 | * we're doing this this way mainly for interrupt | 809 | * we're doing this this way mainly for interrupt |
@@ -896,6 +941,8 @@ static inline void update_times(void) | |||
896 | void do_timer(struct pt_regs *regs) | 941 | void do_timer(struct pt_regs *regs) |
897 | { | 942 | { |
898 | jiffies_64++; | 943 | jiffies_64++; |
944 | /* prevent loading jiffies before storing new jiffies_64 value. */ | ||
945 | barrier(); | ||
899 | update_times(); | 946 | update_times(); |
900 | softlockup_tick(regs); | 947 | softlockup_tick(regs); |
901 | } | 948 | } |
@@ -1322,10 +1369,10 @@ static inline u64 time_interpolator_get_cycles(unsigned int src) | |||
1322 | return x(); | 1369 | return x(); |
1323 | 1370 | ||
1324 | case TIME_SOURCE_MMIO64 : | 1371 | case TIME_SOURCE_MMIO64 : |
1325 | return readq((void __iomem *) time_interpolator->addr); | 1372 | return readq_relaxed((void __iomem *)time_interpolator->addr); |
1326 | 1373 | ||
1327 | case TIME_SOURCE_MMIO32 : | 1374 | case TIME_SOURCE_MMIO32 : |
1328 | return readl((void __iomem *) time_interpolator->addr); | 1375 | return readl_relaxed((void __iomem *)time_interpolator->addr); |
1329 | 1376 | ||
1330 | default: return get_cycles(); | 1377 | default: return get_cycles(); |
1331 | } | 1378 | } |
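current_tick_length(), added above, reports the per-tick advance of xtime as fixed-point nanoseconds with SHIFT_SCALE-10 bits to the right of the binary point. A standalone illustration of that representation, assuming SHIFT_SCALE is 22 (its historical value in <linux/timex.h>) and leaving out the time_adj term and any adjtime() contribution; the tick_nsec sample is only a plausible HZ=1000 figure:

#include <stdio.h>

#define SHIFT_SCALE 22  /* assumed; see <linux/timex.h> */

int main(void)
{
        unsigned long long tick_nsec = 999848;  /* sample tick length, ns */
        unsigned long long fixed = tick_nsec << (SHIFT_SCALE - 10);

        /* 12 fractional bits: shift right to recover whole nanoseconds */
        printf("fixed point: 0x%llx\n", fixed);
        printf("whole ns:    %llu\n", fixed >> (SHIFT_SCALE - 10));
        return 0;
}
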
diff --git a/kernel/user.c b/kernel/user.c index 89e562feb1..d9deae43a9 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
15 | #include <linux/key.h> | 15 | #include <linux/key.h> |
16 | #include <linux/interrupt.h> | ||
16 | 17 | ||
17 | /* | 18 | /* |
18 | * UID task count cache, to get fast user lookup in "alloc_uid" | 19 | * UID task count cache, to get fast user lookup in "alloc_uid" |
@@ -27,6 +28,16 @@ | |||
27 | 28 | ||
28 | static kmem_cache_t *uid_cachep; | 29 | static kmem_cache_t *uid_cachep; |
29 | static struct list_head uidhash_table[UIDHASH_SZ]; | 30 | static struct list_head uidhash_table[UIDHASH_SZ]; |
31 | |||
32 | /* | ||
33 | * The uidhash_lock is mostly taken from process context, but it is | ||
34 | * occasionally also taken from softirq/tasklet context, when | ||
35 | * task-structs get RCU-freed. Hence all locking must be softirq-safe. | ||
36 | * But free_uid() is also called with local interrupts disabled, and running | ||
37 | * local_bh_enable() with local interrupts disabled is an error - we'll run | ||
38 | * softirq callbacks, and they can unconditionally enable interrupts, and | ||
39 | * the caller of free_uid() didn't expect that.. | ||
40 | */ | ||
30 | static DEFINE_SPINLOCK(uidhash_lock); | 41 | static DEFINE_SPINLOCK(uidhash_lock); |
31 | 42 | ||
32 | struct user_struct root_user = { | 43 | struct user_struct root_user = { |
@@ -82,15 +93,19 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has | |||
82 | struct user_struct *find_user(uid_t uid) | 93 | struct user_struct *find_user(uid_t uid) |
83 | { | 94 | { |
84 | struct user_struct *ret; | 95 | struct user_struct *ret; |
96 | unsigned long flags; | ||
85 | 97 | ||
86 | spin_lock(&uidhash_lock); | 98 | spin_lock_irqsave(&uidhash_lock, flags); |
87 | ret = uid_hash_find(uid, uidhashentry(uid)); | 99 | ret = uid_hash_find(uid, uidhashentry(uid)); |
88 | spin_unlock(&uidhash_lock); | 100 | spin_unlock_irqrestore(&uidhash_lock, flags); |
89 | return ret; | 101 | return ret; |
90 | } | 102 | } |
91 | 103 | ||
92 | void free_uid(struct user_struct *up) | 104 | void free_uid(struct user_struct *up) |
93 | { | 105 | { |
106 | unsigned long flags; | ||
107 | |||
108 | local_irq_save(flags); | ||
94 | if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { | 109 | if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { |
95 | uid_hash_remove(up); | 110 | uid_hash_remove(up); |
96 | key_put(up->uid_keyring); | 111 | key_put(up->uid_keyring); |
@@ -98,6 +113,7 @@ void free_uid(struct user_struct *up) | |||
98 | kmem_cache_free(uid_cachep, up); | 113 | kmem_cache_free(uid_cachep, up); |
99 | spin_unlock(&uidhash_lock); | 114 | spin_unlock(&uidhash_lock); |
100 | } | 115 | } |
116 | local_irq_restore(flags); | ||
101 | } | 117 | } |
102 | 118 | ||
103 | struct user_struct * alloc_uid(uid_t uid) | 119 | struct user_struct * alloc_uid(uid_t uid) |
@@ -105,9 +121,9 @@ struct user_struct * alloc_uid(uid_t uid) | |||
105 | struct list_head *hashent = uidhashentry(uid); | 121 | struct list_head *hashent = uidhashentry(uid); |
106 | struct user_struct *up; | 122 | struct user_struct *up; |
107 | 123 | ||
108 | spin_lock(&uidhash_lock); | 124 | spin_lock_irq(&uidhash_lock); |
109 | up = uid_hash_find(uid, hashent); | 125 | up = uid_hash_find(uid, hashent); |
110 | spin_unlock(&uidhash_lock); | 126 | spin_unlock_irq(&uidhash_lock); |
111 | 127 | ||
112 | if (!up) { | 128 | if (!up) { |
113 | struct user_struct *new; | 129 | struct user_struct *new; |
@@ -137,7 +153,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
137 | * Before adding this, check whether we raced | 153 | * Before adding this, check whether we raced |
138 | * on adding the same user already.. | 154 | * on adding the same user already.. |
139 | */ | 155 | */ |
140 | spin_lock(&uidhash_lock); | 156 | spin_lock_irq(&uidhash_lock); |
141 | up = uid_hash_find(uid, hashent); | 157 | up = uid_hash_find(uid, hashent); |
142 | if (up) { | 158 | if (up) { |
143 | key_put(new->uid_keyring); | 159 | key_put(new->uid_keyring); |
@@ -147,7 +163,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
147 | uid_hash_insert(new, hashent); | 163 | uid_hash_insert(new, hashent); |
148 | up = new; | 164 | up = new; |
149 | } | 165 | } |
150 | spin_unlock(&uidhash_lock); | 166 | spin_unlock_irq(&uidhash_lock); |
151 | 167 | ||
152 | } | 168 | } |
153 | return up; | 169 | return up; |
@@ -183,9 +199,9 @@ static int __init uid_cache_init(void) | |||
183 | INIT_LIST_HEAD(uidhash_table + n); | 199 | INIT_LIST_HEAD(uidhash_table + n); |
184 | 200 | ||
185 | /* Insert the root user immediately (init already runs as root) */ | 201 | /* Insert the root user immediately (init already runs as root) */ |
186 | spin_lock(&uidhash_lock); | 202 | spin_lock_irq(&uidhash_lock); |
187 | uid_hash_insert(&root_user, uidhashentry(0)); | 203 | uid_hash_insert(&root_user, uidhashentry(0)); |
188 | spin_unlock(&uidhash_lock); | 204 | spin_unlock_irq(&uidhash_lock); |
189 | 205 | ||
190 | return 0; | 206 | return 0; |
191 | } | 207 | } |