Diffstat (limited to 'kernel')
39 files changed, 686 insertions(+), 287 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile index 642d4277c2ea..2a999836ca18 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -4,11 +4,12 @@ | |||
4 | 4 | ||
5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | 5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ |
6 | exit.o itimer.o time.o softirq.o resource.o \ | 6 | exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o | 11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ |
12 | utsname.o | ||
12 | 13 | ||
13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 14 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
14 | obj-y += time/ | 15 | obj-y += time/ |
@@ -48,7 +49,6 @@ obj-$(CONFIG_SECCOMP) += seccomp.o | |||
48 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
49 | obj-$(CONFIG_RELAY) += relay.o | 50 | obj-$(CONFIG_RELAY) += relay.o |
50 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 51 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
51 | obj-$(CONFIG_UTS_NS) += utsname.o | ||
52 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 52 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
53 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o | 53 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o |
54 | 54 | ||
diff --git a/kernel/audit.c b/kernel/audit.c index d13276d41410..5ce8851facf7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <linux/selinux.h> | 58 | #include <linux/selinux.h> |
59 | #include <linux/inotify.h> | 59 | #include <linux/inotify.h> |
60 | #include <linux/freezer.h> | 60 | #include <linux/freezer.h> |
61 | #include <linux/tty.h> | ||
61 | 62 | ||
62 | #include "audit.h" | 63 | #include "audit.h" |
63 | 64 | ||
@@ -423,6 +424,31 @@ static int kauditd_thread(void *dummy) | |||
423 | return 0; | 424 | return 0; |
424 | } | 425 | } |
425 | 426 | ||
427 | static int audit_prepare_user_tty(pid_t pid, uid_t loginuid) | ||
428 | { | ||
429 | struct task_struct *tsk; | ||
430 | int err; | ||
431 | |||
432 | read_lock(&tasklist_lock); | ||
433 | tsk = find_task_by_pid(pid); | ||
434 | err = -ESRCH; | ||
435 | if (!tsk) | ||
436 | goto out; | ||
437 | err = 0; | ||
438 | |||
439 | spin_lock_irq(&tsk->sighand->siglock); | ||
440 | if (!tsk->signal->audit_tty) | ||
441 | err = -EPERM; | ||
442 | spin_unlock_irq(&tsk->sighand->siglock); | ||
443 | if (err) | ||
444 | goto out; | ||
445 | |||
446 | tty_audit_push_task(tsk, loginuid); | ||
447 | out: | ||
448 | read_unlock(&tasklist_lock); | ||
449 | return err; | ||
450 | } | ||
451 | |||
426 | int audit_send_list(void *_dest) | 452 | int audit_send_list(void *_dest) |
427 | { | 453 | { |
428 | struct audit_netlink_list *dest = _dest; | 454 | struct audit_netlink_list *dest = _dest; |
@@ -511,6 +537,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) | |||
511 | case AUDIT_DEL: | 537 | case AUDIT_DEL: |
512 | case AUDIT_DEL_RULE: | 538 | case AUDIT_DEL_RULE: |
513 | case AUDIT_SIGNAL_INFO: | 539 | case AUDIT_SIGNAL_INFO: |
540 | case AUDIT_TTY_GET: | ||
541 | case AUDIT_TTY_SET: | ||
514 | if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) | 542 | if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) |
515 | err = -EPERM; | 543 | err = -EPERM; |
516 | break; | 544 | break; |
@@ -622,6 +650,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
622 | err = audit_filter_user(&NETLINK_CB(skb), msg_type); | 650 | err = audit_filter_user(&NETLINK_CB(skb), msg_type); |
623 | if (err == 1) { | 651 | if (err == 1) { |
624 | err = 0; | 652 | err = 0; |
653 | if (msg_type == AUDIT_USER_TTY) { | ||
654 | err = audit_prepare_user_tty(pid, loginuid); | ||
655 | if (err) | ||
656 | break; | ||
657 | } | ||
625 | ab = audit_log_start(NULL, GFP_KERNEL, msg_type); | 658 | ab = audit_log_start(NULL, GFP_KERNEL, msg_type); |
626 | if (ab) { | 659 | if (ab) { |
627 | audit_log_format(ab, | 660 | audit_log_format(ab, |
@@ -638,8 +671,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
638 | " subj=%s", ctx); | 671 | " subj=%s", ctx); |
639 | kfree(ctx); | 672 | kfree(ctx); |
640 | } | 673 | } |
641 | audit_log_format(ab, " msg='%.1024s'", | 674 | if (msg_type != AUDIT_USER_TTY) |
642 | (char *)data); | 675 | audit_log_format(ab, " msg='%.1024s'", |
676 | (char *)data); | ||
677 | else { | ||
678 | int size; | ||
679 | |||
680 | audit_log_format(ab, " msg="); | ||
681 | size = nlmsg_len(nlh); | ||
682 | audit_log_n_untrustedstring(ab, size, | ||
683 | data); | ||
684 | } | ||
643 | audit_set_pid(ab, pid); | 685 | audit_set_pid(ab, pid); |
644 | audit_log_end(ab); | 686 | audit_log_end(ab); |
645 | } | 687 | } |
@@ -730,6 +772,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
730 | 0, 0, sig_data, sizeof(*sig_data) + len); | 772 | 0, 0, sig_data, sizeof(*sig_data) + len); |
731 | kfree(sig_data); | 773 | kfree(sig_data); |
732 | break; | 774 | break; |
775 | case AUDIT_TTY_GET: { | ||
776 | struct audit_tty_status s; | ||
777 | struct task_struct *tsk; | ||
778 | |||
779 | read_lock(&tasklist_lock); | ||
780 | tsk = find_task_by_pid(pid); | ||
781 | if (!tsk) | ||
782 | err = -ESRCH; | ||
783 | else { | ||
784 | spin_lock_irq(&tsk->sighand->siglock); | ||
785 | s.enabled = tsk->signal->audit_tty != 0; | ||
786 | spin_unlock_irq(&tsk->sighand->siglock); | ||
787 | } | ||
788 | read_unlock(&tasklist_lock); | ||
789 | audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0, | ||
790 | &s, sizeof(s)); | ||
791 | break; | ||
792 | } | ||
793 | case AUDIT_TTY_SET: { | ||
794 | struct audit_tty_status *s; | ||
795 | struct task_struct *tsk; | ||
796 | |||
797 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) | ||
798 | return -EINVAL; | ||
799 | s = data; | ||
800 | if (s->enabled != 0 && s->enabled != 1) | ||
801 | return -EINVAL; | ||
802 | read_lock(&tasklist_lock); | ||
803 | tsk = find_task_by_pid(pid); | ||
804 | if (!tsk) | ||
805 | err = -ESRCH; | ||
806 | else { | ||
807 | spin_lock_irq(&tsk->sighand->siglock); | ||
808 | tsk->signal->audit_tty = s->enabled != 0; | ||
809 | spin_unlock_irq(&tsk->sighand->siglock); | ||
810 | } | ||
811 | read_unlock(&tasklist_lock); | ||
812 | break; | ||
813 | } | ||
733 | default: | 814 | default: |
734 | err = -EINVAL; | 815 | err = -EINVAL; |
735 | break; | 816 | break; |
@@ -1185,7 +1266,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, | |||
1185 | } | 1266 | } |
1186 | 1267 | ||
1187 | /** | 1268 | /** |
1188 | * audit_log_n_unstrustedstring - log a string that may contain random characters | 1269 | * audit_log_n_untrustedstring - log a string that may contain random characters |
1189 | * @ab: audit_buffer | 1270 | * @ab: audit_buffer |
1190 | * @len: length of string (not including trailing null) | 1271 | * @len: length of string (not including trailing null) |
1191 | * @string: string to be logged | 1272 | * @string: string to be logged |
@@ -1201,25 +1282,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, | |||
1201 | const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, | 1282 | const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, |
1202 | const char *string) | 1283 | const char *string) |
1203 | { | 1284 | { |
1204 | const unsigned char *p = string; | 1285 | const unsigned char *p; |
1205 | 1286 | ||
1206 | while (*p) { | 1287 | for (p = string; p < (const unsigned char *)string + len && *p; p++) { |
1207 | if (*p == '"' || *p < 0x21 || *p > 0x7f) { | 1288 | if (*p == '"' || *p < 0x21 || *p > 0x7f) { |
1208 | audit_log_hex(ab, string, len); | 1289 | audit_log_hex(ab, string, len); |
1209 | return string + len + 1; | 1290 | return string + len + 1; |
1210 | } | 1291 | } |
1211 | p++; | ||
1212 | } | 1292 | } |
1213 | audit_log_n_string(ab, len, string); | 1293 | audit_log_n_string(ab, len, string); |
1214 | return p + 1; | 1294 | return p + 1; |
1215 | } | 1295 | } |
1216 | 1296 | ||
1217 | /** | 1297 | /** |
1218 | * audit_log_unstrustedstring - log a string that may contain random characters | 1298 | * audit_log_untrustedstring - log a string that may contain random characters |
1219 | * @ab: audit_buffer | 1299 | * @ab: audit_buffer |
1220 | * @string: string to be logged | 1300 | * @string: string to be logged |
1221 | * | 1301 | * |
1222 | * Same as audit_log_n_unstrustedstring(), except that strlen is used to | 1302 | * Same as audit_log_n_untrustedstring(), except that strlen is used to |
1223 | * determine string length. | 1303 | * determine string length. |
1224 | */ | 1304 | */ |
1225 | const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) | 1305 | const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) |
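
The two new message types are driven from userspace over the ordinary audit netlink socket. A minimal sketch of a client, not part of the patch: it assumes a <linux/audit.h> on the build host that already exports AUDIT_TTY_GET and struct audit_tty_status, and it needs CAP_AUDIT_CONTROL, per the audit_netlink_ok() hunk above.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/audit.h>

int main(void)
{
	struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
	struct nlmsghdr req;
	char buf[256];
	struct nlmsghdr *rep = (struct nlmsghdr *)buf;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);

	memset(&req, 0, sizeof(req));
	req.nlmsg_len = NLMSG_LENGTH(0);	/* header only, no payload */
	req.nlmsg_type = AUDIT_TTY_GET;
	req.nlmsg_flags = NLM_F_REQUEST;
	sendto(fd, &req, req.nlmsg_len, 0,
	       (struct sockaddr *)&addr, sizeof(addr));

	if (recv(fd, buf, sizeof(buf), 0) > 0 &&
	    rep->nlmsg_type == AUDIT_TTY_GET) {
		struct audit_tty_status *s = NLMSG_DATA(rep);
		printf("tty auditing is %s\n", s->enabled ? "on" : "off");
	}
	close(fd);
	return 0;
}
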
diff --git a/kernel/audit.h b/kernel/audit.h index 815d6f5c04ee..95877435c347 100644 --- a/kernel/audit.h +++ b/kernel/audit.h | |||
@@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type, | |||
115 | extern void audit_send_reply(int pid, int seq, int type, | 115 | extern void audit_send_reply(int pid, int seq, int type, |
116 | int done, int multi, | 116 | int done, int multi, |
117 | void *payload, int size); | 117 | void *payload, int size); |
118 | extern void audit_log_lost(const char *message); | ||
119 | extern void audit_panic(const char *message); | 118 | extern void audit_panic(const char *message); |
120 | 119 | ||
121 | struct audit_netlink_list { | 120 | struct audit_netlink_list { |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e36481ed61b4..b7640a5f382a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -71,9 +71,6 @@ | |||
71 | 71 | ||
72 | extern struct list_head audit_filter_list[]; | 72 | extern struct list_head audit_filter_list[]; |
73 | 73 | ||
74 | /* No syscall auditing will take place unless audit_enabled != 0. */ | ||
75 | extern int audit_enabled; | ||
76 | |||
77 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context | 74 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context |
78 | * for saving names from getname(). */ | 75 | * for saving names from getname(). */ |
79 | #define AUDIT_NAMES 20 | 76 | #define AUDIT_NAMES 20 |
@@ -2040,7 +2037,7 @@ int __audit_signal_info(int sig, struct task_struct *t) | |||
2040 | 2037 | ||
2041 | /** | 2038 | /** |
2042 | * audit_core_dumps - record information about processes that end abnormally | 2039 | * audit_core_dumps - record information about processes that end abnormally |
2043 | * @sig: signal value | 2040 | * @signr: signal value |
2044 | * | 2041 | * |
2045 | * If a process ends with a core dump, something fishy is going on and we | 2042 | * If a process ends with a core dump, something fishy is going on and we |
2046 | * should record the event for investigation. | 2043 | * should record the event for investigation. |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4c49188cc49b..824b1c01f410 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -981,10 +981,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
981 | mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); | 981 | mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); |
982 | if (!mmarray) | 982 | if (!mmarray) |
983 | goto done; | 983 | goto done; |
984 | write_lock_irq(&tasklist_lock); /* block fork */ | 984 | read_lock(&tasklist_lock); /* block fork */ |
985 | if (atomic_read(&cs->count) <= ntasks) | 985 | if (atomic_read(&cs->count) <= ntasks) |
986 | break; /* got enough */ | 986 | break; /* got enough */ |
987 | write_unlock_irq(&tasklist_lock); /* try again */ | 987 | read_unlock(&tasklist_lock); /* try again */ |
988 | kfree(mmarray); | 988 | kfree(mmarray); |
989 | } | 989 | } |
990 | 990 | ||
@@ -1006,7 +1006,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
1006 | continue; | 1006 | continue; |
1007 | mmarray[n++] = mm; | 1007 | mmarray[n++] = mm; |
1008 | } while_each_thread(g, p); | 1008 | } while_each_thread(g, p); |
1009 | write_unlock_irq(&tasklist_lock); | 1009 | read_unlock(&tasklist_lock); |
1010 | 1010 | ||
1011 | /* | 1011 | /* |
1012 | * Now that we've dropped the tasklist spinlock, we can | 1012 | * Now that we've dropped the tasklist spinlock, we can |
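
The lock downgrade is safe because update_nodemask() only enumerates tasks; holding tasklist_lock for read is still enough to keep fork() (a writer) out while the count is re-checked. The same estimate/allocate/verify loop in generic userspace form, purely as an illustration:

#include <pthread.h>
#include <stdlib.h>

static pthread_rwlock_t tasks_lock = PTHREAD_RWLOCK_INITIALIZER;
static int ntasks;			/* only grows under the write lock */

static long *collect_tasks(int *out)
{
	long *arr;
	int guess;

	for (;;) {
		guess = ntasks;		/* racy estimate, like atomic_read() */
		arr = malloc(guess * sizeof(*arr));
		if (!arr)
			return NULL;
		/* readers only enumerate; a read lock still blocks the
		 * writer (the fork() analogue) from adding entries */
		pthread_rwlock_rdlock(&tasks_lock);
		if (ntasks <= guess)
			break;		/* got enough slots */
		pthread_rwlock_unlock(&tasks_lock);
		free(arr);		/* population grew: try again */
	}
	/* ... walk the list and fill arr here, still under the lock ... */
	*out = ntasks;
	pthread_rwlock_unlock(&tasks_lock);
	return arr;
}
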
diff --git a/kernel/exit.c b/kernel/exit.c index ca6a11b73023..57626692cd90 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -858,6 +858,34 @@ static void exit_notify(struct task_struct *tsk) | |||
858 | release_task(tsk); | 858 | release_task(tsk); |
859 | } | 859 | } |
860 | 860 | ||
861 | #ifdef CONFIG_DEBUG_STACK_USAGE | ||
862 | static void check_stack_usage(void) | ||
863 | { | ||
864 | static DEFINE_SPINLOCK(low_water_lock); | ||
865 | static int lowest_to_date = THREAD_SIZE; | ||
866 | unsigned long *n = end_of_stack(current); | ||
867 | unsigned long free; | ||
868 | |||
869 | while (*n == 0) | ||
870 | n++; | ||
871 | free = (unsigned long)n - (unsigned long)end_of_stack(current); | ||
872 | |||
873 | if (free >= lowest_to_date) | ||
874 | return; | ||
875 | |||
876 | spin_lock(&low_water_lock); | ||
877 | if (free < lowest_to_date) { | ||
878 | printk(KERN_WARNING "%s used greatest stack depth: %lu bytes " | ||
879 | "left\n", | ||
880 | current->comm, free); | ||
881 | lowest_to_date = free; | ||
882 | } | ||
883 | spin_unlock(&low_water_lock); | ||
884 | } | ||
885 | #else | ||
886 | static inline void check_stack_usage(void) {} | ||
887 | #endif | ||
888 | |||
861 | fastcall NORET_TYPE void do_exit(long code) | 889 | fastcall NORET_TYPE void do_exit(long code) |
862 | { | 890 | { |
863 | struct task_struct *tsk = current; | 891 | struct task_struct *tsk = current; |
@@ -937,6 +965,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
937 | if (unlikely(tsk->compat_robust_list)) | 965 | if (unlikely(tsk->compat_robust_list)) |
938 | compat_exit_robust_list(tsk); | 966 | compat_exit_robust_list(tsk); |
939 | #endif | 967 | #endif |
968 | if (group_dead) | ||
969 | tty_audit_exit(); | ||
940 | if (unlikely(tsk->audit_context)) | 970 | if (unlikely(tsk->audit_context)) |
941 | audit_free(tsk); | 971 | audit_free(tsk); |
942 | 972 | ||
@@ -949,6 +979,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
949 | exit_sem(tsk); | 979 | exit_sem(tsk); |
950 | __exit_files(tsk); | 980 | __exit_files(tsk); |
951 | __exit_fs(tsk); | 981 | __exit_fs(tsk); |
982 | check_stack_usage(); | ||
952 | exit_thread(); | 983 | exit_thread(); |
953 | cpuset_exit(tsk); | 984 | cpuset_exit(tsk); |
954 | exit_keys(tsk); | 985 | exit_keys(tsk); |
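
check_stack_usage() relies on the CONFIG_DEBUG_STACK_USAGE zero-filled thread stack: the first non-zero word above end_of_stack() marks the deepest excursion ever made. A userspace toy with the same scan, assuming a heap buffer standing in for the thread stack:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define STACK_WORDS 4096

int main(void)
{
	unsigned long *stack = calloc(STACK_WORDS, sizeof(*stack));
	unsigned long *n = stack;
	size_t free_bytes;

	if (!stack)
		return 1;

	/* pretend the upper half of the region was actually used */
	memset(stack + STACK_WORDS / 2, 0xff,
	       (STACK_WORDS / 2) * sizeof(*stack));

	/* same scan as check_stack_usage(): the first non-zero word
	 * above the end-of-stack marker bounds the untouched region */
	while (*n == 0)
		n++;
	free_bytes = (char *)n - (char *)stack;
	printf("greatest stack depth left %zu bytes untouched\n",
	       free_bytes);
	free(stack);
	return 0;
}
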
diff --git a/kernel/fork.c b/kernel/fork.c index da3a155bba0d..7c5c5888e00a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/delayacct.h> | 49 | #include <linux/delayacct.h> |
50 | #include <linux/taskstats_kern.h> | 50 | #include <linux/taskstats_kern.h> |
51 | #include <linux/random.h> | 51 | #include <linux/random.h> |
52 | #include <linux/tty.h> | ||
52 | 53 | ||
53 | #include <asm/pgtable.h> | 54 | #include <asm/pgtable.h> |
54 | #include <asm/pgalloc.h> | 55 | #include <asm/pgalloc.h> |
@@ -897,6 +898,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
897 | } | 898 | } |
898 | acct_init_pacct(&sig->pacct); | 899 | acct_init_pacct(&sig->pacct); |
899 | 900 | ||
901 | tty_audit_fork(sig); | ||
902 | |||
900 | return 0; | 903 | return 0; |
901 | } | 904 | } |
902 | 905 | ||
@@ -999,7 +1002,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
999 | if (atomic_read(&p->user->processes) >= | 1002 | if (atomic_read(&p->user->processes) >= |
1000 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { | 1003 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { |
1001 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && | 1004 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && |
1002 | p->user != &root_user) | 1005 | p->user != current->nsproxy->user_ns->root_user) |
1003 | goto bad_fork_free; | 1006 | goto bad_fork_free; |
1004 | } | 1007 | } |
1005 | 1008 | ||
@@ -1059,6 +1062,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1059 | 1062 | ||
1060 | p->lock_depth = -1; /* -1 = no lock */ | 1063 | p->lock_depth = -1; /* -1 = no lock */ |
1061 | do_posix_clock_monotonic_gettime(&p->start_time); | 1064 | do_posix_clock_monotonic_gettime(&p->start_time); |
1065 | p->real_start_time = p->start_time; | ||
1066 | monotonic_to_bootbased(&p->real_start_time); | ||
1062 | p->security = NULL; | 1067 | p->security = NULL; |
1063 | p->io_context = NULL; | 1068 | p->io_context = NULL; |
1064 | p->io_wait = NULL; | 1069 | p->io_wait = NULL; |
@@ -1601,7 +1606,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1601 | err = -EINVAL; | 1606 | err = -EINVAL; |
1602 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1607 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1603 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1608 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1604 | CLONE_NEWUTS|CLONE_NEWIPC)) | 1609 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER)) |
1605 | goto bad_unshare_out; | 1610 | goto bad_unshare_out; |
1606 | 1611 | ||
1607 | if ((err = unshare_thread(unshare_flags))) | 1612 | if ((err = unshare_thread(unshare_flags))) |
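
With CLONE_NEWUSER now accepted by sys_unshare(), a process can ask for a private user namespace. A minimal caller sketch, not from the patch; on kernels of this vintage the copy_namespaces() path still demands CAP_SYS_ADMIN:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (unshare(CLONE_NEWUSER) != 0) {
		perror("unshare(CLONE_NEWUSER)");
		return 1;
	}
	printf("now in a new user namespace, euid=%d\n", (int)geteuid());
	return 0;
}
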
diff --git a/kernel/futex.c b/kernel/futex.c index 45490bec5831..5c3f45d07c53 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -121,6 +121,24 @@ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; | |||
121 | static struct vfsmount *futex_mnt; | 121 | static struct vfsmount *futex_mnt; |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * Take mm->mmap_sem, when futex is shared | ||
125 | */ | ||
126 | static inline void futex_lock_mm(struct rw_semaphore *fshared) | ||
127 | { | ||
128 | if (fshared) | ||
129 | down_read(fshared); | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Release mm->mmap_sem, when the futex is shared | ||
134 | */ | ||
135 | static inline void futex_unlock_mm(struct rw_semaphore *fshared) | ||
136 | { | ||
137 | if (fshared) | ||
138 | up_read(fshared); | ||
139 | } | ||
140 | |||
141 | /* | ||
124 | * We hash on the keys returned from get_futex_key (see below). | 142 | * We hash on the keys returned from get_futex_key (see below). |
125 | */ | 143 | */ |
126 | static struct futex_hash_bucket *hash_futex(union futex_key *key) | 144 | static struct futex_hash_bucket *hash_futex(union futex_key *key) |
@@ -287,7 +305,18 @@ void drop_futex_key_refs(union futex_key *key) | |||
287 | } | 305 | } |
288 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); | 306 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); |
289 | 307 | ||
290 | static inline int get_futex_value_locked(u32 *dest, u32 __user *from) | 308 | static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) |
309 | { | ||
310 | u32 curval; | ||
311 | |||
312 | pagefault_disable(); | ||
313 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
314 | pagefault_enable(); | ||
315 | |||
316 | return curval; | ||
317 | } | ||
318 | |||
319 | static int get_futex_value_locked(u32 *dest, u32 __user *from) | ||
291 | { | 320 | { |
292 | int ret; | 321 | int ret; |
293 | 322 | ||
@@ -620,9 +649,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
620 | 649 | ||
621 | newval = FUTEX_WAITERS | new_owner->pid; | 650 | newval = FUTEX_WAITERS | new_owner->pid; |
622 | 651 | ||
623 | pagefault_disable(); | 652 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); |
624 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
625 | pagefault_enable(); | ||
626 | 653 | ||
627 | if (curval == -EFAULT) | 654 | if (curval == -EFAULT) |
628 | ret = -EFAULT; | 655 | ret = -EFAULT; |
@@ -659,9 +686,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval) | |||
659 | * There is no waiter, so we unlock the futex. The owner died | 686 | * There is no waiter, so we unlock the futex. The owner died |
660 | * bit has not to be preserved here. We are the owner: | 687 | * bit has not to be preserved here. We are the owner: |
661 | */ | 688 | */ |
662 | pagefault_disable(); | 689 | oldval = cmpxchg_futex_value_locked(uaddr, uval, 0); |
663 | oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0); | ||
664 | pagefault_enable(); | ||
665 | 690 | ||
666 | if (oldval == -EFAULT) | 691 | if (oldval == -EFAULT) |
667 | return oldval; | 692 | return oldval; |
@@ -700,8 +725,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
700 | union futex_key key; | 725 | union futex_key key; |
701 | int ret; | 726 | int ret; |
702 | 727 | ||
703 | if (fshared) | 728 | futex_lock_mm(fshared); |
704 | down_read(fshared); | ||
705 | 729 | ||
706 | ret = get_futex_key(uaddr, fshared, &key); | 730 | ret = get_futex_key(uaddr, fshared, &key); |
707 | if (unlikely(ret != 0)) | 731 | if (unlikely(ret != 0)) |
@@ -725,8 +749,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
725 | 749 | ||
726 | spin_unlock(&hb->lock); | 750 | spin_unlock(&hb->lock); |
727 | out: | 751 | out: |
728 | if (fshared) | 752 | futex_unlock_mm(fshared); |
729 | up_read(fshared); | ||
730 | return ret; | 753 | return ret; |
731 | } | 754 | } |
732 | 755 | ||
@@ -746,8 +769,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
746 | int ret, op_ret, attempt = 0; | 769 | int ret, op_ret, attempt = 0; |
747 | 770 | ||
748 | retryfull: | 771 | retryfull: |
749 | if (fshared) | 772 | futex_lock_mm(fshared); |
750 | down_read(fshared); | ||
751 | 773 | ||
752 | ret = get_futex_key(uaddr1, fshared, &key1); | 774 | ret = get_futex_key(uaddr1, fshared, &key1); |
753 | if (unlikely(ret != 0)) | 775 | if (unlikely(ret != 0)) |
@@ -793,7 +815,7 @@ retry: | |||
793 | */ | 815 | */ |
794 | if (attempt++) { | 816 | if (attempt++) { |
795 | ret = futex_handle_fault((unsigned long)uaddr2, | 817 | ret = futex_handle_fault((unsigned long)uaddr2, |
796 | fshared, attempt); | 818 | fshared, attempt); |
797 | if (ret) | 819 | if (ret) |
798 | goto out; | 820 | goto out; |
799 | goto retry; | 821 | goto retry; |
@@ -803,8 +825,7 @@ retry: | |||
803 | * If we would have faulted, release mmap_sem, | 825 | * If we would have faulted, release mmap_sem, |
804 | * fault it in and start all over again. | 826 | * fault it in and start all over again. |
805 | */ | 827 | */ |
806 | if (fshared) | 828 | futex_unlock_mm(fshared); |
807 | up_read(fshared); | ||
808 | 829 | ||
809 | ret = get_user(dummy, uaddr2); | 830 | ret = get_user(dummy, uaddr2); |
810 | if (ret) | 831 | if (ret) |
@@ -841,8 +862,8 @@ retry: | |||
841 | if (hb1 != hb2) | 862 | if (hb1 != hb2) |
842 | spin_unlock(&hb2->lock); | 863 | spin_unlock(&hb2->lock); |
843 | out: | 864 | out: |
844 | if (fshared) | 865 | futex_unlock_mm(fshared); |
845 | up_read(fshared); | 866 | |
846 | return ret; | 867 | return ret; |
847 | } | 868 | } |
848 | 869 | ||
@@ -861,8 +882,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
861 | int ret, drop_count = 0; | 882 | int ret, drop_count = 0; |
862 | 883 | ||
863 | retry: | 884 | retry: |
864 | if (fshared) | 885 | futex_lock_mm(fshared); |
865 | down_read(fshared); | ||
866 | 886 | ||
867 | ret = get_futex_key(uaddr1, fshared, &key1); | 887 | ret = get_futex_key(uaddr1, fshared, &key1); |
868 | if (unlikely(ret != 0)) | 888 | if (unlikely(ret != 0)) |
@@ -890,8 +910,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
890 | * If we would have faulted, release mmap_sem, fault | 910 | * If we would have faulted, release mmap_sem, fault |
891 | * it in and start all over again. | 911 | * it in and start all over again. |
892 | */ | 912 | */ |
893 | if (fshared) | 913 | futex_unlock_mm(fshared); |
894 | up_read(fshared); | ||
895 | 914 | ||
896 | ret = get_user(curval, uaddr1); | 915 | ret = get_user(curval, uaddr1); |
897 | 916 | ||
@@ -944,8 +963,7 @@ out_unlock: | |||
944 | drop_futex_key_refs(&key1); | 963 | drop_futex_key_refs(&key1); |
945 | 964 | ||
946 | out: | 965 | out: |
947 | if (fshared) | 966 | futex_unlock_mm(fshared); |
948 | up_read(fshared); | ||
949 | return ret; | 967 | return ret; |
950 | } | 968 | } |
951 | 969 | ||
@@ -1113,10 +1131,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
1113 | while (!ret) { | 1131 | while (!ret) { |
1114 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | 1132 | newval = (uval & FUTEX_OWNER_DIED) | newtid; |
1115 | 1133 | ||
1116 | pagefault_disable(); | 1134 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); |
1117 | curval = futex_atomic_cmpxchg_inatomic(uaddr, | ||
1118 | uval, newval); | ||
1119 | pagefault_enable(); | ||
1120 | 1135 | ||
1121 | if (curval == -EFAULT) | 1136 | if (curval == -EFAULT) |
1122 | ret = -EFAULT; | 1137 | ret = -EFAULT; |
@@ -1134,6 +1149,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
1134 | #define ARG3_SHARED 1 | 1149 | #define ARG3_SHARED 1 |
1135 | 1150 | ||
1136 | static long futex_wait_restart(struct restart_block *restart); | 1151 | static long futex_wait_restart(struct restart_block *restart); |
1152 | |||
1137 | static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | 1153 | static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, |
1138 | u32 val, ktime_t *abs_time) | 1154 | u32 val, ktime_t *abs_time) |
1139 | { | 1155 | { |
@@ -1148,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1148 | 1164 | ||
1149 | q.pi_state = NULL; | 1165 | q.pi_state = NULL; |
1150 | retry: | 1166 | retry: |
1151 | if (fshared) | 1167 | futex_lock_mm(fshared); |
1152 | down_read(fshared); | ||
1153 | 1168 | ||
1154 | ret = get_futex_key(uaddr, fshared, &q.key); | 1169 | ret = get_futex_key(uaddr, fshared, &q.key); |
1155 | if (unlikely(ret != 0)) | 1170 | if (unlikely(ret != 0)) |
@@ -1186,8 +1201,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1186 | * If we would have faulted, release mmap_sem, fault it in and | 1201 | * If we would have faulted, release mmap_sem, fault it in and |
1187 | * start all over again. | 1202 | * start all over again. |
1188 | */ | 1203 | */ |
1189 | if (fshared) | 1204 | futex_unlock_mm(fshared); |
1190 | up_read(fshared); | ||
1191 | 1205 | ||
1192 | ret = get_user(uval, uaddr); | 1206 | ret = get_user(uval, uaddr); |
1193 | 1207 | ||
@@ -1206,8 +1220,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1206 | * Now the futex is queued and we have checked the data, we | 1220 | * Now the futex is queued and we have checked the data, we |
1207 | * don't want to hold mmap_sem while we sleep. | 1221 | * don't want to hold mmap_sem while we sleep. |
1208 | */ | 1222 | */ |
1209 | if (fshared) | 1223 | futex_unlock_mm(fshared); |
1210 | up_read(fshared); | ||
1211 | 1224 | ||
1212 | /* | 1225 | /* |
1213 | * There might have been scheduling since the queue_me(), as we | 1226 | * There might have been scheduling since the queue_me(), as we |
@@ -1285,8 +1298,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1285 | queue_unlock(&q, hb); | 1298 | queue_unlock(&q, hb); |
1286 | 1299 | ||
1287 | out_release_sem: | 1300 | out_release_sem: |
1288 | if (fshared) | 1301 | futex_unlock_mm(fshared); |
1289 | up_read(fshared); | ||
1290 | return ret; | 1302 | return ret; |
1291 | } | 1303 | } |
1292 | 1304 | ||
@@ -1333,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1333 | 1345 | ||
1334 | q.pi_state = NULL; | 1346 | q.pi_state = NULL; |
1335 | retry: | 1347 | retry: |
1336 | if (fshared) | 1348 | futex_lock_mm(fshared); |
1337 | down_read(fshared); | ||
1338 | 1349 | ||
1339 | ret = get_futex_key(uaddr, fshared, &q.key); | 1350 | ret = get_futex_key(uaddr, fshared, &q.key); |
1340 | if (unlikely(ret != 0)) | 1351 | if (unlikely(ret != 0)) |
@@ -1353,9 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1353 | */ | 1364 | */ |
1354 | newval = current->pid; | 1365 | newval = current->pid; |
1355 | 1366 | ||
1356 | pagefault_disable(); | 1367 | curval = cmpxchg_futex_value_locked(uaddr, 0, newval); |
1357 | curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval); | ||
1358 | pagefault_enable(); | ||
1359 | 1368 | ||
1360 | if (unlikely(curval == -EFAULT)) | 1369 | if (unlikely(curval == -EFAULT)) |
1361 | goto uaddr_faulted; | 1370 | goto uaddr_faulted; |
@@ -1398,9 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1398 | lock_taken = 1; | 1407 | lock_taken = 1; |
1399 | } | 1408 | } |
1400 | 1409 | ||
1401 | pagefault_disable(); | 1410 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); |
1402 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
1403 | pagefault_enable(); | ||
1404 | 1411 | ||
1405 | if (unlikely(curval == -EFAULT)) | 1412 | if (unlikely(curval == -EFAULT)) |
1406 | goto uaddr_faulted; | 1413 | goto uaddr_faulted; |
@@ -1428,8 +1435,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1428 | * exit to complete. | 1435 | * exit to complete. |
1429 | */ | 1436 | */ |
1430 | queue_unlock(&q, hb); | 1437 | queue_unlock(&q, hb); |
1431 | if (fshared) | 1438 | futex_unlock_mm(fshared); |
1432 | up_read(fshared); | ||
1433 | cond_resched(); | 1439 | cond_resched(); |
1434 | goto retry; | 1440 | goto retry; |
1435 | 1441 | ||
@@ -1465,8 +1471,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1465 | * Now the futex is queued and we have checked the data, we | 1471 | * Now the futex is queued and we have checked the data, we |
1466 | * don't want to hold mmap_sem while we sleep. | 1472 | * don't want to hold mmap_sem while we sleep. |
1467 | */ | 1473 | */ |
1468 | if (fshared) | 1474 | futex_unlock_mm(fshared); |
1469 | up_read(fshared); | ||
1470 | 1475 | ||
1471 | WARN_ON(!q.pi_state); | 1476 | WARN_ON(!q.pi_state); |
1472 | /* | 1477 | /* |
@@ -1480,8 +1485,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1480 | ret = ret ? 0 : -EWOULDBLOCK; | 1485 | ret = ret ? 0 : -EWOULDBLOCK; |
1481 | } | 1486 | } |
1482 | 1487 | ||
1483 | if (fshared) | 1488 | futex_lock_mm(fshared); |
1484 | down_read(fshared); | ||
1485 | spin_lock(q.lock_ptr); | 1489 | spin_lock(q.lock_ptr); |
1486 | 1490 | ||
1487 | if (!ret) { | 1491 | if (!ret) { |
@@ -1518,8 +1522,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1518 | 1522 | ||
1519 | /* Unqueue and drop the lock */ | 1523 | /* Unqueue and drop the lock */ |
1520 | unqueue_me_pi(&q); | 1524 | unqueue_me_pi(&q); |
1521 | if (fshared) | 1525 | futex_unlock_mm(fshared); |
1522 | up_read(fshared); | ||
1523 | 1526 | ||
1524 | return ret != -EINTR ? ret : -ERESTARTNOINTR; | 1527 | return ret != -EINTR ? ret : -ERESTARTNOINTR; |
1525 | 1528 | ||
@@ -1527,8 +1530,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1527 | queue_unlock(&q, hb); | 1530 | queue_unlock(&q, hb); |
1528 | 1531 | ||
1529 | out_release_sem: | 1532 | out_release_sem: |
1530 | if (fshared) | 1533 | futex_unlock_mm(fshared); |
1531 | up_read(fshared); | ||
1532 | return ret; | 1534 | return ret; |
1533 | 1535 | ||
1534 | uaddr_faulted: | 1536 | uaddr_faulted: |
@@ -1550,8 +1552,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1550 | goto retry_unlocked; | 1552 | goto retry_unlocked; |
1551 | } | 1553 | } |
1552 | 1554 | ||
1553 | if (fshared) | 1555 | futex_unlock_mm(fshared); |
1554 | up_read(fshared); | ||
1555 | 1556 | ||
1556 | ret = get_user(uval, uaddr); | 1557 | ret = get_user(uval, uaddr); |
1557 | if (!ret && (uval != -EFAULT)) | 1558 | if (!ret && (uval != -EFAULT)) |
@@ -1585,8 +1586,7 @@ retry: | |||
1585 | /* | 1586 | /* |
1586 | * First take all the futex related locks: | 1587 | * First take all the futex related locks: |
1587 | */ | 1588 | */ |
1588 | if (fshared) | 1589 | futex_lock_mm(fshared); |
1589 | down_read(fshared); | ||
1590 | 1590 | ||
1591 | ret = get_futex_key(uaddr, fshared, &key); | 1591 | ret = get_futex_key(uaddr, fshared, &key); |
1592 | if (unlikely(ret != 0)) | 1592 | if (unlikely(ret != 0)) |
@@ -1601,11 +1601,9 @@ retry_unlocked: | |||
1601 | * again. If it succeeds then we can return without waking | 1601 | * again. If it succeeds then we can return without waking |
1602 | * anyone else up: | 1602 | * anyone else up: |
1603 | */ | 1603 | */ |
1604 | if (!(uval & FUTEX_OWNER_DIED)) { | 1604 | if (!(uval & FUTEX_OWNER_DIED)) |
1605 | pagefault_disable(); | 1605 | uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0); |
1606 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); | 1606 | |
1607 | pagefault_enable(); | ||
1608 | } | ||
1609 | 1607 | ||
1610 | if (unlikely(uval == -EFAULT)) | 1608 | if (unlikely(uval == -EFAULT)) |
1611 | goto pi_faulted; | 1609 | goto pi_faulted; |
@@ -1647,8 +1645,7 @@ retry_unlocked: | |||
1647 | out_unlock: | 1645 | out_unlock: |
1648 | spin_unlock(&hb->lock); | 1646 | spin_unlock(&hb->lock); |
1649 | out: | 1647 | out: |
1650 | if (fshared) | 1648 | futex_unlock_mm(fshared); |
1651 | up_read(fshared); | ||
1652 | 1649 | ||
1653 | return ret; | 1650 | return ret; |
1654 | 1651 | ||
@@ -1671,8 +1668,7 @@ pi_faulted: | |||
1671 | goto retry_unlocked; | 1668 | goto retry_unlocked; |
1672 | } | 1669 | } |
1673 | 1670 | ||
1674 | if (fshared) | 1671 | futex_unlock_mm(fshared); |
1675 | up_read(fshared); | ||
1676 | 1672 | ||
1677 | ret = get_user(uval, uaddr); | 1673 | ret = get_user(uval, uaddr); |
1678 | if (!ret && (uval != -EFAULT)) | 1674 | if (!ret && (uval != -EFAULT)) |
@@ -1729,8 +1725,8 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
1729 | 1725 | ||
1730 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { | 1726 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { |
1731 | printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " | 1727 | printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " |
1732 | "will be removed from the kernel in June 2007\n", | 1728 | "will be removed from the kernel in June 2007\n", |
1733 | current->comm); | 1729 | current->comm); |
1734 | } | 1730 | } |
1735 | 1731 | ||
1736 | ret = -EINVAL; | 1732 | ret = -EINVAL; |
@@ -1908,10 +1904,8 @@ retry: | |||
1908 | * Wake robust non-PI futexes here. The wakeup of | 1904 | * Wake robust non-PI futexes here. The wakeup of |
1909 | * PI futexes happens in exit_pi_state(): | 1905 | * PI futexes happens in exit_pi_state(): |
1910 | */ | 1906 | */ |
1911 | if (!pi) { | 1907 | if (!pi && (uval & FUTEX_WAITERS)) |
1912 | if (uval & FUTEX_WAITERS) | ||
1913 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); | 1908 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); |
1914 | } | ||
1915 | } | 1909 | } |
1916 | return 0; | 1910 | return 0; |
1917 | } | 1911 | } |
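
futex_lock_mm()/futex_unlock_mm() and cmpxchg_futex_value_locked() are pure refactors: the "if (fshared) down_read(...)" and pagefault_disable()/futex_atomic_cmpxchg_inatomic()/pagefault_enable() sequences were repeated a dozen times and are now named once. The syscall surface they serve is unchanged; for reference, a bare wait/wake pair from userspace (illustration only; glibc exposes no wrapper, hence syscall(2)):

#include <stdio.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static int futex_word = 0;

static long futex(int *uaddr, int op, int val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

int main(void)
{
	/* Returns immediately with EAGAIN because *uaddr != val;
	 * a real waiter passes the value it last observed. */
	if (futex(&futex_word, FUTEX_WAIT, 1) == -1)
		perror("FUTEX_WAIT");

	/* Wake up to one waiter blocked on the same word. */
	printf("woke %ld waiters\n",
	       futex(&futex_word, FUTEX_WAKE, 1));
	return 0;
}
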
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index bd9e272d55e9..32b161972fad 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -172,7 +172,17 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
172 | irqreturn_t action_ret) | 172 | irqreturn_t action_ret) |
173 | { | 173 | { |
174 | if (unlikely(action_ret != IRQ_HANDLED)) { | 174 | if (unlikely(action_ret != IRQ_HANDLED)) { |
175 | desc->irqs_unhandled++; | 175 | /* |
176 | * If we are seeing only the odd spurious IRQ caused by | ||
177 | * bus asynchronicity then don't eventually trigger an error, | ||
178 | * otherwise the counter becomes a doomsday timer for otherwise | ||
179 | * working systems | ||
180 | */ | ||
181 | if (jiffies - desc->last_unhandled > HZ/10) | ||
182 | desc->irqs_unhandled = 1; | ||
183 | else | ||
184 | desc->irqs_unhandled++; | ||
185 | desc->last_unhandled = jiffies; | ||
176 | if (unlikely(action_ret != IRQ_NONE)) | 186 | if (unlikely(action_ret != IRQ_NONE)) |
177 | report_bad_irq(irq, desc, action_ret); | 187 | report_bad_irq(irq, desc, action_ret); |
178 | } | 188 | } |
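
The new rule only lets irqs_unhandled climb while unhandled interrupts arrive less than HZ/10 (100ms) apart, so the trip point can only be reached by a genuine interrupt storm, never by years of the odd glitch. The same decay rule restated in plain C, as an illustration:

#include <time.h>

struct strike_counter {
	unsigned int strikes;
	struct timespec last;
};

static void note_strike(struct strike_counter *c)
{
	struct timespec now;
	long ms;

	clock_gettime(CLOCK_MONOTONIC, &now);
	ms = (now.tv_sec - c->last.tv_sec) * 1000 +
	     (now.tv_nsec - c->last.tv_nsec) / 1000000;

	if (ms > 100)		/* mirrors jiffies - last_unhandled > HZ/10 */
		c->strikes = 1;	/* isolated event: restart the count */
	else
		c->strikes++;	/* rapid-fire events still accumulate */
	c->last = now;
}
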
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fed54418626c..0d662475dd9f 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -317,13 +317,12 @@ int sprint_symbol(char *buffer, unsigned long address) | |||
317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); | 317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); |
318 | if (!name) | 318 | if (!name) |
319 | return sprintf(buffer, "0x%lx", address); | 319 | return sprintf(buffer, "0x%lx", address); |
320 | else { | 320 | |
321 | if (modname) | 321 | if (modname) |
322 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, | 322 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, |
323 | size, modname); | 323 | size, modname); |
324 | else | 324 | else |
325 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); | 325 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); |
326 | } | ||
327 | } | 326 | } |
328 | 327 | ||
329 | /* Look up a kernel symbol and print it to the kernel messages. */ | 328 | /* Look up a kernel symbol and print it to the kernel messages. */ |
diff --git a/kernel/kfifo.c b/kernel/kfifo.c index cee419143fd4..bc41ad0f24f8 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/kfifo.h> | 26 | #include <linux/kfifo.h> |
27 | #include <linux/log2.h> | ||
27 | 28 | ||
28 | /** | 29 | /** |
29 | * kfifo_init - allocates a new FIFO using a preallocated buffer | 30 | * kfifo_init - allocates a new FIFO using a preallocated buffer |
@@ -41,7 +42,7 @@ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, | |||
41 | struct kfifo *fifo; | 42 | struct kfifo *fifo; |
42 | 43 | ||
43 | /* size must be a power of 2 */ | 44 | /* size must be a power of 2 */ |
44 | BUG_ON(size & (size - 1)); | 45 | BUG_ON(!is_power_of_2(size)); |
45 | 46 | ||
46 | fifo = kmalloc(sizeof(struct kfifo), gfp_mask); | 47 | fifo = kmalloc(sizeof(struct kfifo), gfp_mask); |
47 | if (!fifo) | 48 | if (!fifo) |
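
Besides reading better, is_power_of_2() also tightens the check: the open-coded mask test accepted size == 0, which the helper rejects. The idiom it wraps, for reference:

static inline int is_pow2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;	/* what is_power_of_2() checks */
}
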
diff --git a/kernel/kthread.c b/kernel/kthread.c index bbd51b81a3e8..a404f7ee7395 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -215,7 +215,7 @@ int kthread_stop(struct task_struct *k) | |||
215 | EXPORT_SYMBOL(kthread_stop); | 215 | EXPORT_SYMBOL(kthread_stop); |
216 | 216 | ||
217 | 217 | ||
218 | static __init void kthreadd_setup(void) | 218 | static noinline __init_refok void kthreadd_setup(void) |
219 | { | 219 | { |
220 | struct task_struct *tsk = current; | 220 | struct task_struct *tsk = current; |
221 | 221 | ||
diff --git a/kernel/module.c b/kernel/module.c index 015d60cfd90e..539fed9ac83c 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -61,10 +61,8 @@ extern int module_sysfs_initialized; | |||
61 | /* If this is set, the section belongs in the init part of the module */ | 61 | /* If this is set, the section belongs in the init part of the module */ |
62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) | 62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) |
63 | 63 | ||
64 | /* Protects module list */ | 64 | /* List of modules, protected by module_mutex or preempt_disable |
65 | static DEFINE_SPINLOCK(modlist_lock); | 65 | * (add/delete uses stop_machine). */ |
66 | |||
67 | /* List of modules, protected by module_mutex AND modlist_lock */ | ||
68 | static DEFINE_MUTEX(module_mutex); | 66 | static DEFINE_MUTEX(module_mutex); |
69 | static LIST_HEAD(modules); | 67 | static LIST_HEAD(modules); |
70 | 68 | ||
@@ -760,14 +758,13 @@ static void print_unload_info(struct seq_file *m, struct module *mod) | |||
760 | void __symbol_put(const char *symbol) | 758 | void __symbol_put(const char *symbol) |
761 | { | 759 | { |
762 | struct module *owner; | 760 | struct module *owner; |
763 | unsigned long flags; | ||
764 | const unsigned long *crc; | 761 | const unsigned long *crc; |
765 | 762 | ||
766 | spin_lock_irqsave(&modlist_lock, flags); | 763 | preempt_disable(); |
767 | if (!__find_symbol(symbol, &owner, &crc, 1)) | 764 | if (!__find_symbol(symbol, &owner, &crc, 1)) |
768 | BUG(); | 765 | BUG(); |
769 | module_put(owner); | 766 | module_put(owner); |
770 | spin_unlock_irqrestore(&modlist_lock, flags); | 767 | preempt_enable(); |
771 | } | 768 | } |
772 | EXPORT_SYMBOL(__symbol_put); | 769 | EXPORT_SYMBOL(__symbol_put); |
773 | 770 | ||
@@ -1228,14 +1225,14 @@ static void free_module(struct module *mod) | |||
1228 | void *__symbol_get(const char *symbol) | 1225 | void *__symbol_get(const char *symbol) |
1229 | { | 1226 | { |
1230 | struct module *owner; | 1227 | struct module *owner; |
1231 | unsigned long value, flags; | 1228 | unsigned long value; |
1232 | const unsigned long *crc; | 1229 | const unsigned long *crc; |
1233 | 1230 | ||
1234 | spin_lock_irqsave(&modlist_lock, flags); | 1231 | preempt_disable(); |
1235 | value = __find_symbol(symbol, &owner, &crc, 1); | 1232 | value = __find_symbol(symbol, &owner, &crc, 1); |
1236 | if (value && !strong_try_module_get(owner)) | 1233 | if (value && !strong_try_module_get(owner)) |
1237 | value = 0; | 1234 | value = 0; |
1238 | spin_unlock_irqrestore(&modlist_lock, flags); | 1235 | preempt_enable(); |
1239 | 1236 | ||
1240 | return (void *)value; | 1237 | return (void *)value; |
1241 | } | 1238 | } |
@@ -2232,26 +2229,13 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
2232 | /* Called by the /proc file system to return a list of modules. */ | 2229 | /* Called by the /proc file system to return a list of modules. */ |
2233 | static void *m_start(struct seq_file *m, loff_t *pos) | 2230 | static void *m_start(struct seq_file *m, loff_t *pos) |
2234 | { | 2231 | { |
2235 | struct list_head *i; | ||
2236 | loff_t n = 0; | ||
2237 | |||
2238 | mutex_lock(&module_mutex); | 2232 | mutex_lock(&module_mutex); |
2239 | list_for_each(i, &modules) { | 2233 | return seq_list_start(&modules, *pos); |
2240 | if (n++ == *pos) | ||
2241 | break; | ||
2242 | } | ||
2243 | if (i == &modules) | ||
2244 | return NULL; | ||
2245 | return i; | ||
2246 | } | 2234 | } |
2247 | 2235 | ||
2248 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | 2236 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) |
2249 | { | 2237 | { |
2250 | struct list_head *i = p; | 2238 | return seq_list_next(p, &modules, pos); |
2251 | (*pos)++; | ||
2252 | if (i->next == &modules) | ||
2253 | return NULL; | ||
2254 | return i->next; | ||
2255 | } | 2239 | } |
2256 | 2240 | ||
2257 | static void m_stop(struct seq_file *m, void *p) | 2241 | static void m_stop(struct seq_file *m, void *p) |
@@ -2321,11 +2305,10 @@ const struct seq_operations modules_op = { | |||
2321 | /* Given an address, look for it in the module exception tables. */ | 2305 | /* Given an address, look for it in the module exception tables. */ |
2322 | const struct exception_table_entry *search_module_extables(unsigned long addr) | 2306 | const struct exception_table_entry *search_module_extables(unsigned long addr) |
2323 | { | 2307 | { |
2324 | unsigned long flags; | ||
2325 | const struct exception_table_entry *e = NULL; | 2308 | const struct exception_table_entry *e = NULL; |
2326 | struct module *mod; | 2309 | struct module *mod; |
2327 | 2310 | ||
2328 | spin_lock_irqsave(&modlist_lock, flags); | 2311 | preempt_disable(); |
2329 | list_for_each_entry(mod, &modules, list) { | 2312 | list_for_each_entry(mod, &modules, list) { |
2330 | if (mod->num_exentries == 0) | 2313 | if (mod->num_exentries == 0) |
2331 | continue; | 2314 | continue; |
@@ -2336,7 +2319,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
2336 | if (e) | 2319 | if (e) |
2337 | break; | 2320 | break; |
2338 | } | 2321 | } |
2339 | spin_unlock_irqrestore(&modlist_lock, flags); | 2322 | preempt_enable(); |
2340 | 2323 | ||
2341 | /* Now, if we found one, we are running inside it now, hence | 2324 | /* Now, if we found one, we are running inside it now, hence |
2342 | we cannot unload the module, hence no refcnt needed. */ | 2325 | we cannot unload the module, hence no refcnt needed. */ |
@@ -2348,25 +2331,24 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
2348 | */ | 2331 | */ |
2349 | int is_module_address(unsigned long addr) | 2332 | int is_module_address(unsigned long addr) |
2350 | { | 2333 | { |
2351 | unsigned long flags; | ||
2352 | struct module *mod; | 2334 | struct module *mod; |
2353 | 2335 | ||
2354 | spin_lock_irqsave(&modlist_lock, flags); | 2336 | preempt_disable(); |
2355 | 2337 | ||
2356 | list_for_each_entry(mod, &modules, list) { | 2338 | list_for_each_entry(mod, &modules, list) { |
2357 | if (within(addr, mod->module_core, mod->core_size)) { | 2339 | if (within(addr, mod->module_core, mod->core_size)) { |
2358 | spin_unlock_irqrestore(&modlist_lock, flags); | 2340 | preempt_enable(); |
2359 | return 1; | 2341 | return 1; |
2360 | } | 2342 | } |
2361 | } | 2343 | } |
2362 | 2344 | ||
2363 | spin_unlock_irqrestore(&modlist_lock, flags); | 2345 | preempt_enable(); |
2364 | 2346 | ||
2365 | return 0; | 2347 | return 0; |
2366 | } | 2348 | } |
2367 | 2349 | ||
2368 | 2350 | ||
2369 | /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ | 2351 | /* Is this a valid kernel address? */ |
2370 | struct module *__module_text_address(unsigned long addr) | 2352 | struct module *__module_text_address(unsigned long addr) |
2371 | { | 2353 | { |
2372 | struct module *mod; | 2354 | struct module *mod; |
@@ -2381,11 +2363,10 @@ struct module *__module_text_address(unsigned long addr) | |||
2381 | struct module *module_text_address(unsigned long addr) | 2363 | struct module *module_text_address(unsigned long addr) |
2382 | { | 2364 | { |
2383 | struct module *mod; | 2365 | struct module *mod; |
2384 | unsigned long flags; | ||
2385 | 2366 | ||
2386 | spin_lock_irqsave(&modlist_lock, flags); | 2367 | preempt_disable(); |
2387 | mod = __module_text_address(addr); | 2368 | mod = __module_text_address(addr); |
2388 | spin_unlock_irqrestore(&modlist_lock, flags); | 2369 | preempt_enable(); |
2389 | 2370 | ||
2390 | return mod; | 2371 | return mod; |
2391 | } | 2372 | } |
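
seq_list_start()/seq_list_next() encapsulate exactly the cursor walk that m_start()/m_next() used to hand-roll. A self-contained sketch of the pattern on a hypothetical "widget" list (names invented, not from the patch):

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>

struct widget {
	struct list_head list;
	char name[16];
};

static LIST_HEAD(widgets);
static DEFINE_MUTEX(widgets_lock);

static void *w_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&widgets_lock);
	return seq_list_start(&widgets, *pos);
}

static void *w_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &widgets, pos);
}

static void w_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&widgets_lock);
}

static int w_show(struct seq_file *m, void *p)
{
	struct widget *w = list_entry(p, struct widget, list);

	seq_printf(m, "%s\n", w->name);
	return 0;
}

static const struct seq_operations widgets_op = {
	.start = w_start,
	.next  = w_next,
	.stop  = w_stop,
	.show  = w_show,
};
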
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 9e83b589f754..10f0bbba382b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/utsname.h> | 21 | #include <linux/utsname.h> |
22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
23 | 23 | ||
24 | static struct kmem_cache *nsproxy_cachep; | ||
25 | |||
24 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); | 26 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); |
25 | 27 | ||
26 | static inline void get_nsproxy(struct nsproxy *ns) | 28 | static inline void get_nsproxy(struct nsproxy *ns) |
@@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) | |||
43 | { | 45 | { |
44 | struct nsproxy *ns; | 46 | struct nsproxy *ns; |
45 | 47 | ||
46 | ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); | 48 | ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); |
47 | if (ns) | 49 | if (ns) { |
50 | memcpy(ns, orig, sizeof(struct nsproxy)); | ||
48 | atomic_set(&ns->count, 1); | 51 | atomic_set(&ns->count, 1); |
52 | } | ||
49 | return ns; | 53 | return ns; |
50 | } | 54 | } |
51 | 55 | ||
@@ -54,33 +58,51 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) | |||
54 | * Return the newly created nsproxy. Do not attach this to the task, | 58 | * Return the newly created nsproxy. Do not attach this to the task, |
55 | * leave it to the caller to do proper locking and attach it to task. | 59 | * leave it to the caller to do proper locking and attach it to task. |
56 | */ | 60 | */ |
57 | static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk, | 61 | static struct nsproxy *create_new_namespaces(unsigned long flags, |
58 | struct fs_struct *new_fs) | 62 | struct task_struct *tsk, struct fs_struct *new_fs) |
59 | { | 63 | { |
60 | struct nsproxy *new_nsp; | 64 | struct nsproxy *new_nsp; |
65 | int err; | ||
61 | 66 | ||
62 | new_nsp = clone_nsproxy(tsk->nsproxy); | 67 | new_nsp = clone_nsproxy(tsk->nsproxy); |
63 | if (!new_nsp) | 68 | if (!new_nsp) |
64 | return ERR_PTR(-ENOMEM); | 69 | return ERR_PTR(-ENOMEM); |
65 | 70 | ||
66 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); | 71 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); |
67 | if (IS_ERR(new_nsp->mnt_ns)) | 72 | if (IS_ERR(new_nsp->mnt_ns)) { |
73 | err = PTR_ERR(new_nsp->mnt_ns); | ||
68 | goto out_ns; | 74 | goto out_ns; |
75 | } | ||
69 | 76 | ||
70 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); | 77 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); |
71 | if (IS_ERR(new_nsp->uts_ns)) | 78 | if (IS_ERR(new_nsp->uts_ns)) { |
79 | err = PTR_ERR(new_nsp->uts_ns); | ||
72 | goto out_uts; | 80 | goto out_uts; |
81 | } | ||
73 | 82 | ||
74 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); | 83 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); |
75 | if (IS_ERR(new_nsp->ipc_ns)) | 84 | if (IS_ERR(new_nsp->ipc_ns)) { |
85 | err = PTR_ERR(new_nsp->ipc_ns); | ||
76 | goto out_ipc; | 86 | goto out_ipc; |
87 | } | ||
77 | 88 | ||
78 | new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); | 89 | new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); |
79 | if (IS_ERR(new_nsp->pid_ns)) | 90 | if (IS_ERR(new_nsp->pid_ns)) { |
91 | err = PTR_ERR(new_nsp->pid_ns); | ||
80 | goto out_pid; | 92 | goto out_pid; |
93 | } | ||
94 | |||
95 | new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); | ||
96 | if (IS_ERR(new_nsp->user_ns)) { | ||
97 | err = PTR_ERR(new_nsp->user_ns); | ||
98 | goto out_user; | ||
99 | } | ||
81 | 100 | ||
82 | return new_nsp; | 101 | return new_nsp; |
83 | 102 | ||
103 | out_user: | ||
104 | if (new_nsp->pid_ns) | ||
105 | put_pid_ns(new_nsp->pid_ns); | ||
84 | out_pid: | 106 | out_pid: |
85 | if (new_nsp->ipc_ns) | 107 | if (new_nsp->ipc_ns) |
86 | put_ipc_ns(new_nsp->ipc_ns); | 108 | put_ipc_ns(new_nsp->ipc_ns); |
@@ -91,15 +113,15 @@ out_uts: | |||
91 | if (new_nsp->mnt_ns) | 113 | if (new_nsp->mnt_ns) |
92 | put_mnt_ns(new_nsp->mnt_ns); | 114 | put_mnt_ns(new_nsp->mnt_ns); |
93 | out_ns: | 115 | out_ns: |
94 | kfree(new_nsp); | 116 | kmem_cache_free(nsproxy_cachep, new_nsp); |
95 | return ERR_PTR(-ENOMEM); | 117 | return ERR_PTR(err); |
96 | } | 118 | } |
97 | 119 | ||
98 | /* | 120 | /* |
99 | * called from clone. This now handles copy for nsproxy and all | 121 | * called from clone. This now handles copy for nsproxy and all |
100 | * namespaces therein. | 122 | * namespaces therein. |
101 | */ | 123 | */ |
102 | int copy_namespaces(int flags, struct task_struct *tsk) | 124 | int copy_namespaces(unsigned long flags, struct task_struct *tsk) |
103 | { | 125 | { |
104 | struct nsproxy *old_ns = tsk->nsproxy; | 126 | struct nsproxy *old_ns = tsk->nsproxy; |
105 | struct nsproxy *new_ns; | 127 | struct nsproxy *new_ns; |
@@ -110,7 +132,7 @@ int copy_namespaces(int flags, struct task_struct *tsk) | |||
110 | 132 | ||
111 | get_nsproxy(old_ns); | 133 | get_nsproxy(old_ns); |
112 | 134 | ||
113 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) | 135 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER))) |
114 | return 0; | 136 | return 0; |
115 | 137 | ||
116 | if (!capable(CAP_SYS_ADMIN)) { | 138 | if (!capable(CAP_SYS_ADMIN)) { |
@@ -140,7 +162,9 @@ void free_nsproxy(struct nsproxy *ns) | |||
140 | put_ipc_ns(ns->ipc_ns); | 162 | put_ipc_ns(ns->ipc_ns); |
141 | if (ns->pid_ns) | 163 | if (ns->pid_ns) |
142 | put_pid_ns(ns->pid_ns); | 164 | put_pid_ns(ns->pid_ns); |
143 | kfree(ns); | 165 | if (ns->user_ns) |
166 | put_user_ns(ns->user_ns); | ||
167 | kmem_cache_free(nsproxy_cachep, ns); | ||
144 | } | 168 | } |
145 | 169 | ||
146 | /* | 170 | /* |
@@ -152,19 +176,10 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
152 | { | 176 | { |
153 | int err = 0; | 177 | int err = 0; |
154 | 178 | ||
155 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) | 179 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
180 | CLONE_NEWUSER))) | ||
156 | return 0; | 181 | return 0; |
157 | 182 | ||
158 | #ifndef CONFIG_IPC_NS | ||
159 | if (unshare_flags & CLONE_NEWIPC) | ||
160 | return -EINVAL; | ||
161 | #endif | ||
162 | |||
163 | #ifndef CONFIG_UTS_NS | ||
164 | if (unshare_flags & CLONE_NEWUTS) | ||
165 | return -EINVAL; | ||
166 | #endif | ||
167 | |||
168 | if (!capable(CAP_SYS_ADMIN)) | 183 | if (!capable(CAP_SYS_ADMIN)) |
169 | return -EPERM; | 184 | return -EPERM; |
170 | 185 | ||
@@ -174,3 +189,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
174 | err = PTR_ERR(*new_nsp); | 189 | err = PTR_ERR(*new_nsp); |
175 | return err; | 190 | return err; |
176 | } | 191 | } |
192 | |||
193 | static int __init nsproxy_cache_init(void) | ||
194 | { | ||
195 | nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), | ||
196 | 0, SLAB_PANIC, NULL, NULL); | ||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | module_init(nsproxy_cache_init); | ||
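
The switch from kmemdup() to a dedicated slab cache, shown in isolation on a hypothetical "thing" object (not from the patch; note the six-argument kmem_cache_create() of this era, with ctor and dtor slots, matching the hunk above):

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>

struct thing { int a, b; };
static struct kmem_cache *thing_cachep;

static struct thing *thing_dup(const struct thing *orig)
{
	struct thing *t = kmem_cache_alloc(thing_cachep, GFP_KERNEL);

	if (t)
		memcpy(t, orig, sizeof(*t));	/* clone, like clone_nsproxy() */
	return t;
}

static int __init thing_cache_init(void)
{
	thing_cachep = kmem_cache_create("thing", sizeof(struct thing),
					 0, SLAB_PANIC, NULL, NULL);
	return 0;
}
module_init(thing_cache_init);
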
diff --git a/kernel/pid.c b/kernel/pid.c index eb66bd2953ab..c6e3f9ffff87 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -365,7 +365,7 @@ struct pid *find_ge_pid(int nr) | |||
365 | } | 365 | } |
366 | EXPORT_SYMBOL_GPL(find_get_pid); | 366 | EXPORT_SYMBOL_GPL(find_get_pid); |
367 | 367 | ||
368 | struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns) | 368 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) |
369 | { | 369 | { |
370 | BUG_ON(!old_ns); | 370 | BUG_ON(!old_ns); |
371 | get_pid_ns(old_ns); | 371 | get_pid_ns(old_ns); |
diff --git a/kernel/printk.c b/kernel/printk.c index 0bbdeac2810c..051d27e36a6c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -449,13 +449,16 @@ static int printk_time = 1; | |||
449 | #else | 449 | #else |
450 | static int printk_time = 0; | 450 | static int printk_time = 0; |
451 | #endif | 451 | #endif |
452 | module_param(printk_time, int, S_IRUGO | S_IWUSR); | 452 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); |
453 | 453 | ||
454 | static int __init printk_time_setup(char *str) | 454 | static int __init printk_time_setup(char *str) |
455 | { | 455 | { |
456 | if (*str) | 456 | if (*str) |
457 | return 0; | 457 | return 0; |
458 | printk_time = 1; | 458 | printk_time = 1; |
459 | printk(KERN_NOTICE "The 'time' option is deprecated and " | ||
460 | "is scheduled for removal in early 2008\n"); | ||
461 | printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n"); | ||
459 | return 1; | 462 | return 1; |
460 | } | 463 | } |
461 | 464 | ||
@@ -483,6 +486,9 @@ static int have_callable_console(void) | |||
483 | * @fmt: format string | 486 | * @fmt: format string |
484 | * | 487 | * |
485 | * This is printk(). It can be called from any context. We want it to work. | 488 | * This is printk(). It can be called from any context. We want it to work. |
489 | * Be aware of the fact that if oops_in_progress is not set, we might try to | ||
490 | * wake klogd up which could deadlock on runqueue lock if printk() is called | ||
491 | * from scheduler code. | ||
486 | * | 492 | * |
487 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and | 493 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and |
488 | * call the console drivers. If we fail to get the semaphore we place the output | 494 | * call the console drivers. If we fail to get the semaphore we place the output |
@@ -654,7 +660,7 @@ static void call_console_drivers(unsigned long start, unsigned long end) | |||
654 | */ | 660 | */ |
655 | static int __init console_setup(char *str) | 661 | static int __init console_setup(char *str) |
656 | { | 662 | { |
657 | char name[sizeof(console_cmdline[0].name)]; | 663 | char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ |
658 | char *s, *options; | 664 | char *s, *options; |
659 | int idx; | 665 | int idx; |
660 | 666 | ||
@@ -662,27 +668,27 @@ static int __init console_setup(char *str) | |||
662 | * Decode str into name, index, options. | 668 | * Decode str into name, index, options. |
663 | */ | 669 | */ |
664 | if (str[0] >= '0' && str[0] <= '9') { | 670 | if (str[0] >= '0' && str[0] <= '9') { |
665 | strcpy(name, "ttyS"); | 671 | strcpy(buf, "ttyS"); |
666 | strncpy(name + 4, str, sizeof(name) - 5); | 672 | strncpy(buf + 4, str, sizeof(buf) - 5); |
667 | } else { | 673 | } else { |
668 | strncpy(name, str, sizeof(name) - 1); | 674 | strncpy(buf, str, sizeof(buf) - 1); |
669 | } | 675 | } |
670 | name[sizeof(name) - 1] = 0; | 676 | buf[sizeof(buf) - 1] = 0; |
671 | if ((options = strchr(str, ',')) != NULL) | 677 | if ((options = strchr(str, ',')) != NULL) |
672 | *(options++) = 0; | 678 | *(options++) = 0; |
673 | #ifdef __sparc__ | 679 | #ifdef __sparc__ |
674 | if (!strcmp(str, "ttya")) | 680 | if (!strcmp(str, "ttya")) |
675 | strcpy(name, "ttyS0"); | 681 | strcpy(buf, "ttyS0"); |
676 | if (!strcmp(str, "ttyb")) | 682 | if (!strcmp(str, "ttyb")) |
677 | strcpy(name, "ttyS1"); | 683 | strcpy(buf, "ttyS1"); |
678 | #endif | 684 | #endif |
679 | for (s = name; *s; s++) | 685 | for (s = buf; *s; s++) |
680 | if ((*s >= '0' && *s <= '9') || *s == ',') | 686 | if ((*s >= '0' && *s <= '9') || *s == ',') |
681 | break; | 687 | break; |
682 | idx = simple_strtoul(s, NULL, 10); | 688 | idx = simple_strtoul(s, NULL, 10); |
683 | *s = 0; | 689 | *s = 0; |
684 | 690 | ||
685 | add_preferred_console(name, idx, options); | 691 | add_preferred_console(buf, idx, options); |
686 | return 1; | 692 | return 1; |
687 | } | 693 | } |
688 | __setup("console=", console_setup); | 694 | __setup("console=", console_setup); |
@@ -709,7 +715,7 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
709 | * See if this tty is not yet registered, and | 715 | * See if this tty is not yet registered, and |
710 | * if we have a slot free. | 716 | * if we have a slot free. |
711 | */ | 717 | */ |
712 | for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | 718 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) |
713 | if (strcmp(console_cmdline[i].name, name) == 0 && | 719 | if (strcmp(console_cmdline[i].name, name) == 0 && |
714 | console_cmdline[i].index == idx) { | 720 | console_cmdline[i].index == idx) { |
715 | selected_console = i; | 721 | selected_console = i; |
@@ -726,6 +732,25 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
726 | return 0; | 732 | return 0; |
727 | } | 733 | } |
728 | 734 | ||
735 | int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) | ||
736 | { | ||
737 | struct console_cmdline *c; | ||
738 | int i; | ||
739 | |||
740 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | ||
741 | if (strcmp(console_cmdline[i].name, name) == 0 && | ||
742 | console_cmdline[i].index == idx) { | ||
743 | c = &console_cmdline[i]; | ||
744 | memcpy(c->name, name_new, sizeof(c->name)); | ||
745 | c->name[sizeof(c->name) - 1] = 0; | ||
746 | c->options = options; | ||
747 | c->index = idx_new; | ||
748 | return i; | ||
749 | } | ||
750 | /* not found */ | ||
751 | return -1; | ||
752 | } | ||
753 | |||
729 | #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND | 754 | #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND |
730 | /** | 755 | /** |
731 | * suspend_console - suspend the console subsystem | 756 | * suspend_console - suspend the console subsystem |
@@ -942,6 +967,9 @@ void register_console(struct console *console) | |||
942 | if (preferred_console < 0 || bootconsole || !console_drivers) | 967 | if (preferred_console < 0 || bootconsole || !console_drivers) |
943 | preferred_console = selected_console; | 968 | preferred_console = selected_console; |
944 | 969 | ||
970 | if (console->early_setup) | ||
971 | console->early_setup(); | ||
972 | |||
945 | /* | 973 | /* |
946 | * See if we want to use this console driver. If we | 974 | * See if we want to use this console driver. If we |
947 | * didn't select a console we take the first one | 975 | * didn't select a console we take the first one |
@@ -985,12 +1013,15 @@ void register_console(struct console *console) | |||
985 | if (!(console->flags & CON_ENABLED)) | 1013 | if (!(console->flags & CON_ENABLED)) |
986 | return; | 1014 | return; |
987 | 1015 | ||
988 | if (bootconsole) { | 1016 | if (bootconsole && (console->flags & CON_CONSDEV)) { |
989 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", | 1017 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", |
990 | bootconsole->name, bootconsole->index, | 1018 | bootconsole->name, bootconsole->index, |
991 | console->name, console->index); | 1019 | console->name, console->index); |
992 | unregister_console(bootconsole); | 1020 | unregister_console(bootconsole); |
993 | console->flags &= ~CON_PRINTBUFFER; | 1021 | console->flags &= ~CON_PRINTBUFFER; |
1022 | } else { | ||
1023 | printk(KERN_INFO "console [%s%d] enabled\n", | ||
1024 | console->name, console->index); | ||
994 | } | 1025 | } |
995 | 1026 | ||
996 | /* | 1027 | /* |
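The new update_console_cmdline() rewrites a matching preferred-console slot in place, so a driver can hand a command-line alias over to the real device name before registration. A minimal sketch of a caller, assuming the prototype is exposed via <linux/console.h>; the "uart"/"ttyS" pairing is illustrative:

    #include <linux/console.h>
    #include <linux/init.h>

    /* Sketch: once the driver knows "uart0" is really ttyS0, rewrite
     * the preferred-console entry so the handover above matches.
     * Returns the slot index, or -1 if the user never asked for it. */
    static int __init remap_uart_console(char *options)
    {
            return update_console_cmdline("uart", 0, "ttyS", 0, options);
    }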
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ad7949a589dd..b1d11f1c7cf7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -161,6 +161,7 @@ int ptrace_may_attach(struct task_struct *task) | |||
161 | int ptrace_attach(struct task_struct *task) | 161 | int ptrace_attach(struct task_struct *task) |
162 | { | 162 | { |
163 | int retval; | 163 | int retval; |
164 | unsigned long flags; | ||
164 | 165 | ||
165 | audit_ptrace(task); | 166 | audit_ptrace(task); |
166 | 167 | ||
@@ -181,9 +182,7 @@ repeat: | |||
181 | * cpu's that may have task_lock). | 182 | * cpu's that may have task_lock). |
182 | */ | 183 | */ |
183 | task_lock(task); | 184 | task_lock(task); |
184 | local_irq_disable(); | 185 | if (!write_trylock_irqsave(&tasklist_lock, flags)) { |
185 | if (!write_trylock(&tasklist_lock)) { | ||
186 | local_irq_enable(); | ||
187 | task_unlock(task); | 186 | task_unlock(task); |
188 | do { | 187 | do { |
189 | cpu_relax(); | 188 | cpu_relax(); |
@@ -211,7 +210,7 @@ repeat: | |||
211 | force_sig_specific(SIGSTOP, task); | 210 | force_sig_specific(SIGSTOP, task); |
212 | 211 | ||
213 | bad: | 212 | bad: |
214 | write_unlock_irq(&tasklist_lock); | 213 | write_unlock_irqrestore(&tasklist_lock, flags); |
215 | task_unlock(task); | 214 | task_unlock(task); |
216 | out: | 215 | out: |
217 | return retval; | 216 | return retval; |
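write_trylock_irqsave() collapses the old local_irq_disable()/write_trylock()/local_irq_enable() sequence into one primitive that only leaves interrupts disabled when the lock was actually taken. It is plausibly built along these lines (a sketch, not the definitive definition from this series):

    /* Sketch: try the write lock with IRQs saved; on failure restore
     * the IRQ state so the caller can back off with cpu_relax() and
     * retry. Evaluates to 1 when the lock was acquired, 0 otherwise. */
    #define write_trylock_irqsave_sketch(lock, flags)          \
    ({                                                         \
            local_irq_save(flags);                             \
            write_trylock(lock) ?                              \
                    1 : ({ local_irq_restore(flags); 0; });    \
    })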
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index da8d6bf46457..5aedbee014df 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c | |||
@@ -29,12 +29,6 @@ | |||
29 | 29 | ||
30 | #include "rtmutex_common.h" | 30 | #include "rtmutex_common.h" |
31 | 31 | ||
32 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
33 | # include "rtmutex-debug.h" | ||
34 | #else | ||
35 | # include "rtmutex.h" | ||
36 | #endif | ||
37 | |||
38 | # define TRACE_WARN_ON(x) WARN_ON(x) | 32 | # define TRACE_WARN_ON(x) WARN_ON(x) |
39 | # define TRACE_BUG_ON(x) BUG_ON(x) | 33 | # define TRACE_BUG_ON(x) BUG_ON(x) |
40 | 34 | ||
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 17d28ce20300..8cd9bd2cdb34 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -17,12 +17,6 @@ | |||
17 | 17 | ||
18 | #include "rtmutex_common.h" | 18 | #include "rtmutex_common.h" |
19 | 19 | ||
20 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
21 | # include "rtmutex-debug.h" | ||
22 | #else | ||
23 | # include "rtmutex.h" | ||
24 | #endif | ||
25 | |||
26 | /* | 20 | /* |
27 | * lock->owner state tracking: | 21 | * lock->owner state tracking: |
28 | * | 22 | * |
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 9c75856e791e..2d3b83593ca3 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h | |||
@@ -103,7 +103,7 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) | |||
103 | 103 | ||
104 | static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) | 104 | static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) |
105 | { | 105 | { |
106 | return (struct task_struct *) | 106 | return (struct task_struct *) |
107 | ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); | 107 | ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); |
108 | } | 108 | } |
109 | 109 | ||
@@ -120,4 +120,11 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |||
120 | struct task_struct *proxy_owner); | 120 | struct task_struct *proxy_owner); |
121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, | 121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, |
122 | struct task_struct *proxy_owner); | 122 | struct task_struct *proxy_owner); |
123 | |||
124 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
125 | # include "rtmutex-debug.h" | ||
126 | #else | ||
127 | # include "rtmutex.h" | ||
128 | #endif | ||
129 | |||
123 | #endif | 130 | #endif |
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index c3391b6020e8..ad64fcb731f2 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | 11 | ||
12 | /* #define SECCOMP_DEBUG 1 */ | 12 | /* #define SECCOMP_DEBUG 1 */ |
13 | #define NR_SECCOMP_MODES 1 | ||
13 | 14 | ||
14 | /* | 15 | /* |
15 | * Secure computing mode 1 allows only read/write/exit/sigreturn. | 16 | * Secure computing mode 1 allows only read/write/exit/sigreturn. |
@@ -54,3 +55,31 @@ void __secure_computing(int this_syscall) | |||
54 | #endif | 55 | #endif |
55 | do_exit(SIGKILL); | 56 | do_exit(SIGKILL); |
56 | } | 57 | } |
58 | |||
59 | long prctl_get_seccomp(void) | ||
60 | { | ||
61 | return current->seccomp.mode; | ||
62 | } | ||
63 | |||
64 | long prctl_set_seccomp(unsigned long seccomp_mode) | ||
65 | { | ||
66 | long ret; | ||
67 | |||
68 | /* can set it only once to be even more secure */ | ||
69 | ret = -EPERM; | ||
70 | if (unlikely(current->seccomp.mode)) | ||
71 | goto out; | ||
72 | |||
73 | ret = -EINVAL; | ||
74 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | ||
75 | current->seccomp.mode = seccomp_mode; | ||
76 | set_thread_flag(TIF_SECCOMP); | ||
77 | #ifdef TIF_NOTSC | ||
78 | disable_TSC(); | ||
79 | #endif | ||
80 | ret = 0; | ||
81 | } | ||
82 | |||
83 | out: | ||
84 | return ret; | ||
85 | } | ||
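Userspace reaches these helpers through the two new prctl options. A minimal hypothetical caller; the PR_* values are assumed to match what this series adds to <linux/prctl.h>:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>

    #ifndef PR_SET_SECCOMP
    # define PR_GET_SECCOMP 21      /* assumed, see linux/prctl.h */
    # define PR_SET_SECCOMP 22
    #endif

    int main(void)
    {
            printf("seccomp mode: %d\n", prctl(PR_GET_SECCOMP));

            /* One-way switch: a second PR_SET_SECCOMP gets -EPERM. */
            if (prctl(PR_SET_SECCOMP, 1) != 0)
                    perror("PR_SET_SECCOMP");

            /* Strict mode allows only read/write/exit/sigreturn, so
             * leave via the raw exit syscall; glibc's exit() would
             * call exit_group and be killed. */
            syscall(SYS_exit, 0);
            return 0;
    }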
diff --git a/kernel/signal.c b/kernel/signal.c index f9405609774e..39d122753bac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -718,6 +718,37 @@ out_set: | |||
718 | #define LEGACY_QUEUE(sigptr, sig) \ | 718 | #define LEGACY_QUEUE(sigptr, sig) \ |
719 | (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) | 719 | (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) |
720 | 720 | ||
721 | int print_fatal_signals; | ||
722 | |||
723 | static void print_fatal_signal(struct pt_regs *regs, int signr) | ||
724 | { | ||
725 | printk("%s/%d: potentially unexpected fatal signal %d.\n", | ||
726 | current->comm, current->pid, signr); | ||
727 | |||
728 | #ifdef __i386__ | ||
729 | printk("code at %08lx: ", regs->eip); | ||
730 | { | ||
731 | int i; | ||
732 | for (i = 0; i < 16; i++) { | ||
733 | unsigned char insn; | ||
734 | |||
735 | __get_user(insn, (unsigned char *)(regs->eip + i)); | ||
736 | printk("%02x ", insn); | ||
737 | } | ||
738 | } | ||
739 | #endif | ||
740 | printk("\n"); | ||
741 | show_regs(regs); | ||
742 | } | ||
743 | |||
744 | static int __init setup_print_fatal_signals(char *str) | ||
745 | { | ||
746 | get_option(&str, &print_fatal_signals); | ||
747 | |||
748 | return 1; | ||
749 | } | ||
750 | |||
751 | __setup("print-fatal-signals=", setup_print_fatal_signals); | ||
721 | 752 | ||
722 | static int | 753 | static int |
723 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) | 754 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
@@ -1855,6 +1886,8 @@ relock: | |||
1855 | * Anything else is fatal, maybe with a core dump. | 1886 | * Anything else is fatal, maybe with a core dump. |
1856 | */ | 1887 | */ |
1857 | current->flags |= PF_SIGNALED; | 1888 | current->flags |= PF_SIGNALED; |
1889 | if ((signr != SIGKILL) && print_fatal_signals) | ||
1890 | print_fatal_signal(regs, signr); | ||
1858 | if (sig_kernel_coredump(signr)) { | 1891 | if (sig_kernel_coredump(signr)) { |
1859 | /* | 1892 | /* |
1860 | * If it was able to dump core, this kills all | 1893 | * If it was able to dump core, this kills all |
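The knob is reachable three ways: the print-fatal-signals= boot parameter (via the __setup above), the print-fatal-signals sysctl added to kern_table further down, or a direct procfs write, roughly:

    #include <stdio.h>

    /* Sketch: flip the new knob at runtime, equivalent to booting
     * with print-fatal-signals=1. */
    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/print-fatal-signals", "w");

            if (!f) {
                    perror("print-fatal-signals");
                    return 1;
            }
            fputs("1\n", f);
            return fclose(f) ? 1 : 0;
    }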
diff --git a/kernel/softirq.c b/kernel/softirq.c index 73217a9e2875..8de267790166 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -614,12 +614,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
614 | kthread_bind(per_cpu(ksoftirqd, hotcpu), | 614 | kthread_bind(per_cpu(ksoftirqd, hotcpu), |
615 | any_online_cpu(cpu_online_map)); | 615 | any_online_cpu(cpu_online_map)); |
616 | case CPU_DEAD: | 616 | case CPU_DEAD: |
617 | case CPU_DEAD_FROZEN: | 617 | case CPU_DEAD_FROZEN: { |
618 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
619 | |||
618 | p = per_cpu(ksoftirqd, hotcpu); | 620 | p = per_cpu(ksoftirqd, hotcpu); |
619 | per_cpu(ksoftirqd, hotcpu) = NULL; | 621 | per_cpu(ksoftirqd, hotcpu) = NULL; |
622 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
620 | kthread_stop(p); | 623 | kthread_stop(p); |
621 | takeover_tasklets(hotcpu); | 624 | takeover_tasklets(hotcpu); |
622 | break; | 625 | break; |
626 | } | ||
623 | #endif /* CONFIG_HOTPLUG_CPU */ | 627 | #endif /* CONFIG_HOTPLUG_CPU */ |
624 | } | 628 | } |
625 | return NOTIFY_OK; | 629 | return NOTIFY_OK; |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index fcee2a8e6da3..319821ef78af 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -93,10 +93,6 @@ static void stopmachine_set_state(enum stopmachine_state state) | |||
93 | static int stop_machine(void) | 93 | static int stop_machine(void) |
94 | { | 94 | { |
95 | int i, ret = 0; | 95 | int i, ret = 0; |
96 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
97 | |||
98 | /* One high-prio thread per cpu. We'll do this one. */ | ||
99 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
100 | 96 | ||
101 | atomic_set(&stopmachine_thread_ack, 0); | 97 | atomic_set(&stopmachine_thread_ack, 0); |
102 | stopmachine_num_threads = 0; | 98 | stopmachine_num_threads = 0; |
@@ -189,6 +185,10 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, | |||
189 | 185 | ||
190 | p = kthread_create(do_stop, &smdata, "kstopmachine"); | 186 | p = kthread_create(do_stop, &smdata, "kstopmachine"); |
191 | if (!IS_ERR(p)) { | 187 | if (!IS_ERR(p)) { |
188 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
189 | |||
190 | /* One high-prio thread per cpu. We'll do this one. */ | ||
191 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
192 | kthread_bind(p, cpu); | 192 | kthread_bind(p, cpu); |
193 | wake_up_process(p); | 193 | wake_up_process(p); |
194 | wait_for_completion(&smdata.done); | 194 | wait_for_completion(&smdata.done); |
diff --git a/kernel/sys.c b/kernel/sys.c index 872271ccc384..4d141ae3e802 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -31,10 +31,12 @@ | |||
31 | #include <linux/cn_proc.h> | 31 | #include <linux/cn_proc.h> |
32 | #include <linux/getcpu.h> | 32 | #include <linux/getcpu.h> |
33 | #include <linux/task_io_accounting_ops.h> | 33 | #include <linux/task_io_accounting_ops.h> |
34 | #include <linux/seccomp.h> | ||
34 | 35 | ||
35 | #include <linux/compat.h> | 36 | #include <linux/compat.h> |
36 | #include <linux/syscalls.h> | 37 | #include <linux/syscalls.h> |
37 | #include <linux/kprobes.h> | 38 | #include <linux/kprobes.h> |
39 | #include <linux/user_namespace.h> | ||
38 | 40 | ||
39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
40 | #include <asm/io.h> | 42 | #include <asm/io.h> |
@@ -1078,13 +1080,13 @@ static int set_user(uid_t new_ruid, int dumpclear) | |||
1078 | { | 1080 | { |
1079 | struct user_struct *new_user; | 1081 | struct user_struct *new_user; |
1080 | 1082 | ||
1081 | new_user = alloc_uid(new_ruid); | 1083 | new_user = alloc_uid(current->nsproxy->user_ns, new_ruid); |
1082 | if (!new_user) | 1084 | if (!new_user) |
1083 | return -EAGAIN; | 1085 | return -EAGAIN; |
1084 | 1086 | ||
1085 | if (atomic_read(&new_user->processes) >= | 1087 | if (atomic_read(&new_user->processes) >= |
1086 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && | 1088 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && |
1087 | new_user != &root_user) { | 1089 | new_user != current->nsproxy->user_ns->root_user) { |
1088 | free_uid(new_user); | 1090 | free_uid(new_user); |
1089 | return -EAGAIN; | 1091 | return -EAGAIN; |
1090 | } | 1092 | } |
@@ -2241,6 +2243,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
2241 | error = SET_ENDIAN(current, arg2); | 2243 | error = SET_ENDIAN(current, arg2); |
2242 | break; | 2244 | break; |
2243 | 2245 | ||
2246 | case PR_GET_SECCOMP: | ||
2247 | error = prctl_get_seccomp(); | ||
2248 | break; | ||
2249 | case PR_SET_SECCOMP: | ||
2250 | error = prctl_set_seccomp(arg2); | ||
2251 | break; | ||
2252 | |||
2244 | default: | 2253 | default: |
2245 | error = -EINVAL; | 2254 | error = -EINVAL; |
2246 | break; | 2255 | break; |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7e11e2c98bf9..b0ec498a18d9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -14,6 +14,7 @@ asmlinkage long sys_ni_syscall(void) | |||
14 | 14 | ||
15 | cond_syscall(sys_nfsservctl); | 15 | cond_syscall(sys_nfsservctl); |
16 | cond_syscall(sys_quotactl); | 16 | cond_syscall(sys_quotactl); |
17 | cond_syscall(sys32_quotactl); | ||
17 | cond_syscall(sys_acct); | 18 | cond_syscall(sys_acct); |
18 | cond_syscall(sys_lookup_dcookie); | 19 | cond_syscall(sys_lookup_dcookie); |
19 | cond_syscall(sys_swapon); | 20 | cond_syscall(sys_swapon); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d93e13d93f24..7dca326648d5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -61,6 +61,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | |||
61 | 61 | ||
62 | /* External variables not in a header file. */ | 62 | /* External variables not in a header file. */ |
63 | extern int C_A_D; | 63 | extern int C_A_D; |
64 | extern int print_fatal_signals; | ||
64 | extern int sysctl_overcommit_memory; | 65 | extern int sysctl_overcommit_memory; |
65 | extern int sysctl_overcommit_ratio; | 66 | extern int sysctl_overcommit_ratio; |
66 | extern int sysctl_panic_on_oom; | 67 | extern int sysctl_panic_on_oom; |
@@ -202,7 +203,10 @@ static ctl_table root_table[] = { | |||
202 | .mode = 0555, | 203 | .mode = 0555, |
203 | .child = dev_table, | 204 | .child = dev_table, |
204 | }, | 205 | }, |
205 | 206 | /* |
207 | * NOTE: do not add new entries to this table unless you have read | ||
208 | * Documentation/sysctl/ctl_unnumbered.txt | ||
209 | */ | ||
206 | { .ctl_name = 0 } | 210 | { .ctl_name = 0 } |
207 | }; | 211 | }; |
208 | 212 | ||
@@ -340,6 +344,14 @@ static ctl_table kern_table[] = { | |||
340 | .proc_handler = &proc_dointvec, | 344 | .proc_handler = &proc_dointvec, |
341 | }, | 345 | }, |
342 | #endif | 346 | #endif |
347 | { | ||
348 | .ctl_name = CTL_UNNUMBERED, | ||
349 | .procname = "print-fatal-signals", | ||
350 | .data = &print_fatal_signals, | ||
351 | .maxlen = sizeof(int), | ||
352 | .mode = 0644, | ||
353 | .proc_handler = &proc_dointvec, | ||
354 | }, | ||
343 | #ifdef __sparc__ | 355 | #ifdef __sparc__ |
344 | { | 356 | { |
345 | .ctl_name = KERN_SPARC_REBOOT, | 357 | .ctl_name = KERN_SPARC_REBOOT, |
@@ -958,6 +970,17 @@ static ctl_table vm_table[] = { | |||
958 | .mode = 0644, | 970 | .mode = 0644, |
959 | .proc_handler = &proc_doulongvec_minmax, | 971 | .proc_handler = &proc_doulongvec_minmax, |
960 | }, | 972 | }, |
973 | #ifdef CONFIG_NUMA | ||
974 | { | ||
975 | .ctl_name = CTL_UNNUMBERED, | ||
976 | .procname = "numa_zonelist_order", | ||
977 | .data = &numa_zonelist_order, | ||
978 | .maxlen = NUMA_ZONELIST_ORDER_LEN, | ||
979 | .mode = 0644, | ||
980 | .proc_handler = &numa_zonelist_order_handler, | ||
981 | .strategy = &sysctl_string, | ||
982 | }, | ||
983 | #endif | ||
961 | #endif | 984 | #endif |
962 | #if defined(CONFIG_X86_32) || \ | 985 | #if defined(CONFIG_X86_32) || \ |
963 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) | 986 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) |
@@ -972,6 +995,14 @@ static ctl_table vm_table[] = { | |||
972 | .extra1 = &zero, | 995 | .extra1 = &zero, |
973 | }, | 996 | }, |
974 | #endif | 997 | #endif |
998 | /* | ||
999 | * NOTE: do not add new entries to this table unless you have read | ||
1000 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1001 | */ | ||
1002 | /* | ||
1003 | * NOTE: do not add new entries to this table unless you have read | ||
1004 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1005 | */ | ||
975 | { .ctl_name = 0 } | 1006 | { .ctl_name = 0 } |
976 | }; | 1007 | }; |
977 | 1008 | ||
@@ -1112,6 +1143,14 @@ static ctl_table fs_table[] = { | |||
1112 | .child = binfmt_misc_table, | 1143 | .child = binfmt_misc_table, |
1113 | }, | 1144 | }, |
1114 | #endif | 1145 | #endif |
1146 | /* | ||
1147 | * NOTE: do not add new entries to this table unless you have read | ||
1148 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1149 | */ | ||
1150 | /* | ||
1151 | * NOTE: do not add new entries to this table unless you have read | ||
1152 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1153 | */ | ||
1115 | { .ctl_name = 0 } | 1154 | { .ctl_name = 0 } |
1116 | }; | 1155 | }; |
1117 | 1156 | ||
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 906cae771585..059431ed67db 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
196 | 196 | ||
197 | /* fill in basic acct fields */ | 197 | /* fill in basic acct fields */ |
198 | stats->version = TASKSTATS_VERSION; | 198 | stats->version = TASKSTATS_VERSION; |
199 | stats->nvcsw = tsk->nvcsw; | ||
200 | stats->nivcsw = tsk->nivcsw; | ||
199 | bacct_add_tsk(stats, tsk); | 201 | bacct_add_tsk(stats, tsk); |
200 | 202 | ||
201 | /* fill in extended acct fields */ | 203 | /* fill in extended acct fields */ |
@@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
242 | */ | 244 | */ |
243 | delayacct_add_tsk(stats, tsk); | 245 | delayacct_add_tsk(stats, tsk); |
244 | 246 | ||
247 | stats->nvcsw += tsk->nvcsw; | ||
248 | stats->nivcsw += tsk->nivcsw; | ||
245 | } while_each_thread(first, tsk); | 249 | } while_each_thread(first, tsk); |
246 | 250 | ||
247 | unlock_task_sighand(first, &flags); | 251 | unlock_task_sighand(first, &flags); |
diff --git a/kernel/time.c b/kernel/time.c index f04791f69408..ffe19149d770 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz); | |||
57 | */ | 57 | */ |
58 | asmlinkage long sys_time(time_t __user * tloc) | 58 | asmlinkage long sys_time(time_t __user * tloc) |
59 | { | 59 | { |
60 | time_t i; | 60 | /* |
61 | struct timeval tv; | 61 | * We read xtime.tv_sec atomically - it's updated |
62 | * atomically by update_wall_time(), so no need to | ||
63 | * even read-lock the xtime seqlock: | ||
64 | */ | ||
65 | time_t i = xtime.tv_sec; | ||
62 | 66 | ||
63 | do_gettimeofday(&tv); | 67 | smp_rmb(); /* sys_time() results are coherent */ |
64 | i = tv.tv_sec; | ||
65 | 68 | ||
66 | if (tloc) { | 69 | if (tloc) { |
67 | if (put_user(i,tloc)) | 70 | if (put_user(i, tloc)) |
68 | i = -EFAULT; | 71 | i = -EFAULT; |
69 | } | 72 | } |
70 | return i; | 73 | return i; |
@@ -373,12 +376,25 @@ void do_gettimeofday (struct timeval *tv) | |||
373 | 376 | ||
374 | tv->tv_sec = sec; | 377 | tv->tv_sec = sec; |
375 | tv->tv_usec = usec; | 378 | tv->tv_usec = usec; |
376 | } | ||
377 | 379 | ||
380 | /* | ||
381 | * Make sure xtime.tv_sec [returned by sys_time()] always | ||
382 | * follows the gettimeofday() result precisely. This | ||
383 | * condition is extremely unlikely, it can hit at most | ||
384 | * once per second: | ||
385 | */ | ||
386 | if (unlikely(xtime.tv_sec != tv->tv_sec)) { | ||
387 | unsigned long flags; | ||
388 | |||
389 | write_seqlock_irqsave(&xtime_lock, flags); | ||
390 | update_wall_time(); | ||
391 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
392 | } | ||
393 | } | ||
378 | EXPORT_SYMBOL(do_gettimeofday); | 394 | EXPORT_SYMBOL(do_gettimeofday); |
379 | 395 | ||
396 | #else /* CONFIG_TIME_INTERPOLATION */ | ||
380 | 397 | ||
381 | #else | ||
382 | #ifndef CONFIG_GENERIC_TIME | 398 | #ifndef CONFIG_GENERIC_TIME |
383 | /* | 399 | /* |
384 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval | 400 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval |
@@ -394,7 +410,7 @@ void getnstimeofday(struct timespec *tv) | |||
394 | } | 410 | } |
395 | EXPORT_SYMBOL_GPL(getnstimeofday); | 411 | EXPORT_SYMBOL_GPL(getnstimeofday); |
396 | #endif | 412 | #endif |
397 | #endif | 413 | #endif /* CONFIG_TIME_INTERPOLATION */ |
398 | 414 | ||
399 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. | 415 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. |
400 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 | 416 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 |
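The net effect of the two hunks above is an ordering guarantee between the clock syscalls: a time() sampled after a gettimeofday() never reports an earlier second, because do_gettimeofday() now nudges xtime forward whenever the seconds disagree. A userspace check of that invariant (barring a concurrent backwards settimeofday()):

    #include <assert.h>
    #include <stdio.h>
    #include <sys/time.h>
    #include <time.h>

    int main(void)
    {
            struct timeval tv;
            time_t t;

            gettimeofday(&tv, NULL);
            t = time(NULL);         /* reads xtime.tv_sec in the kernel */

            /* time() was sampled second, so it must not lag behind. */
            assert(t >= tv.tv_sec);
            printf("gettimeofday=%ld time=%ld\n", (long)tv.tv_sec, (long)t);
            return 0;
    }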
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 76212b2a99de..2ad1c37b8dfe 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -205,47 +205,6 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
205 | } | 205 | } |
206 | 206 | ||
207 | /** | 207 | /** |
208 | * clockevents_request_device | ||
209 | */ | ||
210 | struct clock_event_device *clockevents_request_device(unsigned int features, | ||
211 | cpumask_t cpumask) | ||
212 | { | ||
213 | struct clock_event_device *cur, *dev = NULL; | ||
214 | struct list_head *tmp; | ||
215 | |||
216 | spin_lock(&clockevents_lock); | ||
217 | |||
218 | list_for_each(tmp, &clockevent_devices) { | ||
219 | cur = list_entry(tmp, struct clock_event_device, list); | ||
220 | |||
221 | if ((cur->features & features) == features && | ||
222 | cpus_equal(cpumask, cur->cpumask)) { | ||
223 | if (!dev || dev->rating < cur->rating) | ||
224 | dev = cur; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | clockevents_exchange_device(NULL, dev); | ||
229 | |||
230 | spin_unlock(&clockevents_lock); | ||
231 | |||
232 | return dev; | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * clockevents_release_device | ||
237 | */ | ||
238 | void clockevents_release_device(struct clock_event_device *dev) | ||
239 | { | ||
240 | spin_lock(&clockevents_lock); | ||
241 | |||
242 | clockevents_exchange_device(dev, NULL); | ||
243 | clockevents_notify_released(); | ||
244 | |||
245 | spin_unlock(&clockevents_lock); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * clockevents_notify - notification about relevant events | 208 | * clockevents_notify - notification about relevant events |
250 | */ | 209 | */ |
251 | void clockevents_notify(unsigned long reason, void *arg) | 210 | void clockevents_notify(unsigned long reason, void *arg) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index cf53bb5814cb..438c6b723ee2 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -13,7 +13,7 @@ | |||
13 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
14 | #include <linux/jiffies.h> | 14 | #include <linux/jiffies.h> |
15 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
16 | 16 | #include <linux/capability.h> | |
17 | #include <asm/div64.h> | 17 | #include <asm/div64.h> |
18 | #include <asm/timex.h> | 18 | #include <asm/timex.h> |
19 | 19 | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3d1042f82a68..728cedfd3cbd 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -36,9 +36,17 @@ EXPORT_SYMBOL(xtime_lock); | |||
36 | * at zero at system boot time, so wall_to_monotonic will be negative, | 36 | * at zero at system boot time, so wall_to_monotonic will be negative, |
37 | * however, we will ALWAYS keep the tv_nsec part positive so we can use | 37 | * however, we will ALWAYS keep the tv_nsec part positive so we can use |
38 | * the usual normalization. | 38 | * the usual normalization. |
39 | * | ||
40 | * wall_to_monotonic is moved after resume from suspend for the monotonic | ||
41 | * time not to jump. We need to add total_sleep_time to wall_to_monotonic | ||
42 | * to get the real boot based time offset. | ||
43 | * | ||
44 | * - wall_to_monotonic is no longer the boot time, getboottime must be | ||
45 | * used instead. | ||
39 | */ | 46 | */ |
40 | struct timespec xtime __attribute__ ((aligned (16))); | 47 | struct timespec xtime __attribute__ ((aligned (16))); |
41 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); | 48 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); |
49 | static unsigned long total_sleep_time; /* seconds */ | ||
42 | 50 | ||
43 | EXPORT_SYMBOL(xtime); | 51 | EXPORT_SYMBOL(xtime); |
44 | 52 | ||
@@ -251,6 +259,7 @@ void __init timekeeping_init(void) | |||
251 | xtime.tv_nsec = 0; | 259 | xtime.tv_nsec = 0; |
252 | set_normalized_timespec(&wall_to_monotonic, | 260 | set_normalized_timespec(&wall_to_monotonic, |
253 | -xtime.tv_sec, -xtime.tv_nsec); | 261 | -xtime.tv_sec, -xtime.tv_nsec); |
262 | total_sleep_time = 0; | ||
254 | 263 | ||
255 | write_sequnlock_irqrestore(&xtime_lock, flags); | 264 | write_sequnlock_irqrestore(&xtime_lock, flags); |
256 | } | 265 | } |
@@ -282,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev) | |||
282 | 291 | ||
283 | xtime.tv_sec += sleep_length; | 292 | xtime.tv_sec += sleep_length; |
284 | wall_to_monotonic.tv_sec -= sleep_length; | 293 | wall_to_monotonic.tv_sec -= sleep_length; |
294 | total_sleep_time += sleep_length; | ||
285 | } | 295 | } |
286 | /* re-base the last cycle value */ | 296 | /* re-base the last cycle value */ |
287 | clock->cycle_last = clocksource_read(clock); | 297 | clock->cycle_last = clocksource_read(clock); |
@@ -476,3 +486,30 @@ void update_wall_time(void) | |||
476 | change_clocksource(); | 486 | change_clocksource(); |
477 | update_vsyscall(&xtime, clock); | 487 | update_vsyscall(&xtime, clock); |
478 | } | 488 | } |
489 | |||
490 | /** | ||
491 | * getboottime - Return the real time of system boot. | ||
492 | * @ts: pointer to the timespec to be set | ||
493 | * | ||
494 | * Returns the time of day in a timespec. | ||
495 | * | ||
496 | * This is based on the wall_to_monotonic offset and the total suspend | ||
497 | * time. Calls to settimeofday will affect the value returned (which | ||
498 | * basically means that however wrong your real time clock is at boot time, | ||
499 | * you get the right time here). | ||
500 | */ | ||
501 | void getboottime(struct timespec *ts) | ||
502 | { | ||
503 | set_normalized_timespec(ts, | ||
504 | - (wall_to_monotonic.tv_sec + total_sleep_time), | ||
505 | - wall_to_monotonic.tv_nsec); | ||
506 | } | ||
507 | |||
508 | /** | ||
509 | * monotonic_to_bootbased - Convert the monotonic time to boot based. | ||
510 | * @ts: pointer to the timespec to be converted | ||
511 | */ | ||
512 | void monotonic_to_bootbased(struct timespec *ts) | ||
513 | { | ||
514 | ts->tv_sec += total_sleep_time; | ||
515 | } | ||
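With total_sleep_time folded in, the pair of helpers can reconstruct boot-relative time that keeps counting across suspend. A small in-kernel sketch (the function name is illustrative):

    #include <linux/time.h>

    /* Sketch: seconds elapsed since boot, including suspended time.
     * getboottime() already subtracts both wall_to_monotonic and
     * total_sleep_time, so wall time minus boot time covers suspend. */
    static unsigned long seconds_since_boot(void)
    {
            struct timespec boot, now;

            getboottime(&boot);
            getnstimeofday(&now);
            return now.tv_sec - boot.tv_sec;
    }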
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 321693724ad7..9b8a826236dd 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -68,6 +68,7 @@ struct entry { | |||
68 | * Number of timeout events: | 68 | * Number of timeout events: |
69 | */ | 69 | */ |
70 | unsigned long count; | 70 | unsigned long count; |
71 | unsigned int timer_flag; | ||
71 | 72 | ||
72 | /* | 73 | /* |
73 | * We save the command-line string to preserve | 74 | * We save the command-line string to preserve |
@@ -231,7 +232,8 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
231 | * incremented. Otherwise the timer is registered in a free slot. | 232 | * incremented. Otherwise the timer is registered in a free slot. |
232 | */ | 233 | */ |
233 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | 234 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, |
234 | void *timerf, char * comm) | 235 | void *timerf, char *comm, |
236 | unsigned int timer_flag) | ||
235 | { | 237 | { |
236 | /* | 238 | /* |
237 | * It doesn't matter which lock we take: | 239 | * It doesn't matter which lock we take: |
@@ -249,6 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
249 | input.start_func = startf; | 251 | input.start_func = startf; |
250 | input.expire_func = timerf; | 252 | input.expire_func = timerf; |
251 | input.pid = pid; | 253 | input.pid = pid; |
254 | input.timer_flag = timer_flag; | ||
252 | 255 | ||
253 | spin_lock_irqsave(lock, flags); | 256 | spin_lock_irqsave(lock, flags); |
254 | if (!active) | 257 | if (!active) |
@@ -295,7 +298,7 @@ static int tstats_show(struct seq_file *m, void *v) | |||
295 | period = ktime_to_timespec(time); | 298 | period = ktime_to_timespec(time); |
296 | ms = period.tv_nsec / 1000000; | 299 | ms = period.tv_nsec / 1000000; |
297 | 300 | ||
298 | seq_puts(m, "Timer Stats Version: v0.1\n"); | 301 | seq_puts(m, "Timer Stats Version: v0.2\n"); |
299 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | 302 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); |
300 | if (atomic_read(&overflow_count)) | 303 | if (atomic_read(&overflow_count)) |
301 | seq_printf(m, "Overflow: %d entries\n", | 304 | seq_printf(m, "Overflow: %d entries\n", |
@@ -303,8 +306,13 @@ static int tstats_show(struct seq_file *m, void *v) | |||
303 | 306 | ||
304 | for (i = 0; i < nr_entries; i++) { | 307 | for (i = 0; i < nr_entries; i++) { |
305 | entry = entries + i; | 308 | entry = entries + i; |
306 | seq_printf(m, "%4lu, %5d %-16s ", | 309 | if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) { |
310 | seq_printf(m, "%4luD, %5d %-16s ", | ||
307 | entry->count, entry->pid, entry->comm); | 311 | entry->count, entry->pid, entry->comm); |
312 | } else { | ||
313 | seq_printf(m, " %4lu, %5d %-16s ", | ||
314 | entry->count, entry->pid, entry->comm); | ||
315 | } | ||
308 | 316 | ||
309 | print_name_offset(m, (unsigned long)entry->start_func); | 317 | print_name_offset(m, (unsigned long)entry->start_func); |
310 | seq_puts(m, " ("); | 318 | seq_puts(m, " ("); |
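In the v0.2 format, deferrable timers are tagged with a 'D' after the event count while ordinary timers keep an extra leading space, so the columns still line up. Illustratively (made-up lines, not captured output), two /proc/timer_stats entries would read:

       15D,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)
      102,  2710 sshd             sk_reset_timer (tcp_write_timer)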
diff --git a/kernel/timer.c b/kernel/timer.c index 1a69705c2fb9..1258371e0d2b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -305,6 +305,20 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | |||
305 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | 305 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); |
306 | timer->start_pid = current->pid; | 306 | timer->start_pid = current->pid; |
307 | } | 307 | } |
308 | |||
309 | static void timer_stats_account_timer(struct timer_list *timer) | ||
310 | { | ||
311 | unsigned int flag = 0; | ||
312 | |||
313 | if (unlikely(tbase_get_deferrable(timer->base))) | ||
314 | flag |= TIMER_STATS_FLAG_DEFERRABLE; | ||
315 | |||
316 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | ||
317 | timer->function, timer->start_comm, flag); | ||
318 | } | ||
319 | |||
320 | #else | ||
321 | static void timer_stats_account_timer(struct timer_list *timer) {} | ||
308 | #endif | 322 | #endif |
309 | 323 | ||
310 | /** | 324 | /** |
@@ -1114,6 +1128,7 @@ int do_sysinfo(struct sysinfo *info) | |||
1114 | getnstimeofday(&tp); | 1128 | getnstimeofday(&tp); |
1115 | tp.tv_sec += wall_to_monotonic.tv_sec; | 1129 | tp.tv_sec += wall_to_monotonic.tv_sec; |
1116 | tp.tv_nsec += wall_to_monotonic.tv_nsec; | 1130 | tp.tv_nsec += wall_to_monotonic.tv_nsec; |
1131 | monotonic_to_bootbased(&tp); | ||
1117 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { | 1132 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { |
1118 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; | 1133 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; |
1119 | tp.tv_sec++; | 1134 | tp.tv_sec++; |
diff --git a/kernel/user.c b/kernel/user.c index 4869563080e9..98b82507797a 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -14,20 +14,19 @@ | |||
14 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
15 | #include <linux/key.h> | 15 | #include <linux/key.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/module.h> | ||
18 | #include <linux/user_namespace.h> | ||
17 | 19 | ||
18 | /* | 20 | /* |
19 | * UID task count cache, to get fast user lookup in "alloc_uid" | 21 | * UID task count cache, to get fast user lookup in "alloc_uid" |
20 | * when changing user ID's (ie setuid() and friends). | 22 | * when changing user ID's (ie setuid() and friends). |
21 | */ | 23 | */ |
22 | 24 | ||
23 | #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) | ||
24 | #define UIDHASH_SZ (1 << UIDHASH_BITS) | ||
25 | #define UIDHASH_MASK (UIDHASH_SZ - 1) | 25 | #define UIDHASH_MASK (UIDHASH_SZ - 1) |
26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) | 26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) |
27 | #define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) | 27 | #define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid))) |
28 | 28 | ||
29 | static struct kmem_cache *uid_cachep; | 29 | static struct kmem_cache *uid_cachep; |
30 | static struct list_head uidhash_table[UIDHASH_SZ]; | ||
31 | 30 | ||
32 | /* | 31 | /* |
33 | * The uidhash_lock is mostly taken from process context, but it is | 32 | * The uidhash_lock is mostly taken from process context, but it is |
@@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid) | |||
94 | { | 93 | { |
95 | struct user_struct *ret; | 94 | struct user_struct *ret; |
96 | unsigned long flags; | 95 | unsigned long flags; |
96 | struct user_namespace *ns = current->nsproxy->user_ns; | ||
97 | 97 | ||
98 | spin_lock_irqsave(&uidhash_lock, flags); | 98 | spin_lock_irqsave(&uidhash_lock, flags); |
99 | ret = uid_hash_find(uid, uidhashentry(uid)); | 99 | ret = uid_hash_find(uid, uidhashentry(ns, uid)); |
100 | spin_unlock_irqrestore(&uidhash_lock, flags); | 100 | spin_unlock_irqrestore(&uidhash_lock, flags); |
101 | return ret; | 101 | return ret; |
102 | } | 102 | } |
@@ -120,9 +120,9 @@ void free_uid(struct user_struct *up) | |||
120 | } | 120 | } |
121 | } | 121 | } |
122 | 122 | ||
123 | struct user_struct * alloc_uid(uid_t uid) | 123 | struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) |
124 | { | 124 | { |
125 | struct list_head *hashent = uidhashentry(uid); | 125 | struct list_head *hashent = uidhashentry(ns, uid); |
126 | struct user_struct *up; | 126 | struct user_struct *up; |
127 | 127 | ||
128 | spin_lock_irq(&uidhash_lock); | 128 | spin_lock_irq(&uidhash_lock); |
@@ -211,11 +211,11 @@ static int __init uid_cache_init(void) | |||
211 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | 211 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
212 | 212 | ||
213 | for(n = 0; n < UIDHASH_SZ; ++n) | 213 | for(n = 0; n < UIDHASH_SZ; ++n) |
214 | INIT_LIST_HEAD(uidhash_table + n); | 214 | INIT_LIST_HEAD(init_user_ns.uidhash_table + n); |
215 | 215 | ||
216 | /* Insert the root user immediately (init already runs as root) */ | 216 | /* Insert the root user immediately (init already runs as root) */ |
217 | spin_lock_irq(&uidhash_lock); | 217 | spin_lock_irq(&uidhash_lock); |
218 | uid_hash_insert(&root_user, uidhashentry(0)); | 218 | uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0)); |
219 | spin_unlock_irq(&uidhash_lock); | 219 | spin_unlock_irq(&uidhash_lock); |
220 | 220 | ||
221 | return 0; | 221 | return 0; |
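After this change every uid hash operation is scoped by a user namespace, so callers name the namespace explicitly; the diff's own call sites pass current->nsproxy->user_ns. A thin wrapper as a sketch:

    #include <linux/nsproxy.h>
    #include <linux/sched.h>
    #include <linux/user_namespace.h>

    /* Sketch: look up (or create) the user_struct for @uid in the
     * caller's user namespace, mirroring what set_user() does in
     * kernel/sys.c above. Returns NULL when allocation fails. */
    static struct user_struct *current_ns_alloc_uid(uid_t uid)
    {
            return alloc_uid(current->nsproxy->user_ns, uid);
    }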
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c new file mode 100644 index 000000000000..d055d987850c --- /dev/null +++ b/kernel/user_namespace.c | |||
@@ -0,0 +1,87 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License as | ||
4 | * published by the Free Software Foundation, version 2 of the | ||
5 | * License. | ||
6 | */ | ||
7 | |||
8 | #include <linux/module.h> | ||
9 | #include <linux/version.h> | ||
10 | #include <linux/nsproxy.h> | ||
11 | #include <linux/user_namespace.h> | ||
12 | |||
13 | struct user_namespace init_user_ns = { | ||
14 | .kref = { | ||
15 | .refcount = ATOMIC_INIT(2), | ||
16 | }, | ||
17 | .root_user = &root_user, | ||
18 | }; | ||
19 | |||
20 | EXPORT_SYMBOL_GPL(init_user_ns); | ||
21 | |||
22 | #ifdef CONFIG_USER_NS | ||
23 | |||
24 | /* | ||
25 | * Clone a new ns copying an original user ns, setting refcount to 1 | ||
26 | * @old_ns: namespace to clone | ||
27 | * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise | ||
28 | */ | ||
29 | static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) | ||
30 | { | ||
31 | struct user_namespace *ns; | ||
32 | struct user_struct *new_user; | ||
33 | int n; | ||
34 | |||
35 | ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); | ||
36 | if (!ns) | ||
37 | return ERR_PTR(-ENOMEM); | ||
38 | |||
39 | kref_init(&ns->kref); | ||
40 | |||
41 | for (n = 0; n < UIDHASH_SZ; ++n) | ||
42 | INIT_LIST_HEAD(ns->uidhash_table + n); | ||
43 | |||
44 | /* Insert new root user. */ | ||
45 | ns->root_user = alloc_uid(ns, 0); | ||
46 | if (!ns->root_user) { | ||
47 | kfree(ns); | ||
48 | return ERR_PTR(-ENOMEM); | ||
49 | } | ||
50 | |||
51 | /* Reset current->user with a new one */ | ||
52 | new_user = alloc_uid(ns, current->uid); | ||
53 | if (!new_user) { | ||
54 | free_uid(ns->root_user); | ||
55 | kfree(ns); | ||
56 | return ERR_PTR(-ENOMEM); | ||
57 | } | ||
58 | |||
59 | switch_uid(new_user); | ||
60 | return ns; | ||
61 | } | ||
62 | |||
63 | struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) | ||
64 | { | ||
65 | struct user_namespace *new_ns; | ||
66 | |||
67 | BUG_ON(!old_ns); | ||
68 | get_user_ns(old_ns); | ||
69 | |||
70 | if (!(flags & CLONE_NEWUSER)) | ||
71 | return old_ns; | ||
72 | |||
73 | new_ns = clone_user_ns(old_ns); | ||
74 | |||
75 | put_user_ns(old_ns); | ||
76 | return new_ns; | ||
77 | } | ||
78 | |||
79 | void free_user_ns(struct kref *kref) | ||
80 | { | ||
81 | struct user_namespace *ns; | ||
82 | |||
83 | ns = container_of(kref, struct user_namespace, kref); | ||
84 | kfree(ns); | ||
85 | } | ||
86 | |||
87 | #endif /* CONFIG_USER_NS */ | ||
diff --git a/kernel/utsname.c b/kernel/utsname.c index 160c8c5136bd..9d8180a0f0d8 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/uts.h> | 13 | #include <linux/uts.h> |
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/version.h> | 15 | #include <linux/version.h> |
16 | #include <linux/err.h> | ||
16 | 17 | ||
17 | /* | 18 | /* |
18 | * Clone a new ns copying an original utsname, setting refcount to 1 | 19 | * Clone a new ns copying an original utsname, setting refcount to 1 |
@@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
24 | struct uts_namespace *ns; | 25 | struct uts_namespace *ns; |
25 | 26 | ||
26 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); | 27 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); |
27 | if (ns) { | 28 | if (!ns) |
28 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 29 | return ERR_PTR(-ENOMEM); |
29 | kref_init(&ns->kref); | 30 | |
30 | } | 31 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
32 | kref_init(&ns->kref); | ||
31 | return ns; | 33 | return ns; |
32 | } | 34 | } |
33 | 35 | ||
@@ -37,7 +39,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
37 | * utsname of this process won't be seen by parent, and vice | 39 | * utsname of this process won't be seen by parent, and vice |
38 | * versa. | 40 | * versa. |
39 | */ | 41 | */ |
40 | struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) | 42 | struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) |
41 | { | 43 | { |
42 | struct uts_namespace *new_ns; | 44 | struct uts_namespace *new_ns; |
43 | 45 | ||
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index f22b9dbd2a9c..c76c06466bfd 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c | |||
@@ -18,10 +18,7 @@ | |||
18 | static void *get_uts(ctl_table *table, int write) | 18 | static void *get_uts(ctl_table *table, int write) |
19 | { | 19 | { |
20 | char *which = table->data; | 20 | char *which = table->data; |
21 | #ifdef CONFIG_UTS_NS | 21 | |
22 | struct uts_namespace *uts_ns = current->nsproxy->uts_ns; | ||
23 | which = (which - (char *)&init_uts_ns) + (char *)uts_ns; | ||
24 | #endif | ||
25 | if (!write) | 22 | if (!write) |
26 | down_read(&uts_sem); | 23 | down_read(&uts_sem); |
27 | else | 24 | else |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3bebf73be976..d7d3fa3072e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) | |||
382 | EXPORT_SYMBOL_GPL(flush_workqueue); | 382 | EXPORT_SYMBOL_GPL(flush_workqueue); |
383 | 383 | ||
384 | /* | 384 | /* |
385 | * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, | 385 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, |
386 | * so this work can't be re-armed in any way. | 386 | * so this work can't be re-armed in any way. |
387 | */ | 387 | */ |
388 | static int try_to_grab_pending(struct work_struct *work) | 388 | static int try_to_grab_pending(struct work_struct *work) |
389 | { | 389 | { |
390 | struct cpu_workqueue_struct *cwq; | 390 | struct cpu_workqueue_struct *cwq; |
391 | int ret = 0; | 391 | int ret = -1; |
392 | 392 | ||
393 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) | 393 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) |
394 | return 1; | 394 | return 0; |
395 | 395 | ||
396 | /* | 396 | /* |
397 | * The queueing is in progress, or it is already queued. Try to | 397 | * The queueing is in progress, or it is already queued. Try to |
@@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work) | |||
457 | wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); | 457 | wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); |
458 | } | 458 | } |
459 | 459 | ||
460 | static int __cancel_work_timer(struct work_struct *work, | ||
461 | struct timer_list* timer) | ||
462 | { | ||
463 | int ret; | ||
464 | |||
465 | do { | ||
466 | ret = (timer && likely(del_timer(timer))); | ||
467 | if (!ret) | ||
468 | ret = try_to_grab_pending(work); | ||
469 | wait_on_work(work); | ||
470 | } while (unlikely(ret < 0)); | ||
471 | |||
472 | work_clear_pending(work); | ||
473 | return ret; | ||
474 | } | ||
475 | |||
460 | /** | 476 | /** |
461 | * cancel_work_sync - block until a work_struct's callback has terminated | 477 | * cancel_work_sync - block until a work_struct's callback has terminated |
462 | * @work: the work which is to be flushed | 478 | * @work: the work which is to be flushed |
463 | * | 479 | * |
480 | * Returns true if @work was pending. | ||
481 | * | ||
464 | * cancel_work_sync() will cancel the work if it is queued. If the work's | 482 | * cancel_work_sync() will cancel the work if it is queued. If the work's |
465 | * callback appears to be running, cancel_work_sync() will block until it | 483 | * callback appears to be running, cancel_work_sync() will block until it |
466 | * has completed. | 484 | * has completed. |
@@ -476,31 +494,26 @@ static void wait_on_work(struct work_struct *work) | |||
476 | * The caller must ensure that workqueue_struct on which this work was last | 494 | * The caller must ensure that workqueue_struct on which this work was last |
477 | * queued can't be destroyed before this function returns. | 495 | * queued can't be destroyed before this function returns. |
478 | */ | 496 | */ |
479 | void cancel_work_sync(struct work_struct *work) | 497 | int cancel_work_sync(struct work_struct *work) |
480 | { | 498 | { |
481 | while (!try_to_grab_pending(work)) | 499 | return __cancel_work_timer(work, NULL); |
482 | cpu_relax(); | ||
483 | wait_on_work(work); | ||
484 | work_clear_pending(work); | ||
485 | } | 500 | } |
486 | EXPORT_SYMBOL_GPL(cancel_work_sync); | 501 | EXPORT_SYMBOL_GPL(cancel_work_sync); |
487 | 502 | ||
488 | /** | 503 | /** |
489 | * cancel_rearming_delayed_work - reliably kill off a delayed work. | 504 | * cancel_delayed_work_sync - reliably kill off a delayed work. |
490 | * @dwork: the delayed work struct | 505 | * @dwork: the delayed work struct |
491 | * | 506 | * |
507 | * Returns true if @dwork was pending. | ||
508 | * | ||
492 | * It is possible to use this function if @dwork rearms itself via queue_work() | 509 | * It is possible to use this function if @dwork rearms itself via queue_work() |
493 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). | 510 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). |
494 | */ | 511 | */ |
495 | void cancel_rearming_delayed_work(struct delayed_work *dwork) | 512 | int cancel_delayed_work_sync(struct delayed_work *dwork) |
496 | { | 513 | { |
497 | while (!del_timer(&dwork->timer) && | 514 | return __cancel_work_timer(&dwork->work, &dwork->timer); |
498 | !try_to_grab_pending(&dwork->work)) | ||
499 | cpu_relax(); | ||
500 | wait_on_work(&dwork->work); | ||
501 | work_clear_pending(&dwork->work); | ||
502 | } | 515 | } |
503 | EXPORT_SYMBOL(cancel_rearming_delayed_work); | 516 | EXPORT_SYMBOL(cancel_delayed_work_sync); |
504 | 517 | ||
505 | static struct workqueue_struct *keventd_wq __read_mostly; | 518 | static struct workqueue_struct *keventd_wq __read_mostly; |
506 | 519 | ||
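The rename also pins down the semantics: both cancel helpers now return whether the work was pending, and __cancel_work_timer() loops until neither the timer nor the PENDING bit can re-arm the work. A sketch of the self-rearming pattern this makes safe to tear down (names are illustrative):

    #include <linux/jiffies.h>
    #include <linux/kernel.h>
    #include <linux/workqueue.h>

    static struct delayed_work poll_work;

    static void poll_fn(struct work_struct *work)
    {
            /* ... periodic work ... */
            schedule_delayed_work(&poll_work, HZ);  /* re-arm ourselves */
    }

    static void poll_start(void)
    {
            INIT_DELAYED_WORK(&poll_work, poll_fn);
            schedule_delayed_work(&poll_work, HZ);
    }

    static void poll_stop(void)
    {
            /* Safe even though poll_fn() re-arms: __cancel_work_timer()
             * retries until the timer and the PENDING bit are both down,
             * then waits out any callback that is still running. */
            if (cancel_delayed_work_sync(&poll_work))
                    pr_debug("poll_work was still pending\n");
    }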