diff options
Diffstat (limited to 'kernel')
47 files changed, 766 insertions, 339 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 642d4277c2ea..2a999836ca18 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -4,11 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | 5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ |
| 6 | exit.o itimer.o time.o softirq.o resource.o \ | 6 | exit.o itimer.o time.o softirq.o resource.o \ |
| 7 | sysctl.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ |
| 8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
| 9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
| 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
| 11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o | 11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ |
| 12 | utsname.o | ||
| 12 | 13 | ||
| 13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 14 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
| 14 | obj-y += time/ | 15 | obj-y += time/ |
| @@ -48,7 +49,6 @@ obj-$(CONFIG_SECCOMP) += seccomp.o | |||
| 48 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
| 49 | obj-$(CONFIG_RELAY) += relay.o | 50 | obj-$(CONFIG_RELAY) += relay.o |
| 50 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 51 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
| 51 | obj-$(CONFIG_UTS_NS) += utsname.o | ||
| 52 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 52 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
| 53 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o | 53 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o |
| 54 | 54 | ||
diff --git a/kernel/audit.c b/kernel/audit.c index d13276d41410..eb0f9165b401 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
| @@ -58,6 +58,7 @@ | |||
| 58 | #include <linux/selinux.h> | 58 | #include <linux/selinux.h> |
| 59 | #include <linux/inotify.h> | 59 | #include <linux/inotify.h> |
| 60 | #include <linux/freezer.h> | 60 | #include <linux/freezer.h> |
| 61 | #include <linux/tty.h> | ||
| 61 | 62 | ||
| 62 | #include "audit.h" | 63 | #include "audit.h" |
| 63 | 64 | ||
| @@ -391,6 +392,7 @@ static int kauditd_thread(void *dummy) | |||
| 391 | { | 392 | { |
| 392 | struct sk_buff *skb; | 393 | struct sk_buff *skb; |
| 393 | 394 | ||
| 395 | set_freezable(); | ||
| 394 | while (!kthread_should_stop()) { | 396 | while (!kthread_should_stop()) { |
| 395 | skb = skb_dequeue(&audit_skb_queue); | 397 | skb = skb_dequeue(&audit_skb_queue); |
| 396 | wake_up(&audit_backlog_wait); | 398 | wake_up(&audit_backlog_wait); |
| @@ -423,6 +425,31 @@ static int kauditd_thread(void *dummy) | |||
| 423 | return 0; | 425 | return 0; |
| 424 | } | 426 | } |
| 425 | 427 | ||
| 428 | static int audit_prepare_user_tty(pid_t pid, uid_t loginuid) | ||
| 429 | { | ||
| 430 | struct task_struct *tsk; | ||
| 431 | int err; | ||
| 432 | |||
| 433 | read_lock(&tasklist_lock); | ||
| 434 | tsk = find_task_by_pid(pid); | ||
| 435 | err = -ESRCH; | ||
| 436 | if (!tsk) | ||
| 437 | goto out; | ||
| 438 | err = 0; | ||
| 439 | |||
| 440 | spin_lock_irq(&tsk->sighand->siglock); | ||
| 441 | if (!tsk->signal->audit_tty) | ||
| 442 | err = -EPERM; | ||
| 443 | spin_unlock_irq(&tsk->sighand->siglock); | ||
| 444 | if (err) | ||
| 445 | goto out; | ||
| 446 | |||
| 447 | tty_audit_push_task(tsk, loginuid); | ||
| 448 | out: | ||
| 449 | read_unlock(&tasklist_lock); | ||
| 450 | return err; | ||
| 451 | } | ||
| 452 | |||
| 426 | int audit_send_list(void *_dest) | 453 | int audit_send_list(void *_dest) |
| 427 | { | 454 | { |
| 428 | struct audit_netlink_list *dest = _dest; | 455 | struct audit_netlink_list *dest = _dest; |
| @@ -511,6 +538,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) | |||
| 511 | case AUDIT_DEL: | 538 | case AUDIT_DEL: |
| 512 | case AUDIT_DEL_RULE: | 539 | case AUDIT_DEL_RULE: |
| 513 | case AUDIT_SIGNAL_INFO: | 540 | case AUDIT_SIGNAL_INFO: |
| 541 | case AUDIT_TTY_GET: | ||
| 542 | case AUDIT_TTY_SET: | ||
| 514 | if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) | 543 | if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) |
| 515 | err = -EPERM; | 544 | err = -EPERM; |
| 516 | break; | 545 | break; |
| @@ -622,6 +651,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 622 | err = audit_filter_user(&NETLINK_CB(skb), msg_type); | 651 | err = audit_filter_user(&NETLINK_CB(skb), msg_type); |
| 623 | if (err == 1) { | 652 | if (err == 1) { |
| 624 | err = 0; | 653 | err = 0; |
| 654 | if (msg_type == AUDIT_USER_TTY) { | ||
| 655 | err = audit_prepare_user_tty(pid, loginuid); | ||
| 656 | if (err) | ||
| 657 | break; | ||
| 658 | } | ||
| 625 | ab = audit_log_start(NULL, GFP_KERNEL, msg_type); | 659 | ab = audit_log_start(NULL, GFP_KERNEL, msg_type); |
| 626 | if (ab) { | 660 | if (ab) { |
| 627 | audit_log_format(ab, | 661 | audit_log_format(ab, |
| @@ -638,8 +672,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 638 | " subj=%s", ctx); | 672 | " subj=%s", ctx); |
| 639 | kfree(ctx); | 673 | kfree(ctx); |
| 640 | } | 674 | } |
| 641 | audit_log_format(ab, " msg='%.1024s'", | 675 | if (msg_type != AUDIT_USER_TTY) |
| 642 | (char *)data); | 676 | audit_log_format(ab, " msg='%.1024s'", |
| 677 | (char *)data); | ||
| 678 | else { | ||
| 679 | int size; | ||
| 680 | |||
| 681 | audit_log_format(ab, " msg="); | ||
| 682 | size = nlmsg_len(nlh); | ||
| 683 | audit_log_n_untrustedstring(ab, size, | ||
| 684 | data); | ||
| 685 | } | ||
| 643 | audit_set_pid(ab, pid); | 686 | audit_set_pid(ab, pid); |
| 644 | audit_log_end(ab); | 687 | audit_log_end(ab); |
| 645 | } | 688 | } |
| @@ -730,6 +773,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 730 | 0, 0, sig_data, sizeof(*sig_data) + len); | 773 | 0, 0, sig_data, sizeof(*sig_data) + len); |
| 731 | kfree(sig_data); | 774 | kfree(sig_data); |
| 732 | break; | 775 | break; |
| 776 | case AUDIT_TTY_GET: { | ||
| 777 | struct audit_tty_status s; | ||
| 778 | struct task_struct *tsk; | ||
| 779 | |||
| 780 | read_lock(&tasklist_lock); | ||
| 781 | tsk = find_task_by_pid(pid); | ||
| 782 | if (!tsk) | ||
| 783 | err = -ESRCH; | ||
| 784 | else { | ||
| 785 | spin_lock_irq(&tsk->sighand->siglock); | ||
| 786 | s.enabled = tsk->signal->audit_tty != 0; | ||
| 787 | spin_unlock_irq(&tsk->sighand->siglock); | ||
| 788 | } | ||
| 789 | read_unlock(&tasklist_lock); | ||
| 790 | audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0, | ||
| 791 | &s, sizeof(s)); | ||
| 792 | break; | ||
| 793 | } | ||
| 794 | case AUDIT_TTY_SET: { | ||
| 795 | struct audit_tty_status *s; | ||
| 796 | struct task_struct *tsk; | ||
| 797 | |||
| 798 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) | ||
| 799 | return -EINVAL; | ||
| 800 | s = data; | ||
| 801 | if (s->enabled != 0 && s->enabled != 1) | ||
| 802 | return -EINVAL; | ||
| 803 | read_lock(&tasklist_lock); | ||
| 804 | tsk = find_task_by_pid(pid); | ||
| 805 | if (!tsk) | ||
| 806 | err = -ESRCH; | ||
| 807 | else { | ||
| 808 | spin_lock_irq(&tsk->sighand->siglock); | ||
| 809 | tsk->signal->audit_tty = s->enabled != 0; | ||
| 810 | spin_unlock_irq(&tsk->sighand->siglock); | ||
| 811 | } | ||
| 812 | read_unlock(&tasklist_lock); | ||
| 813 | break; | ||
| 814 | } | ||
| 733 | default: | 815 | default: |
| 734 | err = -EINVAL; | 816 | err = -EINVAL; |
| 735 | break; | 817 | break; |
| @@ -1185,7 +1267,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, | |||
| 1185 | } | 1267 | } |
| 1186 | 1268 | ||
| 1187 | /** | 1269 | /** |
| 1188 | * audit_log_n_unstrustedstring - log a string that may contain random characters | 1270 | * audit_log_n_untrustedstring - log a string that may contain random characters |
| 1189 | * @ab: audit_buffer | 1271 | * @ab: audit_buffer |
| 1190 | * @len: lenth of string (not including trailing null) | 1272 | * @len: lenth of string (not including trailing null) |
| 1191 | * @string: string to be logged | 1273 | * @string: string to be logged |
| @@ -1201,25 +1283,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, | |||
| 1201 | const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, | 1283 | const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, |
| 1202 | const char *string) | 1284 | const char *string) |
| 1203 | { | 1285 | { |
| 1204 | const unsigned char *p = string; | 1286 | const unsigned char *p; |
| 1205 | 1287 | ||
| 1206 | while (*p) { | 1288 | for (p = string; p < (const unsigned char *)string + len && *p; p++) { |
| 1207 | if (*p == '"' || *p < 0x21 || *p > 0x7f) { | 1289 | if (*p == '"' || *p < 0x21 || *p > 0x7f) { |
| 1208 | audit_log_hex(ab, string, len); | 1290 | audit_log_hex(ab, string, len); |
| 1209 | return string + len + 1; | 1291 | return string + len + 1; |
| 1210 | } | 1292 | } |
| 1211 | p++; | ||
| 1212 | } | 1293 | } |
| 1213 | audit_log_n_string(ab, len, string); | 1294 | audit_log_n_string(ab, len, string); |
| 1214 | return p + 1; | 1295 | return p + 1; |
| 1215 | } | 1296 | } |
| 1216 | 1297 | ||
| 1217 | /** | 1298 | /** |
| 1218 | * audit_log_unstrustedstring - log a string that may contain random characters | 1299 | * audit_log_untrustedstring - log a string that may contain random characters |
| 1219 | * @ab: audit_buffer | 1300 | * @ab: audit_buffer |
| 1220 | * @string: string to be logged | 1301 | * @string: string to be logged |
| 1221 | * | 1302 | * |
| 1222 | * Same as audit_log_n_unstrustedstring(), except that strlen is used to | 1303 | * Same as audit_log_n_untrustedstring(), except that strlen is used to |
| 1223 | * determine string length. | 1304 | * determine string length. |
| 1224 | */ | 1305 | */ |
| 1225 | const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) | 1306 | const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) |
diff --git a/kernel/audit.h b/kernel/audit.h index 815d6f5c04ee..95877435c347 100644 --- a/kernel/audit.h +++ b/kernel/audit.h | |||
| @@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type, | |||
| 115 | extern void audit_send_reply(int pid, int seq, int type, | 115 | extern void audit_send_reply(int pid, int seq, int type, |
| 116 | int done, int multi, | 116 | int done, int multi, |
| 117 | void *payload, int size); | 117 | void *payload, int size); |
| 118 | extern void audit_log_lost(const char *message); | ||
| 119 | extern void audit_panic(const char *message); | 118 | extern void audit_panic(const char *message); |
| 120 | 119 | ||
| 121 | struct audit_netlink_list { | 120 | struct audit_netlink_list { |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e36481ed61b4..b7640a5f382a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
| @@ -71,9 +71,6 @@ | |||
| 71 | 71 | ||
| 72 | extern struct list_head audit_filter_list[]; | 72 | extern struct list_head audit_filter_list[]; |
| 73 | 73 | ||
| 74 | /* No syscall auditing will take place unless audit_enabled != 0. */ | ||
| 75 | extern int audit_enabled; | ||
| 76 | |||
| 77 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context | 74 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context |
| 78 | * for saving names from getname(). */ | 75 | * for saving names from getname(). */ |
| 79 | #define AUDIT_NAMES 20 | 76 | #define AUDIT_NAMES 20 |
| @@ -2040,7 +2037,7 @@ int __audit_signal_info(int sig, struct task_struct *t) | |||
| 2040 | 2037 | ||
| 2041 | /** | 2038 | /** |
| 2042 | * audit_core_dumps - record information about processes that end abnormally | 2039 | * audit_core_dumps - record information about processes that end abnormally |
| 2043 | * @sig: signal value | 2040 | * @signr: signal value |
| 2044 | * | 2041 | * |
| 2045 | * If a process ends with a core dump, something fishy is going on and we | 2042 | * If a process ends with a core dump, something fishy is going on and we |
| 2046 | * should record the event for investigation. | 2043 | * should record the event for investigation. |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c4d123f74bd3..b4796d850140 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -981,10 +981,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
| 981 | mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); | 981 | mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); |
| 982 | if (!mmarray) | 982 | if (!mmarray) |
| 983 | goto done; | 983 | goto done; |
| 984 | write_lock_irq(&tasklist_lock); /* block fork */ | 984 | read_lock(&tasklist_lock); /* block fork */ |
| 985 | if (atomic_read(&cs->count) <= ntasks) | 985 | if (atomic_read(&cs->count) <= ntasks) |
| 986 | break; /* got enough */ | 986 | break; /* got enough */ |
| 987 | write_unlock_irq(&tasklist_lock); /* try again */ | 987 | read_unlock(&tasklist_lock); /* try again */ |
| 988 | kfree(mmarray); | 988 | kfree(mmarray); |
| 989 | } | 989 | } |
| 990 | 990 | ||
| @@ -1006,7 +1006,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
| 1006 | continue; | 1006 | continue; |
| 1007 | mmarray[n++] = mm; | 1007 | mmarray[n++] = mm; |
| 1008 | } while_each_thread(g, p); | 1008 | } while_each_thread(g, p); |
| 1009 | write_unlock_irq(&tasklist_lock); | 1009 | read_unlock(&tasklist_lock); |
| 1010 | 1010 | ||
| 1011 | /* | 1011 | /* |
| 1012 | * Now that we've dropped the tasklist spinlock, we can | 1012 | * Now that we've dropped the tasklist spinlock, we can |
diff --git a/kernel/exit.c b/kernel/exit.c index ca6a11b73023..e8af8d0c2483 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/mempolicy.h> | 31 | #include <linux/mempolicy.h> |
| 32 | #include <linux/taskstats_kern.h> | 32 | #include <linux/taskstats_kern.h> |
| 33 | #include <linux/delayacct.h> | 33 | #include <linux/delayacct.h> |
| 34 | #include <linux/freezer.h> | ||
| 34 | #include <linux/cpuset.h> | 35 | #include <linux/cpuset.h> |
| 35 | #include <linux/syscalls.h> | 36 | #include <linux/syscalls.h> |
| 36 | #include <linux/signal.h> | 37 | #include <linux/signal.h> |
| @@ -387,6 +388,11 @@ void daemonize(const char *name, ...) | |||
| 387 | * they would be locked into memory. | 388 | * they would be locked into memory. |
| 388 | */ | 389 | */ |
| 389 | exit_mm(current); | 390 | exit_mm(current); |
| 391 | /* | ||
| 392 | * We don't want to have TIF_FREEZE set if the system-wide hibernation | ||
| 393 | * or suspend transition begins right now. | ||
| 394 | */ | ||
| 395 | current->flags |= PF_NOFREEZE; | ||
| 390 | 396 | ||
| 391 | set_special_pids(1, 1); | 397 | set_special_pids(1, 1); |
| 392 | proc_clear_tty(current); | 398 | proc_clear_tty(current); |
| @@ -858,6 +864,34 @@ static void exit_notify(struct task_struct *tsk) | |||
| 858 | release_task(tsk); | 864 | release_task(tsk); |
| 859 | } | 865 | } |
| 860 | 866 | ||
| 867 | #ifdef CONFIG_DEBUG_STACK_USAGE | ||
| 868 | static void check_stack_usage(void) | ||
| 869 | { | ||
| 870 | static DEFINE_SPINLOCK(low_water_lock); | ||
| 871 | static int lowest_to_date = THREAD_SIZE; | ||
| 872 | unsigned long *n = end_of_stack(current); | ||
| 873 | unsigned long free; | ||
| 874 | |||
| 875 | while (*n == 0) | ||
| 876 | n++; | ||
| 877 | free = (unsigned long)n - (unsigned long)end_of_stack(current); | ||
| 878 | |||
| 879 | if (free >= lowest_to_date) | ||
| 880 | return; | ||
| 881 | |||
| 882 | spin_lock(&low_water_lock); | ||
| 883 | if (free < lowest_to_date) { | ||
| 884 | printk(KERN_WARNING "%s used greatest stack depth: %lu bytes " | ||
| 885 | "left\n", | ||
| 886 | current->comm, free); | ||
| 887 | lowest_to_date = free; | ||
| 888 | } | ||
| 889 | spin_unlock(&low_water_lock); | ||
| 890 | } | ||
| 891 | #else | ||
| 892 | static inline void check_stack_usage(void) {} | ||
| 893 | #endif | ||
| 894 | |||
| 861 | fastcall NORET_TYPE void do_exit(long code) | 895 | fastcall NORET_TYPE void do_exit(long code) |
| 862 | { | 896 | { |
| 863 | struct task_struct *tsk = current; | 897 | struct task_struct *tsk = current; |
| @@ -937,6 +971,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
| 937 | if (unlikely(tsk->compat_robust_list)) | 971 | if (unlikely(tsk->compat_robust_list)) |
| 938 | compat_exit_robust_list(tsk); | 972 | compat_exit_robust_list(tsk); |
| 939 | #endif | 973 | #endif |
| 974 | if (group_dead) | ||
| 975 | tty_audit_exit(); | ||
| 940 | if (unlikely(tsk->audit_context)) | 976 | if (unlikely(tsk->audit_context)) |
| 941 | audit_free(tsk); | 977 | audit_free(tsk); |
| 942 | 978 | ||
| @@ -949,6 +985,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
| 949 | exit_sem(tsk); | 985 | exit_sem(tsk); |
| 950 | __exit_files(tsk); | 986 | __exit_files(tsk); |
| 951 | __exit_fs(tsk); | 987 | __exit_fs(tsk); |
| 988 | check_stack_usage(); | ||
| 952 | exit_thread(); | 989 | exit_thread(); |
| 953 | cpuset_exit(tsk); | 990 | cpuset_exit(tsk); |
| 954 | exit_keys(tsk); | 991 | exit_keys(tsk); |
diff --git a/kernel/fork.c b/kernel/fork.c index da3a155bba0d..ba39bdb2a7b8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include <linux/delayacct.h> | 49 | #include <linux/delayacct.h> |
| 50 | #include <linux/taskstats_kern.h> | 50 | #include <linux/taskstats_kern.h> |
| 51 | #include <linux/random.h> | 51 | #include <linux/random.h> |
| 52 | #include <linux/tty.h> | ||
| 52 | 53 | ||
| 53 | #include <asm/pgtable.h> | 54 | #include <asm/pgtable.h> |
| 54 | #include <asm/pgalloc.h> | 55 | #include <asm/pgalloc.h> |
| @@ -897,6 +898,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
| 897 | } | 898 | } |
| 898 | acct_init_pacct(&sig->pacct); | 899 | acct_init_pacct(&sig->pacct); |
| 899 | 900 | ||
| 901 | tty_audit_fork(sig); | ||
| 902 | |||
| 900 | return 0; | 903 | return 0; |
| 901 | } | 904 | } |
| 902 | 905 | ||
| @@ -920,7 +923,7 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) | |||
| 920 | { | 923 | { |
| 921 | unsigned long new_flags = p->flags; | 924 | unsigned long new_flags = p->flags; |
| 922 | 925 | ||
| 923 | new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); | 926 | new_flags &= ~PF_SUPERPRIV; |
| 924 | new_flags |= PF_FORKNOEXEC; | 927 | new_flags |= PF_FORKNOEXEC; |
| 925 | if (!(clone_flags & CLONE_PTRACE)) | 928 | if (!(clone_flags & CLONE_PTRACE)) |
| 926 | p->ptrace = 0; | 929 | p->ptrace = 0; |
| @@ -999,7 +1002,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 999 | if (atomic_read(&p->user->processes) >= | 1002 | if (atomic_read(&p->user->processes) >= |
| 1000 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { | 1003 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { |
| 1001 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && | 1004 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && |
| 1002 | p->user != &root_user) | 1005 | p->user != current->nsproxy->user_ns->root_user) |
| 1003 | goto bad_fork_free; | 1006 | goto bad_fork_free; |
| 1004 | } | 1007 | } |
| 1005 | 1008 | ||
| @@ -1059,6 +1062,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1059 | 1062 | ||
| 1060 | p->lock_depth = -1; /* -1 = no lock */ | 1063 | p->lock_depth = -1; /* -1 = no lock */ |
| 1061 | do_posix_clock_monotonic_gettime(&p->start_time); | 1064 | do_posix_clock_monotonic_gettime(&p->start_time); |
| 1065 | p->real_start_time = p->start_time; | ||
| 1066 | monotonic_to_bootbased(&p->real_start_time); | ||
| 1062 | p->security = NULL; | 1067 | p->security = NULL; |
| 1063 | p->io_context = NULL; | 1068 | p->io_context = NULL; |
| 1064 | p->io_wait = NULL; | 1069 | p->io_wait = NULL; |
| @@ -1601,7 +1606,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
| 1601 | err = -EINVAL; | 1606 | err = -EINVAL; |
| 1602 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1607 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
| 1603 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1608 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
| 1604 | CLONE_NEWUTS|CLONE_NEWIPC)) | 1609 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER)) |
| 1605 | goto bad_unshare_out; | 1610 | goto bad_unshare_out; |
| 1606 | 1611 | ||
| 1607 | if ((err = unshare_thread(unshare_flags))) | 1612 | if ((err = unshare_thread(unshare_flags))) |
diff --git a/kernel/futex.c b/kernel/futex.c index 45490bec5831..5c3f45d07c53 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -121,6 +121,24 @@ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; | |||
| 121 | static struct vfsmount *futex_mnt; | 121 | static struct vfsmount *futex_mnt; |
| 122 | 122 | ||
| 123 | /* | 123 | /* |
| 124 | * Take mm->mmap_sem, when futex is shared | ||
| 125 | */ | ||
| 126 | static inline void futex_lock_mm(struct rw_semaphore *fshared) | ||
| 127 | { | ||
| 128 | if (fshared) | ||
| 129 | down_read(fshared); | ||
| 130 | } | ||
| 131 | |||
| 132 | /* | ||
| 133 | * Release mm->mmap_sem, when the futex is shared | ||
| 134 | */ | ||
| 135 | static inline void futex_unlock_mm(struct rw_semaphore *fshared) | ||
| 136 | { | ||
| 137 | if (fshared) | ||
| 138 | up_read(fshared); | ||
| 139 | } | ||
| 140 | |||
| 141 | /* | ||
| 124 | * We hash on the keys returned from get_futex_key (see below). | 142 | * We hash on the keys returned from get_futex_key (see below). |
| 125 | */ | 143 | */ |
| 126 | static struct futex_hash_bucket *hash_futex(union futex_key *key) | 144 | static struct futex_hash_bucket *hash_futex(union futex_key *key) |
| @@ -287,7 +305,18 @@ void drop_futex_key_refs(union futex_key *key) | |||
| 287 | } | 305 | } |
| 288 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); | 306 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); |
| 289 | 307 | ||
| 290 | static inline int get_futex_value_locked(u32 *dest, u32 __user *from) | 308 | static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) |
| 309 | { | ||
| 310 | u32 curval; | ||
| 311 | |||
| 312 | pagefault_disable(); | ||
| 313 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
| 314 | pagefault_enable(); | ||
| 315 | |||
| 316 | return curval; | ||
| 317 | } | ||
| 318 | |||
| 319 | static int get_futex_value_locked(u32 *dest, u32 __user *from) | ||
| 291 | { | 320 | { |
| 292 | int ret; | 321 | int ret; |
| 293 | 322 | ||
| @@ -620,9 +649,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
| 620 | 649 | ||
| 621 | newval = FUTEX_WAITERS | new_owner->pid; | 650 | newval = FUTEX_WAITERS | new_owner->pid; |
| 622 | 651 | ||
| 623 | pagefault_disable(); | 652 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); |
| 624 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
| 625 | pagefault_enable(); | ||
| 626 | 653 | ||
| 627 | if (curval == -EFAULT) | 654 | if (curval == -EFAULT) |
| 628 | ret = -EFAULT; | 655 | ret = -EFAULT; |
| @@ -659,9 +686,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval) | |||
| 659 | * There is no waiter, so we unlock the futex. The owner died | 686 | * There is no waiter, so we unlock the futex. The owner died |
| 660 | * bit has not to be preserved here. We are the owner: | 687 | * bit has not to be preserved here. We are the owner: |
| 661 | */ | 688 | */ |
| 662 | pagefault_disable(); | 689 | oldval = cmpxchg_futex_value_locked(uaddr, uval, 0); |
| 663 | oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0); | ||
| 664 | pagefault_enable(); | ||
| 665 | 690 | ||
| 666 | if (oldval == -EFAULT) | 691 | if (oldval == -EFAULT) |
| 667 | return oldval; | 692 | return oldval; |
| @@ -700,8 +725,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 700 | union futex_key key; | 725 | union futex_key key; |
| 701 | int ret; | 726 | int ret; |
| 702 | 727 | ||
| 703 | if (fshared) | 728 | futex_lock_mm(fshared); |
| 704 | down_read(fshared); | ||
| 705 | 729 | ||
| 706 | ret = get_futex_key(uaddr, fshared, &key); | 730 | ret = get_futex_key(uaddr, fshared, &key); |
| 707 | if (unlikely(ret != 0)) | 731 | if (unlikely(ret != 0)) |
| @@ -725,8 +749,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 725 | 749 | ||
| 726 | spin_unlock(&hb->lock); | 750 | spin_unlock(&hb->lock); |
| 727 | out: | 751 | out: |
| 728 | if (fshared) | 752 | futex_unlock_mm(fshared); |
| 729 | up_read(fshared); | ||
| 730 | return ret; | 753 | return ret; |
| 731 | } | 754 | } |
| 732 | 755 | ||
| @@ -746,8 +769,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
| 746 | int ret, op_ret, attempt = 0; | 769 | int ret, op_ret, attempt = 0; |
| 747 | 770 | ||
| 748 | retryfull: | 771 | retryfull: |
| 749 | if (fshared) | 772 | futex_lock_mm(fshared); |
| 750 | down_read(fshared); | ||
| 751 | 773 | ||
| 752 | ret = get_futex_key(uaddr1, fshared, &key1); | 774 | ret = get_futex_key(uaddr1, fshared, &key1); |
| 753 | if (unlikely(ret != 0)) | 775 | if (unlikely(ret != 0)) |
| @@ -793,7 +815,7 @@ retry: | |||
| 793 | */ | 815 | */ |
| 794 | if (attempt++) { | 816 | if (attempt++) { |
| 795 | ret = futex_handle_fault((unsigned long)uaddr2, | 817 | ret = futex_handle_fault((unsigned long)uaddr2, |
| 796 | fshared, attempt); | 818 | fshared, attempt); |
| 797 | if (ret) | 819 | if (ret) |
| 798 | goto out; | 820 | goto out; |
| 799 | goto retry; | 821 | goto retry; |
| @@ -803,8 +825,7 @@ retry: | |||
| 803 | * If we would have faulted, release mmap_sem, | 825 | * If we would have faulted, release mmap_sem, |
| 804 | * fault it in and start all over again. | 826 | * fault it in and start all over again. |
| 805 | */ | 827 | */ |
| 806 | if (fshared) | 828 | futex_unlock_mm(fshared); |
| 807 | up_read(fshared); | ||
| 808 | 829 | ||
| 809 | ret = get_user(dummy, uaddr2); | 830 | ret = get_user(dummy, uaddr2); |
| 810 | if (ret) | 831 | if (ret) |
| @@ -841,8 +862,8 @@ retry: | |||
| 841 | if (hb1 != hb2) | 862 | if (hb1 != hb2) |
| 842 | spin_unlock(&hb2->lock); | 863 | spin_unlock(&hb2->lock); |
| 843 | out: | 864 | out: |
| 844 | if (fshared) | 865 | futex_unlock_mm(fshared); |
| 845 | up_read(fshared); | 866 | |
| 846 | return ret; | 867 | return ret; |
| 847 | } | 868 | } |
| 848 | 869 | ||
| @@ -861,8 +882,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
| 861 | int ret, drop_count = 0; | 882 | int ret, drop_count = 0; |
| 862 | 883 | ||
| 863 | retry: | 884 | retry: |
| 864 | if (fshared) | 885 | futex_lock_mm(fshared); |
| 865 | down_read(fshared); | ||
| 866 | 886 | ||
| 867 | ret = get_futex_key(uaddr1, fshared, &key1); | 887 | ret = get_futex_key(uaddr1, fshared, &key1); |
| 868 | if (unlikely(ret != 0)) | 888 | if (unlikely(ret != 0)) |
| @@ -890,8 +910,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
| 890 | * If we would have faulted, release mmap_sem, fault | 910 | * If we would have faulted, release mmap_sem, fault |
| 891 | * it in and start all over again. | 911 | * it in and start all over again. |
| 892 | */ | 912 | */ |
| 893 | if (fshared) | 913 | futex_unlock_mm(fshared); |
| 894 | up_read(fshared); | ||
| 895 | 914 | ||
| 896 | ret = get_user(curval, uaddr1); | 915 | ret = get_user(curval, uaddr1); |
| 897 | 916 | ||
| @@ -944,8 +963,7 @@ out_unlock: | |||
| 944 | drop_futex_key_refs(&key1); | 963 | drop_futex_key_refs(&key1); |
| 945 | 964 | ||
| 946 | out: | 965 | out: |
| 947 | if (fshared) | 966 | futex_unlock_mm(fshared); |
| 948 | up_read(fshared); | ||
| 949 | return ret; | 967 | return ret; |
| 950 | } | 968 | } |
| 951 | 969 | ||
| @@ -1113,10 +1131,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
| 1113 | while (!ret) { | 1131 | while (!ret) { |
| 1114 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | 1132 | newval = (uval & FUTEX_OWNER_DIED) | newtid; |
| 1115 | 1133 | ||
| 1116 | pagefault_disable(); | 1134 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); |
| 1117 | curval = futex_atomic_cmpxchg_inatomic(uaddr, | ||
| 1118 | uval, newval); | ||
| 1119 | pagefault_enable(); | ||
| 1120 | 1135 | ||
| 1121 | if (curval == -EFAULT) | 1136 | if (curval == -EFAULT) |
| 1122 | ret = -EFAULT; | 1137 | ret = -EFAULT; |
| @@ -1134,6 +1149,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
| 1134 | #define ARG3_SHARED 1 | 1149 | #define ARG3_SHARED 1 |
| 1135 | 1150 | ||
| 1136 | static long futex_wait_restart(struct restart_block *restart); | 1151 | static long futex_wait_restart(struct restart_block *restart); |
| 1152 | |||
| 1137 | static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | 1153 | static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, |
| 1138 | u32 val, ktime_t *abs_time) | 1154 | u32 val, ktime_t *abs_time) |
| 1139 | { | 1155 | { |
| @@ -1148,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1148 | 1164 | ||
| 1149 | q.pi_state = NULL; | 1165 | q.pi_state = NULL; |
| 1150 | retry: | 1166 | retry: |
| 1151 | if (fshared) | 1167 | futex_lock_mm(fshared); |
| 1152 | down_read(fshared); | ||
| 1153 | 1168 | ||
| 1154 | ret = get_futex_key(uaddr, fshared, &q.key); | 1169 | ret = get_futex_key(uaddr, fshared, &q.key); |
| 1155 | if (unlikely(ret != 0)) | 1170 | if (unlikely(ret != 0)) |
| @@ -1186,8 +1201,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1186 | * If we would have faulted, release mmap_sem, fault it in and | 1201 | * If we would have faulted, release mmap_sem, fault it in and |
| 1187 | * start all over again. | 1202 | * start all over again. |
| 1188 | */ | 1203 | */ |
| 1189 | if (fshared) | 1204 | futex_unlock_mm(fshared); |
| 1190 | up_read(fshared); | ||
| 1191 | 1205 | ||
| 1192 | ret = get_user(uval, uaddr); | 1206 | ret = get_user(uval, uaddr); |
| 1193 | 1207 | ||
| @@ -1206,8 +1220,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1206 | * Now the futex is queued and we have checked the data, we | 1220 | * Now the futex is queued and we have checked the data, we |
| 1207 | * don't want to hold mmap_sem while we sleep. | 1221 | * don't want to hold mmap_sem while we sleep. |
| 1208 | */ | 1222 | */ |
| 1209 | if (fshared) | 1223 | futex_unlock_mm(fshared); |
| 1210 | up_read(fshared); | ||
| 1211 | 1224 | ||
| 1212 | /* | 1225 | /* |
| 1213 | * There might have been scheduling since the queue_me(), as we | 1226 | * There might have been scheduling since the queue_me(), as we |
| @@ -1285,8 +1298,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1285 | queue_unlock(&q, hb); | 1298 | queue_unlock(&q, hb); |
| 1286 | 1299 | ||
| 1287 | out_release_sem: | 1300 | out_release_sem: |
| 1288 | if (fshared) | 1301 | futex_unlock_mm(fshared); |
| 1289 | up_read(fshared); | ||
| 1290 | return ret; | 1302 | return ret; |
| 1291 | } | 1303 | } |
| 1292 | 1304 | ||
| @@ -1333,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1333 | 1345 | ||
| 1334 | q.pi_state = NULL; | 1346 | q.pi_state = NULL; |
| 1335 | retry: | 1347 | retry: |
| 1336 | if (fshared) | 1348 | futex_lock_mm(fshared); |
| 1337 | down_read(fshared); | ||
| 1338 | 1349 | ||
| 1339 | ret = get_futex_key(uaddr, fshared, &q.key); | 1350 | ret = get_futex_key(uaddr, fshared, &q.key); |
| 1340 | if (unlikely(ret != 0)) | 1351 | if (unlikely(ret != 0)) |
| @@ -1353,9 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1353 | */ | 1364 | */ |
| 1354 | newval = current->pid; | 1365 | newval = current->pid; |
| 1355 | 1366 | ||
| 1356 | pagefault_disable(); | 1367 | curval = cmpxchg_futex_value_locked(uaddr, 0, newval); |
| 1357 | curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval); | ||
| 1358 | pagefault_enable(); | ||
| 1359 | 1368 | ||
| 1360 | if (unlikely(curval == -EFAULT)) | 1369 | if (unlikely(curval == -EFAULT)) |
| 1361 | goto uaddr_faulted; | 1370 | goto uaddr_faulted; |
| @@ -1398,9 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1398 | lock_taken = 1; | 1407 | lock_taken = 1; |
| 1399 | } | 1408 | } |
| 1400 | 1409 | ||
| 1401 | pagefault_disable(); | 1410 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); |
| 1402 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
| 1403 | pagefault_enable(); | ||
| 1404 | 1411 | ||
| 1405 | if (unlikely(curval == -EFAULT)) | 1412 | if (unlikely(curval == -EFAULT)) |
| 1406 | goto uaddr_faulted; | 1413 | goto uaddr_faulted; |
| @@ -1428,8 +1435,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1428 | * exit to complete. | 1435 | * exit to complete. |
| 1429 | */ | 1436 | */ |
| 1430 | queue_unlock(&q, hb); | 1437 | queue_unlock(&q, hb); |
| 1431 | if (fshared) | 1438 | futex_unlock_mm(fshared); |
| 1432 | up_read(fshared); | ||
| 1433 | cond_resched(); | 1439 | cond_resched(); |
| 1434 | goto retry; | 1440 | goto retry; |
| 1435 | 1441 | ||
| @@ -1465,8 +1471,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1465 | * Now the futex is queued and we have checked the data, we | 1471 | * Now the futex is queued and we have checked the data, we |
| 1466 | * don't want to hold mmap_sem while we sleep. | 1472 | * don't want to hold mmap_sem while we sleep. |
| 1467 | */ | 1473 | */ |
| 1468 | if (fshared) | 1474 | futex_unlock_mm(fshared); |
| 1469 | up_read(fshared); | ||
| 1470 | 1475 | ||
| 1471 | WARN_ON(!q.pi_state); | 1476 | WARN_ON(!q.pi_state); |
| 1472 | /* | 1477 | /* |
| @@ -1480,8 +1485,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1480 | ret = ret ? 0 : -EWOULDBLOCK; | 1485 | ret = ret ? 0 : -EWOULDBLOCK; |
| 1481 | } | 1486 | } |
| 1482 | 1487 | ||
| 1483 | if (fshared) | 1488 | futex_lock_mm(fshared); |
| 1484 | down_read(fshared); | ||
| 1485 | spin_lock(q.lock_ptr); | 1489 | spin_lock(q.lock_ptr); |
| 1486 | 1490 | ||
| 1487 | if (!ret) { | 1491 | if (!ret) { |
| @@ -1518,8 +1522,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1518 | 1522 | ||
| 1519 | /* Unqueue and drop the lock */ | 1523 | /* Unqueue and drop the lock */ |
| 1520 | unqueue_me_pi(&q); | 1524 | unqueue_me_pi(&q); |
| 1521 | if (fshared) | 1525 | futex_unlock_mm(fshared); |
| 1522 | up_read(fshared); | ||
| 1523 | 1526 | ||
| 1524 | return ret != -EINTR ? ret : -ERESTARTNOINTR; | 1527 | return ret != -EINTR ? ret : -ERESTARTNOINTR; |
| 1525 | 1528 | ||
| @@ -1527,8 +1530,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1527 | queue_unlock(&q, hb); | 1530 | queue_unlock(&q, hb); |
| 1528 | 1531 | ||
| 1529 | out_release_sem: | 1532 | out_release_sem: |
| 1530 | if (fshared) | 1533 | futex_unlock_mm(fshared); |
| 1531 | up_read(fshared); | ||
| 1532 | return ret; | 1534 | return ret; |
| 1533 | 1535 | ||
| 1534 | uaddr_faulted: | 1536 | uaddr_faulted: |
| @@ -1550,8 +1552,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1550 | goto retry_unlocked; | 1552 | goto retry_unlocked; |
| 1551 | } | 1553 | } |
| 1552 | 1554 | ||
| 1553 | if (fshared) | 1555 | futex_unlock_mm(fshared); |
| 1554 | up_read(fshared); | ||
| 1555 | 1556 | ||
| 1556 | ret = get_user(uval, uaddr); | 1557 | ret = get_user(uval, uaddr); |
| 1557 | if (!ret && (uval != -EFAULT)) | 1558 | if (!ret && (uval != -EFAULT)) |
| @@ -1585,8 +1586,7 @@ retry: | |||
| 1585 | /* | 1586 | /* |
| 1586 | * First take all the futex related locks: | 1587 | * First take all the futex related locks: |
| 1587 | */ | 1588 | */ |
| 1588 | if (fshared) | 1589 | futex_lock_mm(fshared); |
| 1589 | down_read(fshared); | ||
| 1590 | 1590 | ||
| 1591 | ret = get_futex_key(uaddr, fshared, &key); | 1591 | ret = get_futex_key(uaddr, fshared, &key); |
| 1592 | if (unlikely(ret != 0)) | 1592 | if (unlikely(ret != 0)) |
| @@ -1601,11 +1601,9 @@ retry_unlocked: | |||
| 1601 | * again. If it succeeds then we can return without waking | 1601 | * again. If it succeeds then we can return without waking |
| 1602 | * anyone else up: | 1602 | * anyone else up: |
| 1603 | */ | 1603 | */ |
| 1604 | if (!(uval & FUTEX_OWNER_DIED)) { | 1604 | if (!(uval & FUTEX_OWNER_DIED)) |
| 1605 | pagefault_disable(); | 1605 | uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0); |
| 1606 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); | 1606 | |
| 1607 | pagefault_enable(); | ||
| 1608 | } | ||
| 1609 | 1607 | ||
| 1610 | if (unlikely(uval == -EFAULT)) | 1608 | if (unlikely(uval == -EFAULT)) |
| 1611 | goto pi_faulted; | 1609 | goto pi_faulted; |
| @@ -1647,8 +1645,7 @@ retry_unlocked: | |||
| 1647 | out_unlock: | 1645 | out_unlock: |
| 1648 | spin_unlock(&hb->lock); | 1646 | spin_unlock(&hb->lock); |
| 1649 | out: | 1647 | out: |
| 1650 | if (fshared) | 1648 | futex_unlock_mm(fshared); |
| 1651 | up_read(fshared); | ||
| 1652 | 1649 | ||
| 1653 | return ret; | 1650 | return ret; |
| 1654 | 1651 | ||
| @@ -1671,8 +1668,7 @@ pi_faulted: | |||
| 1671 | goto retry_unlocked; | 1668 | goto retry_unlocked; |
| 1672 | } | 1669 | } |
| 1673 | 1670 | ||
| 1674 | if (fshared) | 1671 | futex_unlock_mm(fshared); |
| 1675 | up_read(fshared); | ||
| 1676 | 1672 | ||
| 1677 | ret = get_user(uval, uaddr); | 1673 | ret = get_user(uval, uaddr); |
| 1678 | if (!ret && (uval != -EFAULT)) | 1674 | if (!ret && (uval != -EFAULT)) |
| @@ -1729,8 +1725,8 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
| 1729 | 1725 | ||
| 1730 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { | 1726 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { |
| 1731 | printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " | 1727 | printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " |
| 1732 | "will be removed from the kernel in June 2007\n", | 1728 | "will be removed from the kernel in June 2007\n", |
| 1733 | current->comm); | 1729 | current->comm); |
| 1734 | } | 1730 | } |
| 1735 | 1731 | ||
| 1736 | ret = -EINVAL; | 1732 | ret = -EINVAL; |
| @@ -1908,10 +1904,8 @@ retry: | |||
| 1908 | * Wake robust non-PI futexes here. The wakeup of | 1904 | * Wake robust non-PI futexes here. The wakeup of |
| 1909 | * PI futexes happens in exit_pi_state(): | 1905 | * PI futexes happens in exit_pi_state(): |
| 1910 | */ | 1906 | */ |
| 1911 | if (!pi) { | 1907 | if (!pi && (uval & FUTEX_WAITERS)) |
| 1912 | if (uval & FUTEX_WAITERS) | ||
| 1913 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); | 1908 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); |
| 1914 | } | ||
| 1915 | } | 1909 | } |
| 1916 | return 0; | 1910 | return 0; |
| 1917 | } | 1911 | } |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 23c03f43e196..72d034258ba1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
| @@ -1406,7 +1406,7 @@ static void migrate_hrtimers(int cpu) | |||
| 1406 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | 1406 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, |
| 1407 | unsigned long action, void *hcpu) | 1407 | unsigned long action, void *hcpu) |
| 1408 | { | 1408 | { |
| 1409 | long cpu = (long)hcpu; | 1409 | unsigned int cpu = (long)hcpu; |
| 1410 | 1410 | ||
| 1411 | switch (action) { | 1411 | switch (action) { |
| 1412 | 1412 | ||
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index bd9e272d55e9..32b161972fad 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
| @@ -172,7 +172,17 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
| 172 | irqreturn_t action_ret) | 172 | irqreturn_t action_ret) |
| 173 | { | 173 | { |
| 174 | if (unlikely(action_ret != IRQ_HANDLED)) { | 174 | if (unlikely(action_ret != IRQ_HANDLED)) { |
| 175 | desc->irqs_unhandled++; | 175 | /* |
| 176 | * If we are seeing only the odd spurious IRQ caused by | ||
| 177 | * bus asynchronicity then don't eventually trigger an error, | ||
| 178 | * otherwise the counter becomes a doomsday timer for otherwise | ||
| 179 | * working systems | ||
| 180 | */ | ||
| 181 | if (jiffies - desc->last_unhandled > HZ/10) | ||
| 182 | desc->irqs_unhandled = 1; | ||
| 183 | else | ||
| 184 | desc->irqs_unhandled++; | ||
| 185 | desc->last_unhandled = jiffies; | ||
| 176 | if (unlikely(action_ret != IRQ_NONE)) | 186 | if (unlikely(action_ret != IRQ_NONE)) |
| 177 | report_bad_irq(irq, desc, action_ret); | 187 | report_bad_irq(irq, desc, action_ret); |
| 178 | } | 188 | } |
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fed54418626c..474219a41929 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
| @@ -152,7 +152,7 @@ static unsigned int get_symbol_offset(unsigned long pos) | |||
| 152 | /* Lookup the address for this symbol. Returns 0 if not found. */ | 152 | /* Lookup the address for this symbol. Returns 0 if not found. */ |
| 153 | unsigned long kallsyms_lookup_name(const char *name) | 153 | unsigned long kallsyms_lookup_name(const char *name) |
| 154 | { | 154 | { |
| 155 | char namebuf[KSYM_NAME_LEN+1]; | 155 | char namebuf[KSYM_NAME_LEN]; |
| 156 | unsigned long i; | 156 | unsigned long i; |
| 157 | unsigned int off; | 157 | unsigned int off; |
| 158 | 158 | ||
| @@ -248,7 +248,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
| 248 | { | 248 | { |
| 249 | const char *msym; | 249 | const char *msym; |
| 250 | 250 | ||
| 251 | namebuf[KSYM_NAME_LEN] = 0; | 251 | namebuf[KSYM_NAME_LEN - 1] = 0; |
| 252 | namebuf[0] = 0; | 252 | namebuf[0] = 0; |
| 253 | 253 | ||
| 254 | if (is_ksym_addr(addr)) { | 254 | if (is_ksym_addr(addr)) { |
| @@ -265,7 +265,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
| 265 | /* see if it's in a module */ | 265 | /* see if it's in a module */ |
| 266 | msym = module_address_lookup(addr, symbolsize, offset, modname); | 266 | msym = module_address_lookup(addr, symbolsize, offset, modname); |
| 267 | if (msym) | 267 | if (msym) |
| 268 | return strncpy(namebuf, msym, KSYM_NAME_LEN); | 268 | return strncpy(namebuf, msym, KSYM_NAME_LEN - 1); |
| 269 | 269 | ||
| 270 | return NULL; | 270 | return NULL; |
| 271 | } | 271 | } |
| @@ -273,7 +273,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
| 273 | int lookup_symbol_name(unsigned long addr, char *symname) | 273 | int lookup_symbol_name(unsigned long addr, char *symname) |
| 274 | { | 274 | { |
| 275 | symname[0] = '\0'; | 275 | symname[0] = '\0'; |
| 276 | symname[KSYM_NAME_LEN] = '\0'; | 276 | symname[KSYM_NAME_LEN - 1] = '\0'; |
| 277 | 277 | ||
| 278 | if (is_ksym_addr(addr)) { | 278 | if (is_ksym_addr(addr)) { |
| 279 | unsigned long pos; | 279 | unsigned long pos; |
| @@ -291,7 +291,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | |||
| 291 | unsigned long *offset, char *modname, char *name) | 291 | unsigned long *offset, char *modname, char *name) |
| 292 | { | 292 | { |
| 293 | name[0] = '\0'; | 293 | name[0] = '\0'; |
| 294 | name[KSYM_NAME_LEN] = '\0'; | 294 | name[KSYM_NAME_LEN - 1] = '\0'; |
| 295 | 295 | ||
| 296 | if (is_ksym_addr(addr)) { | 296 | if (is_ksym_addr(addr)) { |
| 297 | unsigned long pos; | 297 | unsigned long pos; |
| @@ -312,18 +312,17 @@ int sprint_symbol(char *buffer, unsigned long address) | |||
| 312 | char *modname; | 312 | char *modname; |
| 313 | const char *name; | 313 | const char *name; |
| 314 | unsigned long offset, size; | 314 | unsigned long offset, size; |
| 315 | char namebuf[KSYM_NAME_LEN+1]; | 315 | char namebuf[KSYM_NAME_LEN]; |
| 316 | 316 | ||
| 317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); | 317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); |
| 318 | if (!name) | 318 | if (!name) |
| 319 | return sprintf(buffer, "0x%lx", address); | 319 | return sprintf(buffer, "0x%lx", address); |
| 320 | else { | 320 | |
| 321 | if (modname) | 321 | if (modname) |
| 322 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, | 322 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, |
| 323 | size, modname); | 323 | size, modname); |
| 324 | else | 324 | else |
| 325 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); | 325 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); |
| 326 | } | ||
| 327 | } | 326 | } |
| 328 | 327 | ||
| 329 | /* Look up a kernel symbol and print it to the kernel messages. */ | 328 | /* Look up a kernel symbol and print it to the kernel messages. */ |
| @@ -343,8 +342,8 @@ struct kallsym_iter | |||
| 343 | unsigned long value; | 342 | unsigned long value; |
| 344 | unsigned int nameoff; /* If iterating in core kernel symbols */ | 343 | unsigned int nameoff; /* If iterating in core kernel symbols */ |
| 345 | char type; | 344 | char type; |
| 346 | char name[KSYM_NAME_LEN+1]; | 345 | char name[KSYM_NAME_LEN]; |
| 347 | char module_name[MODULE_NAME_LEN + 1]; | 346 | char module_name[MODULE_NAME_LEN]; |
| 348 | int exported; | 347 | int exported; |
| 349 | }; | 348 | }; |
| 350 | 349 | ||
diff --git a/kernel/kfifo.c b/kernel/kfifo.c index cee419143fd4..bc41ad0f24f8 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
| 25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
| 26 | #include <linux/kfifo.h> | 26 | #include <linux/kfifo.h> |
| 27 | #include <linux/log2.h> | ||
| 27 | 28 | ||
| 28 | /** | 29 | /** |
| 29 | * kfifo_init - allocates a new FIFO using a preallocated buffer | 30 | * kfifo_init - allocates a new FIFO using a preallocated buffer |
| @@ -41,7 +42,7 @@ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, | |||
| 41 | struct kfifo *fifo; | 42 | struct kfifo *fifo; |
| 42 | 43 | ||
| 43 | /* size must be a power of 2 */ | 44 | /* size must be a power of 2 */ |
| 44 | BUG_ON(size & (size - 1)); | 45 | BUG_ON(!is_power_of_2(size)); |
| 45 | 46 | ||
| 46 | fifo = kmalloc(sizeof(struct kfifo), gfp_mask); | 47 | fifo = kmalloc(sizeof(struct kfifo), gfp_mask); |
| 47 | if (!fifo) | 48 | if (!fifo) |
diff --git a/kernel/kthread.c b/kernel/kthread.c index bbd51b81a3e8..a404f7ee7395 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
| @@ -215,7 +215,7 @@ int kthread_stop(struct task_struct *k) | |||
| 215 | EXPORT_SYMBOL(kthread_stop); | 215 | EXPORT_SYMBOL(kthread_stop); |
| 216 | 216 | ||
| 217 | 217 | ||
| 218 | static __init void kthreadd_setup(void) | 218 | static noinline __init_refok void kthreadd_setup(void) |
| 219 | { | 219 | { |
| 220 | struct task_struct *tsk = current; | 220 | struct task_struct *tsk = current; |
| 221 | 221 | ||
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 1a5ff2211d88..edba2ffb43de 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -379,7 +379,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4 | |||
| 379 | 379 | ||
| 380 | static void print_lock_name(struct lock_class *class) | 380 | static void print_lock_name(struct lock_class *class) |
| 381 | { | 381 | { |
| 382 | char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4; | 382 | char str[KSYM_NAME_LEN], c1, c2, c3, c4; |
| 383 | const char *name; | 383 | const char *name; |
| 384 | 384 | ||
| 385 | get_usage_chars(class, &c1, &c2, &c3, &c4); | 385 | get_usage_chars(class, &c1, &c2, &c3, &c4); |
| @@ -401,7 +401,7 @@ static void print_lock_name(struct lock_class *class) | |||
| 401 | static void print_lockdep_cache(struct lockdep_map *lock) | 401 | static void print_lockdep_cache(struct lockdep_map *lock) |
| 402 | { | 402 | { |
| 403 | const char *name; | 403 | const char *name; |
| 404 | char str[KSYM_NAME_LEN + 1]; | 404 | char str[KSYM_NAME_LEN]; |
| 405 | 405 | ||
| 406 | name = lock->name; | 406 | name = lock->name; |
| 407 | if (!name) | 407 | if (!name) |
diff --git a/kernel/module.c b/kernel/module.c index 015d60cfd90e..33c04ad51175 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -61,10 +61,8 @@ extern int module_sysfs_initialized; | |||
| 61 | /* If this is set, the section belongs in the init part of the module */ | 61 | /* If this is set, the section belongs in the init part of the module */ |
| 62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) | 62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) |
| 63 | 63 | ||
| 64 | /* Protects module list */ | 64 | /* List of modules, protected by module_mutex or preempt_disable |
| 65 | static DEFINE_SPINLOCK(modlist_lock); | 65 | * (add/delete uses stop_machine). */ |
| 66 | |||
| 67 | /* List of modules, protected by module_mutex AND modlist_lock */ | ||
| 68 | static DEFINE_MUTEX(module_mutex); | 66 | static DEFINE_MUTEX(module_mutex); |
| 69 | static LIST_HEAD(modules); | 67 | static LIST_HEAD(modules); |
| 70 | 68 | ||
| @@ -760,14 +758,13 @@ static void print_unload_info(struct seq_file *m, struct module *mod) | |||
| 760 | void __symbol_put(const char *symbol) | 758 | void __symbol_put(const char *symbol) |
| 761 | { | 759 | { |
| 762 | struct module *owner; | 760 | struct module *owner; |
| 763 | unsigned long flags; | ||
| 764 | const unsigned long *crc; | 761 | const unsigned long *crc; |
| 765 | 762 | ||
| 766 | spin_lock_irqsave(&modlist_lock, flags); | 763 | preempt_disable(); |
| 767 | if (!__find_symbol(symbol, &owner, &crc, 1)) | 764 | if (!__find_symbol(symbol, &owner, &crc, 1)) |
| 768 | BUG(); | 765 | BUG(); |
| 769 | module_put(owner); | 766 | module_put(owner); |
| 770 | spin_unlock_irqrestore(&modlist_lock, flags); | 767 | preempt_enable(); |
| 771 | } | 768 | } |
| 772 | EXPORT_SYMBOL(__symbol_put); | 769 | EXPORT_SYMBOL(__symbol_put); |
| 773 | 770 | ||
| @@ -1228,14 +1225,14 @@ static void free_module(struct module *mod) | |||
| 1228 | void *__symbol_get(const char *symbol) | 1225 | void *__symbol_get(const char *symbol) |
| 1229 | { | 1226 | { |
| 1230 | struct module *owner; | 1227 | struct module *owner; |
| 1231 | unsigned long value, flags; | 1228 | unsigned long value; |
| 1232 | const unsigned long *crc; | 1229 | const unsigned long *crc; |
| 1233 | 1230 | ||
| 1234 | spin_lock_irqsave(&modlist_lock, flags); | 1231 | preempt_disable(); |
| 1235 | value = __find_symbol(symbol, &owner, &crc, 1); | 1232 | value = __find_symbol(symbol, &owner, &crc, 1); |
| 1236 | if (value && !strong_try_module_get(owner)) | 1233 | if (value && !strong_try_module_get(owner)) |
| 1237 | value = 0; | 1234 | value = 0; |
| 1238 | spin_unlock_irqrestore(&modlist_lock, flags); | 1235 | preempt_enable(); |
| 1239 | 1236 | ||
| 1240 | return (void *)value; | 1237 | return (void *)value; |
| 1241 | } | 1238 | } |
| @@ -2136,7 +2133,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) | |||
| 2136 | sym = get_ksymbol(mod, addr, NULL, NULL); | 2133 | sym = get_ksymbol(mod, addr, NULL, NULL); |
| 2137 | if (!sym) | 2134 | if (!sym) |
| 2138 | goto out; | 2135 | goto out; |
| 2139 | strlcpy(symname, sym, KSYM_NAME_LEN + 1); | 2136 | strlcpy(symname, sym, KSYM_NAME_LEN); |
| 2140 | mutex_unlock(&module_mutex); | 2137 | mutex_unlock(&module_mutex); |
| 2141 | return 0; | 2138 | return 0; |
| 2142 | } | 2139 | } |
| @@ -2161,9 +2158,9 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | |||
| 2161 | if (!sym) | 2158 | if (!sym) |
| 2162 | goto out; | 2159 | goto out; |
| 2163 | if (modname) | 2160 | if (modname) |
| 2164 | strlcpy(modname, mod->name, MODULE_NAME_LEN + 1); | 2161 | strlcpy(modname, mod->name, MODULE_NAME_LEN); |
| 2165 | if (name) | 2162 | if (name) |
| 2166 | strlcpy(name, sym, KSYM_NAME_LEN + 1); | 2163 | strlcpy(name, sym, KSYM_NAME_LEN); |
| 2167 | mutex_unlock(&module_mutex); | 2164 | mutex_unlock(&module_mutex); |
| 2168 | return 0; | 2165 | return 0; |
| 2169 | } | 2166 | } |
| @@ -2184,8 +2181,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | |||
| 2184 | *value = mod->symtab[symnum].st_value; | 2181 | *value = mod->symtab[symnum].st_value; |
| 2185 | *type = mod->symtab[symnum].st_info; | 2182 | *type = mod->symtab[symnum].st_info; |
| 2186 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, | 2183 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, |
| 2187 | KSYM_NAME_LEN + 1); | 2184 | KSYM_NAME_LEN); |
| 2188 | strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1); | 2185 | strlcpy(module_name, mod->name, MODULE_NAME_LEN); |
| 2189 | *exported = is_exported(name, mod); | 2186 | *exported = is_exported(name, mod); |
| 2190 | mutex_unlock(&module_mutex); | 2187 | mutex_unlock(&module_mutex); |
| 2191 | return 0; | 2188 | return 0; |
| @@ -2232,26 +2229,13 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
| 2232 | /* Called by the /proc file system to return a list of modules. */ | 2229 | /* Called by the /proc file system to return a list of modules. */ |
| 2233 | static void *m_start(struct seq_file *m, loff_t *pos) | 2230 | static void *m_start(struct seq_file *m, loff_t *pos) |
| 2234 | { | 2231 | { |
| 2235 | struct list_head *i; | ||
| 2236 | loff_t n = 0; | ||
| 2237 | |||
| 2238 | mutex_lock(&module_mutex); | 2232 | mutex_lock(&module_mutex); |
| 2239 | list_for_each(i, &modules) { | 2233 | return seq_list_start(&modules, *pos); |
| 2240 | if (n++ == *pos) | ||
| 2241 | break; | ||
| 2242 | } | ||
| 2243 | if (i == &modules) | ||
| 2244 | return NULL; | ||
| 2245 | return i; | ||
| 2246 | } | 2234 | } |
| 2247 | 2235 | ||
| 2248 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | 2236 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) |
| 2249 | { | 2237 | { |
| 2250 | struct list_head *i = p; | 2238 | return seq_list_next(p, &modules, pos); |
| 2251 | (*pos)++; | ||
| 2252 | if (i->next == &modules) | ||
| 2253 | return NULL; | ||
| 2254 | return i->next; | ||
| 2255 | } | 2239 | } |
| 2256 | 2240 | ||
| 2257 | static void m_stop(struct seq_file *m, void *p) | 2241 | static void m_stop(struct seq_file *m, void *p) |
| @@ -2321,11 +2305,10 @@ const struct seq_operations modules_op = { | |||
| 2321 | /* Given an address, look for it in the module exception tables. */ | 2305 | /* Given an address, look for it in the module exception tables. */ |
| 2322 | const struct exception_table_entry *search_module_extables(unsigned long addr) | 2306 | const struct exception_table_entry *search_module_extables(unsigned long addr) |
| 2323 | { | 2307 | { |
| 2324 | unsigned long flags; | ||
| 2325 | const struct exception_table_entry *e = NULL; | 2308 | const struct exception_table_entry *e = NULL; |
| 2326 | struct module *mod; | 2309 | struct module *mod; |
| 2327 | 2310 | ||
| 2328 | spin_lock_irqsave(&modlist_lock, flags); | 2311 | preempt_disable(); |
| 2329 | list_for_each_entry(mod, &modules, list) { | 2312 | list_for_each_entry(mod, &modules, list) { |
| 2330 | if (mod->num_exentries == 0) | 2313 | if (mod->num_exentries == 0) |
| 2331 | continue; | 2314 | continue; |
| @@ -2336,7 +2319,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
| 2336 | if (e) | 2319 | if (e) |
| 2337 | break; | 2320 | break; |
| 2338 | } | 2321 | } |
| 2339 | spin_unlock_irqrestore(&modlist_lock, flags); | 2322 | preempt_enable(); |
| 2340 | 2323 | ||
| 2341 | /* Now, if we found one, we are running inside it now, hence | 2324 | /* Now, if we found one, we are running inside it now, hence |
| 2342 | we cannot unload the module, hence no refcnt needed. */ | 2325 | we cannot unload the module, hence no refcnt needed. */ |
| @@ -2348,25 +2331,24 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
| 2348 | */ | 2331 | */ |
| 2349 | int is_module_address(unsigned long addr) | 2332 | int is_module_address(unsigned long addr) |
| 2350 | { | 2333 | { |
| 2351 | unsigned long flags; | ||
| 2352 | struct module *mod; | 2334 | struct module *mod; |
| 2353 | 2335 | ||
| 2354 | spin_lock_irqsave(&modlist_lock, flags); | 2336 | preempt_disable(); |
| 2355 | 2337 | ||
| 2356 | list_for_each_entry(mod, &modules, list) { | 2338 | list_for_each_entry(mod, &modules, list) { |
| 2357 | if (within(addr, mod->module_core, mod->core_size)) { | 2339 | if (within(addr, mod->module_core, mod->core_size)) { |
| 2358 | spin_unlock_irqrestore(&modlist_lock, flags); | 2340 | preempt_enable(); |
| 2359 | return 1; | 2341 | return 1; |
| 2360 | } | 2342 | } |
| 2361 | } | 2343 | } |
| 2362 | 2344 | ||
| 2363 | spin_unlock_irqrestore(&modlist_lock, flags); | 2345 | preempt_enable(); |
| 2364 | 2346 | ||
| 2365 | return 0; | 2347 | return 0; |
| 2366 | } | 2348 | } |
| 2367 | 2349 | ||
| 2368 | 2350 | ||
| 2369 | /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ | 2351 | /* Is this a valid kernel address? */ |
| 2370 | struct module *__module_text_address(unsigned long addr) | 2352 | struct module *__module_text_address(unsigned long addr) |
| 2371 | { | 2353 | { |
| 2372 | struct module *mod; | 2354 | struct module *mod; |
| @@ -2381,11 +2363,10 @@ struct module *__module_text_address(unsigned long addr) | |||
| 2381 | struct module *module_text_address(unsigned long addr) | 2363 | struct module *module_text_address(unsigned long addr) |
| 2382 | { | 2364 | { |
| 2383 | struct module *mod; | 2365 | struct module *mod; |
| 2384 | unsigned long flags; | ||
| 2385 | 2366 | ||
| 2386 | spin_lock_irqsave(&modlist_lock, flags); | 2367 | preempt_disable(); |
| 2387 | mod = __module_text_address(addr); | 2368 | mod = __module_text_address(addr); |
| 2388 | spin_unlock_irqrestore(&modlist_lock, flags); | 2369 | preempt_enable(); |
| 2389 | 2370 | ||
| 2390 | return mod; | 2371 | return mod; |
| 2391 | } | 2372 | } |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 9e83b589f754..10f0bbba382b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
| @@ -21,6 +21,8 @@ | |||
| 21 | #include <linux/utsname.h> | 21 | #include <linux/utsname.h> |
| 22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
| 23 | 23 | ||
| 24 | static struct kmem_cache *nsproxy_cachep; | ||
| 25 | |||
| 24 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); | 26 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); |
| 25 | 27 | ||
| 26 | static inline void get_nsproxy(struct nsproxy *ns) | 28 | static inline void get_nsproxy(struct nsproxy *ns) |
| @@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) | |||
| 43 | { | 45 | { |
| 44 | struct nsproxy *ns; | 46 | struct nsproxy *ns; |
| 45 | 47 | ||
| 46 | ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); | 48 | ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); |
| 47 | if (ns) | 49 | if (ns) { |
| 50 | memcpy(ns, orig, sizeof(struct nsproxy)); | ||
| 48 | atomic_set(&ns->count, 1); | 51 | atomic_set(&ns->count, 1); |
| 52 | } | ||
| 49 | return ns; | 53 | return ns; |
| 50 | } | 54 | } |
| 51 | 55 | ||
| @@ -54,33 +58,51 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) | |||
| 54 | * Return the newly created nsproxy. Do not attach this to the task, | 58 | * Return the newly created nsproxy. Do not attach this to the task, |
| 55 | * leave it to the caller to do proper locking and attach it to task. | 59 | * leave it to the caller to do proper locking and attach it to task. |
| 56 | */ | 60 | */ |
| 57 | static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk, | 61 | static struct nsproxy *create_new_namespaces(unsigned long flags, |
| 58 | struct fs_struct *new_fs) | 62 | struct task_struct *tsk, struct fs_struct *new_fs) |
| 59 | { | 63 | { |
| 60 | struct nsproxy *new_nsp; | 64 | struct nsproxy *new_nsp; |
| 65 | int err; | ||
| 61 | 66 | ||
| 62 | new_nsp = clone_nsproxy(tsk->nsproxy); | 67 | new_nsp = clone_nsproxy(tsk->nsproxy); |
| 63 | if (!new_nsp) | 68 | if (!new_nsp) |
| 64 | return ERR_PTR(-ENOMEM); | 69 | return ERR_PTR(-ENOMEM); |
| 65 | 70 | ||
| 66 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); | 71 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); |
| 67 | if (IS_ERR(new_nsp->mnt_ns)) | 72 | if (IS_ERR(new_nsp->mnt_ns)) { |
| 73 | err = PTR_ERR(new_nsp->mnt_ns); | ||
| 68 | goto out_ns; | 74 | goto out_ns; |
| 75 | } | ||
| 69 | 76 | ||
| 70 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); | 77 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); |
| 71 | if (IS_ERR(new_nsp->uts_ns)) | 78 | if (IS_ERR(new_nsp->uts_ns)) { |
| 79 | err = PTR_ERR(new_nsp->uts_ns); | ||
| 72 | goto out_uts; | 80 | goto out_uts; |
| 81 | } | ||
| 73 | 82 | ||
| 74 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); | 83 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); |
| 75 | if (IS_ERR(new_nsp->ipc_ns)) | 84 | if (IS_ERR(new_nsp->ipc_ns)) { |
| 85 | err = PTR_ERR(new_nsp->ipc_ns); | ||
| 76 | goto out_ipc; | 86 | goto out_ipc; |
| 87 | } | ||
| 77 | 88 | ||
| 78 | new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); | 89 | new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); |
| 79 | if (IS_ERR(new_nsp->pid_ns)) | 90 | if (IS_ERR(new_nsp->pid_ns)) { |
| 91 | err = PTR_ERR(new_nsp->pid_ns); | ||
| 80 | goto out_pid; | 92 | goto out_pid; |
| 93 | } | ||
| 94 | |||
| 95 | new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); | ||
| 96 | if (IS_ERR(new_nsp->user_ns)) { | ||
| 97 | err = PTR_ERR(new_nsp->user_ns); | ||
| 98 | goto out_user; | ||
| 99 | } | ||
| 81 | 100 | ||
| 82 | return new_nsp; | 101 | return new_nsp; |
| 83 | 102 | ||
| 103 | out_user: | ||
| 104 | if (new_nsp->pid_ns) | ||
| 105 | put_pid_ns(new_nsp->pid_ns); | ||
| 84 | out_pid: | 106 | out_pid: |
| 85 | if (new_nsp->ipc_ns) | 107 | if (new_nsp->ipc_ns) |
| 86 | put_ipc_ns(new_nsp->ipc_ns); | 108 | put_ipc_ns(new_nsp->ipc_ns); |
| @@ -91,15 +113,15 @@ out_uts: | |||
| 91 | if (new_nsp->mnt_ns) | 113 | if (new_nsp->mnt_ns) |
| 92 | put_mnt_ns(new_nsp->mnt_ns); | 114 | put_mnt_ns(new_nsp->mnt_ns); |
| 93 | out_ns: | 115 | out_ns: |
| 94 | kfree(new_nsp); | 116 | kmem_cache_free(nsproxy_cachep, new_nsp); |
| 95 | return ERR_PTR(-ENOMEM); | 117 | return ERR_PTR(err); |
| 96 | } | 118 | } |
| 97 | 119 | ||
| 98 | /* | 120 | /* |
| 99 | * called from clone. This now handles copy for nsproxy and all | 121 | * called from clone. This now handles copy for nsproxy and all |
| 100 | * namespaces therein. | 122 | * namespaces therein. |
| 101 | */ | 123 | */ |
| 102 | int copy_namespaces(int flags, struct task_struct *tsk) | 124 | int copy_namespaces(unsigned long flags, struct task_struct *tsk) |
| 103 | { | 125 | { |
| 104 | struct nsproxy *old_ns = tsk->nsproxy; | 126 | struct nsproxy *old_ns = tsk->nsproxy; |
| 105 | struct nsproxy *new_ns; | 127 | struct nsproxy *new_ns; |
| @@ -110,7 +132,7 @@ int copy_namespaces(int flags, struct task_struct *tsk) | |||
| 110 | 132 | ||
| 111 | get_nsproxy(old_ns); | 133 | get_nsproxy(old_ns); |
| 112 | 134 | ||
| 113 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) | 135 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER))) |
| 114 | return 0; | 136 | return 0; |
| 115 | 137 | ||
| 116 | if (!capable(CAP_SYS_ADMIN)) { | 138 | if (!capable(CAP_SYS_ADMIN)) { |
| @@ -140,7 +162,9 @@ void free_nsproxy(struct nsproxy *ns) | |||
| 140 | put_ipc_ns(ns->ipc_ns); | 162 | put_ipc_ns(ns->ipc_ns); |
| 141 | if (ns->pid_ns) | 163 | if (ns->pid_ns) |
| 142 | put_pid_ns(ns->pid_ns); | 164 | put_pid_ns(ns->pid_ns); |
| 143 | kfree(ns); | 165 | if (ns->user_ns) |
| 166 | put_user_ns(ns->user_ns); | ||
| 167 | kmem_cache_free(nsproxy_cachep, ns); | ||
| 144 | } | 168 | } |
| 145 | 169 | ||
| 146 | /* | 170 | /* |
| @@ -152,19 +176,10 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
| 152 | { | 176 | { |
| 153 | int err = 0; | 177 | int err = 0; |
| 154 | 178 | ||
| 155 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) | 179 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
| 180 | CLONE_NEWUSER))) | ||
| 156 | return 0; | 181 | return 0; |
| 157 | 182 | ||
| 158 | #ifndef CONFIG_IPC_NS | ||
| 159 | if (unshare_flags & CLONE_NEWIPC) | ||
| 160 | return -EINVAL; | ||
| 161 | #endif | ||
| 162 | |||
| 163 | #ifndef CONFIG_UTS_NS | ||
| 164 | if (unshare_flags & CLONE_NEWUTS) | ||
| 165 | return -EINVAL; | ||
| 166 | #endif | ||
| 167 | |||
| 168 | if (!capable(CAP_SYS_ADMIN)) | 183 | if (!capable(CAP_SYS_ADMIN)) |
| 169 | return -EPERM; | 184 | return -EPERM; |
| 170 | 185 | ||
| @@ -174,3 +189,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
| 174 | err = PTR_ERR(*new_nsp); | 189 | err = PTR_ERR(*new_nsp); |
| 175 | return err; | 190 | return err; |
| 176 | } | 191 | } |
| 192 | |||
| 193 | static int __init nsproxy_cache_init(void) | ||
| 194 | { | ||
| 195 | nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), | ||
| 196 | 0, SLAB_PANIC, NULL, NULL); | ||
| 197 | return 0; | ||
| 198 | } | ||
| 199 | |||
| 200 | module_init(nsproxy_cache_init); | ||
diff --git a/kernel/panic.c b/kernel/panic.c index 623d1828259a..f64f4c1ac11f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -159,14 +159,15 @@ const char *print_tainted(void) | |||
| 159 | { | 159 | { |
| 160 | static char buf[20]; | 160 | static char buf[20]; |
| 161 | if (tainted) { | 161 | if (tainted) { |
| 162 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c", | 162 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c", |
| 163 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', | 163 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', |
| 164 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', | 164 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', |
| 165 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', | 165 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', |
| 166 | tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', | 166 | tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', |
| 167 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', | 167 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', |
| 168 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', | 168 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', |
| 169 | tainted & TAINT_USER ? 'U' : ' '); | 169 | tainted & TAINT_USER ? 'U' : ' ', |
| 170 | tainted & TAINT_DIE ? 'D' : ' '); | ||
| 170 | } | 171 | } |
| 171 | else | 172 | else |
| 172 | snprintf(buf, sizeof(buf), "Not tainted"); | 173 | snprintf(buf, sizeof(buf), "Not tainted"); |
diff --git a/kernel/pid.c b/kernel/pid.c index eb66bd2953ab..c6e3f9ffff87 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -365,7 +365,7 @@ struct pid *find_ge_pid(int nr) | |||
| 365 | } | 365 | } |
| 366 | EXPORT_SYMBOL_GPL(find_get_pid); | 366 | EXPORT_SYMBOL_GPL(find_get_pid); |
| 367 | 367 | ||
| 368 | struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns) | 368 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) |
| 369 | { | 369 | { |
| 370 | BUG_ON(!old_ns); | 370 | BUG_ON(!old_ns); |
| 371 | get_pid_ns(old_ns); | 371 | get_pid_ns(old_ns); |
diff --git a/kernel/printk.c b/kernel/printk.c index 0bbdeac2810c..051d27e36a6c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -449,13 +449,16 @@ static int printk_time = 1; | |||
| 449 | #else | 449 | #else |
| 450 | static int printk_time = 0; | 450 | static int printk_time = 0; |
| 451 | #endif | 451 | #endif |
| 452 | module_param(printk_time, int, S_IRUGO | S_IWUSR); | 452 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); |
| 453 | 453 | ||
| 454 | static int __init printk_time_setup(char *str) | 454 | static int __init printk_time_setup(char *str) |
| 455 | { | 455 | { |
| 456 | if (*str) | 456 | if (*str) |
| 457 | return 0; | 457 | return 0; |
| 458 | printk_time = 1; | 458 | printk_time = 1; |
| 459 | printk(KERN_NOTICE "The 'time' option is deprecated and " | ||
| 460 | "is scheduled for removal in early 2008\n"); | ||
| 461 | printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n"); | ||
| 459 | return 1; | 462 | return 1; |
| 460 | } | 463 | } |
| 461 | 464 | ||
| @@ -483,6 +486,9 @@ static int have_callable_console(void) | |||
| 483 | * @fmt: format string | 486 | * @fmt: format string |
| 484 | * | 487 | * |
| 485 | * This is printk(). It can be called from any context. We want it to work. | 488 | * This is printk(). It can be called from any context. We want it to work. |
| 489 | * Be aware of the fact that if oops_in_progress is not set, we might try to | ||
| 490 | * wake klogd up which could deadlock on runqueue lock if printk() is called | ||
| 491 | * from scheduler code. | ||
| 486 | * | 492 | * |
| 487 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and | 493 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and |
| 488 | * call the console drivers. If we fail to get the semaphore we place the output | 494 | * call the console drivers. If we fail to get the semaphore we place the output |
| @@ -654,7 +660,7 @@ static void call_console_drivers(unsigned long start, unsigned long end) | |||
| 654 | */ | 660 | */ |
| 655 | static int __init console_setup(char *str) | 661 | static int __init console_setup(char *str) |
| 656 | { | 662 | { |
| 657 | char name[sizeof(console_cmdline[0].name)]; | 663 | char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ |
| 658 | char *s, *options; | 664 | char *s, *options; |
| 659 | int idx; | 665 | int idx; |
| 660 | 666 | ||
| @@ -662,27 +668,27 @@ static int __init console_setup(char *str) | |||
| 662 | * Decode str into name, index, options. | 668 | * Decode str into name, index, options. |
| 663 | */ | 669 | */ |
| 664 | if (str[0] >= '0' && str[0] <= '9') { | 670 | if (str[0] >= '0' && str[0] <= '9') { |
| 665 | strcpy(name, "ttyS"); | 671 | strcpy(buf, "ttyS"); |
| 666 | strncpy(name + 4, str, sizeof(name) - 5); | 672 | strncpy(buf + 4, str, sizeof(buf) - 5); |
| 667 | } else { | 673 | } else { |
| 668 | strncpy(name, str, sizeof(name) - 1); | 674 | strncpy(buf, str, sizeof(buf) - 1); |
| 669 | } | 675 | } |
| 670 | name[sizeof(name) - 1] = 0; | 676 | buf[sizeof(buf) - 1] = 0; |
| 671 | if ((options = strchr(str, ',')) != NULL) | 677 | if ((options = strchr(str, ',')) != NULL) |
| 672 | *(options++) = 0; | 678 | *(options++) = 0; |
| 673 | #ifdef __sparc__ | 679 | #ifdef __sparc__ |
| 674 | if (!strcmp(str, "ttya")) | 680 | if (!strcmp(str, "ttya")) |
| 675 | strcpy(name, "ttyS0"); | 681 | strcpy(buf, "ttyS0"); |
| 676 | if (!strcmp(str, "ttyb")) | 682 | if (!strcmp(str, "ttyb")) |
| 677 | strcpy(name, "ttyS1"); | 683 | strcpy(buf, "ttyS1"); |
| 678 | #endif | 684 | #endif |
| 679 | for (s = name; *s; s++) | 685 | for (s = buf; *s; s++) |
| 680 | if ((*s >= '0' && *s <= '9') || *s == ',') | 686 | if ((*s >= '0' && *s <= '9') || *s == ',') |
| 681 | break; | 687 | break; |
| 682 | idx = simple_strtoul(s, NULL, 10); | 688 | idx = simple_strtoul(s, NULL, 10); |
| 683 | *s = 0; | 689 | *s = 0; |
| 684 | 690 | ||
| 685 | add_preferred_console(name, idx, options); | 691 | add_preferred_console(buf, idx, options); |
| 686 | return 1; | 692 | return 1; |
| 687 | } | 693 | } |
| 688 | __setup("console=", console_setup); | 694 | __setup("console=", console_setup); |
| @@ -709,7 +715,7 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
| 709 | * See if this tty is not yet registered, and | 715 | * See if this tty is not yet registered, and |
| 710 | * if we have a slot free. | 716 | * if we have a slot free. |
| 711 | */ | 717 | */ |
| 712 | for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | 718 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) |
| 713 | if (strcmp(console_cmdline[i].name, name) == 0 && | 719 | if (strcmp(console_cmdline[i].name, name) == 0 && |
| 714 | console_cmdline[i].index == idx) { | 720 | console_cmdline[i].index == idx) { |
| 715 | selected_console = i; | 721 | selected_console = i; |
| @@ -726,6 +732,25 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
| 726 | return 0; | 732 | return 0; |
| 727 | } | 733 | } |
| 728 | 734 | ||
| 735 | int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) | ||
| 736 | { | ||
| 737 | struct console_cmdline *c; | ||
| 738 | int i; | ||
| 739 | |||
| 740 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | ||
| 741 | if (strcmp(console_cmdline[i].name, name) == 0 && | ||
| 742 | console_cmdline[i].index == idx) { | ||
| 743 | c = &console_cmdline[i]; | ||
| 744 | memcpy(c->name, name_new, sizeof(c->name)); | ||
| 745 | c->name[sizeof(c->name) - 1] = 0; | ||
| 746 | c->options = options; | ||
| 747 | c->index = idx_new; | ||
| 748 | return i; | ||
| 749 | } | ||
| 750 | /* not found */ | ||
| 751 | return -1; | ||
| 752 | } | ||
| 753 | |||
| 729 | #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND | 754 | #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND |
| 730 | /** | 755 | /** |
| 731 | * suspend_console - suspend the console subsystem | 756 | * suspend_console - suspend the console subsystem |
| @@ -942,6 +967,9 @@ void register_console(struct console *console) | |||
| 942 | if (preferred_console < 0 || bootconsole || !console_drivers) | 967 | if (preferred_console < 0 || bootconsole || !console_drivers) |
| 943 | preferred_console = selected_console; | 968 | preferred_console = selected_console; |
| 944 | 969 | ||
| 970 | if (console->early_setup) | ||
| 971 | console->early_setup(); | ||
| 972 | |||
| 945 | /* | 973 | /* |
| 946 | * See if we want to use this console driver. If we | 974 | * See if we want to use this console driver. If we |
| 947 | * didn't select a console we take the first one | 975 | * didn't select a console we take the first one |
| @@ -985,12 +1013,15 @@ void register_console(struct console *console) | |||
| 985 | if (!(console->flags & CON_ENABLED)) | 1013 | if (!(console->flags & CON_ENABLED)) |
| 986 | return; | 1014 | return; |
| 987 | 1015 | ||
| 988 | if (bootconsole) { | 1016 | if (bootconsole && (console->flags & CON_CONSDEV)) { |
| 989 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", | 1017 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", |
| 990 | bootconsole->name, bootconsole->index, | 1018 | bootconsole->name, bootconsole->index, |
| 991 | console->name, console->index); | 1019 | console->name, console->index); |
| 992 | unregister_console(bootconsole); | 1020 | unregister_console(bootconsole); |
| 993 | console->flags &= ~CON_PRINTBUFFER; | 1021 | console->flags &= ~CON_PRINTBUFFER; |
| 1022 | } else { | ||
| 1023 | printk(KERN_INFO "console [%s%d] enabled\n", | ||
| 1024 | console->name, console->index); | ||
| 994 | } | 1025 | } |
| 995 | 1026 | ||
| 996 | /* | 1027 | /* |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ad7949a589dd..4a1745f1dadf 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -161,6 +161,7 @@ int ptrace_may_attach(struct task_struct *task) | |||
| 161 | int ptrace_attach(struct task_struct *task) | 161 | int ptrace_attach(struct task_struct *task) |
| 162 | { | 162 | { |
| 163 | int retval; | 163 | int retval; |
| 164 | unsigned long flags; | ||
| 164 | 165 | ||
| 165 | audit_ptrace(task); | 166 | audit_ptrace(task); |
| 166 | 167 | ||
| @@ -181,9 +182,7 @@ repeat: | |||
| 181 | * cpu's that may have task_lock). | 182 | * cpu's that may have task_lock). |
| 182 | */ | 183 | */ |
| 183 | task_lock(task); | 184 | task_lock(task); |
| 184 | local_irq_disable(); | 185 | if (!write_trylock_irqsave(&tasklist_lock, flags)) { |
| 185 | if (!write_trylock(&tasklist_lock)) { | ||
| 186 | local_irq_enable(); | ||
| 187 | task_unlock(task); | 186 | task_unlock(task); |
| 188 | do { | 187 | do { |
| 189 | cpu_relax(); | 188 | cpu_relax(); |
| @@ -211,7 +210,7 @@ repeat: | |||
| 211 | force_sig_specific(SIGSTOP, task); | 210 | force_sig_specific(SIGSTOP, task); |
| 212 | 211 | ||
| 213 | bad: | 212 | bad: |
| 214 | write_unlock_irq(&tasklist_lock); | 213 | write_unlock_irqrestore(&tasklist_lock, flags); |
| 215 | task_unlock(task); | 214 | task_unlock(task); |
| 216 | out: | 215 | out: |
| 217 | return retval; | 216 | return retval; |
| @@ -491,3 +490,22 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) | |||
| 491 | return ret; | 490 | return ret; |
| 492 | } | 491 | } |
| 493 | #endif /* __ARCH_SYS_PTRACE */ | 492 | #endif /* __ARCH_SYS_PTRACE */ |
| 493 | |||
| 494 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | ||
| 495 | { | ||
| 496 | unsigned long tmp; | ||
| 497 | int copied; | ||
| 498 | |||
| 499 | copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0); | ||
| 500 | if (copied != sizeof(tmp)) | ||
| 501 | return -EIO; | ||
| 502 | return put_user(tmp, (unsigned long __user *)data); | ||
| 503 | } | ||
| 504 | |||
| 505 | int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) | ||
| 506 | { | ||
| 507 | int copied; | ||
| 508 | |||
| 509 | copied = access_process_vm(tsk, addr, &data, sizeof(data), 1); | ||
| 510 | return (copied == sizeof(data)) ? 0 : -EIO; | ||
| 511 | } | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 55ba82a85a66..ddff33247785 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/moduleparam.h> | 40 | #include <linux/moduleparam.h> |
| 41 | #include <linux/percpu.h> | 41 | #include <linux/percpu.h> |
| 42 | #include <linux/notifier.h> | 42 | #include <linux/notifier.h> |
| 43 | #include <linux/freezer.h> | ||
| 43 | #include <linux/cpu.h> | 44 | #include <linux/cpu.h> |
| 44 | #include <linux/random.h> | 45 | #include <linux/random.h> |
| 45 | #include <linux/delay.h> | 46 | #include <linux/delay.h> |
| @@ -518,7 +519,6 @@ rcu_torture_writer(void *arg) | |||
| 518 | 519 | ||
| 519 | VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); | 520 | VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); |
| 520 | set_user_nice(current, 19); | 521 | set_user_nice(current, 19); |
| 521 | current->flags |= PF_NOFREEZE; | ||
| 522 | 522 | ||
| 523 | do { | 523 | do { |
| 524 | schedule_timeout_uninterruptible(1); | 524 | schedule_timeout_uninterruptible(1); |
| @@ -558,7 +558,6 @@ rcu_torture_fakewriter(void *arg) | |||
| 558 | 558 | ||
| 559 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); | 559 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); |
| 560 | set_user_nice(current, 19); | 560 | set_user_nice(current, 19); |
| 561 | current->flags |= PF_NOFREEZE; | ||
| 562 | 561 | ||
| 563 | do { | 562 | do { |
| 564 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); | 563 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); |
| @@ -589,7 +588,6 @@ rcu_torture_reader(void *arg) | |||
| 589 | 588 | ||
| 590 | VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); | 589 | VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); |
| 591 | set_user_nice(current, 19); | 590 | set_user_nice(current, 19); |
| 592 | current->flags |= PF_NOFREEZE; | ||
| 593 | 591 | ||
| 594 | do { | 592 | do { |
| 595 | idx = cur_ops->readlock(); | 593 | idx = cur_ops->readlock(); |
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index da8d6bf46457..5aedbee014df 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c | |||
| @@ -29,12 +29,6 @@ | |||
| 29 | 29 | ||
| 30 | #include "rtmutex_common.h" | 30 | #include "rtmutex_common.h" |
| 31 | 31 | ||
| 32 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
| 33 | # include "rtmutex-debug.h" | ||
| 34 | #else | ||
| 35 | # include "rtmutex.h" | ||
| 36 | #endif | ||
| 37 | |||
| 38 | # define TRACE_WARN_ON(x) WARN_ON(x) | 32 | # define TRACE_WARN_ON(x) WARN_ON(x) |
| 39 | # define TRACE_BUG_ON(x) BUG_ON(x) | 33 | # define TRACE_BUG_ON(x) BUG_ON(x) |
| 40 | 34 | ||
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 015fc633c96c..e3055ba69159 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
| @@ -260,6 +260,7 @@ static int test_func(void *data) | |||
| 260 | int ret; | 260 | int ret; |
| 261 | 261 | ||
| 262 | current->flags |= PF_MUTEX_TESTER; | 262 | current->flags |= PF_MUTEX_TESTER; |
| 263 | set_freezable(); | ||
| 263 | allow_signal(SIGHUP); | 264 | allow_signal(SIGHUP); |
| 264 | 265 | ||
| 265 | for(;;) { | 266 | for(;;) { |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 17d28ce20300..8cd9bd2cdb34 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
| @@ -17,12 +17,6 @@ | |||
| 17 | 17 | ||
| 18 | #include "rtmutex_common.h" | 18 | #include "rtmutex_common.h" |
| 19 | 19 | ||
| 20 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
| 21 | # include "rtmutex-debug.h" | ||
| 22 | #else | ||
| 23 | # include "rtmutex.h" | ||
| 24 | #endif | ||
| 25 | |||
| 26 | /* | 20 | /* |
| 27 | * lock->owner state tracking: | 21 | * lock->owner state tracking: |
| 28 | * | 22 | * |
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 9c75856e791e..2d3b83593ca3 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h | |||
| @@ -103,7 +103,7 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) | |||
| 103 | 103 | ||
| 104 | static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) | 104 | static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) |
| 105 | { | 105 | { |
| 106 | return (struct task_struct *) | 106 | return (struct task_struct *) |
| 107 | ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); | 107 | ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); |
| 108 | } | 108 | } |
| 109 | 109 | ||
| @@ -120,4 +120,11 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |||
| 120 | struct task_struct *proxy_owner); | 120 | struct task_struct *proxy_owner); |
| 121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, | 121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, |
| 122 | struct task_struct *proxy_owner); | 122 | struct task_struct *proxy_owner); |
| 123 | |||
| 124 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
| 125 | # include "rtmutex-debug.h" | ||
| 126 | #else | ||
| 127 | # include "rtmutex.h" | ||
| 128 | #endif | ||
| 129 | |||
| 123 | #endif | 130 | #endif |
diff --git a/kernel/sched.c b/kernel/sched.c index 3332bbb5d5cf..cb31fb4a1379 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -736,7 +736,9 @@ static void update_curr_load(struct rq *rq, u64 now) | |||
| 736 | * | 736 | * |
| 737 | * The "10% effect" is relative and cumulative: from _any_ nice level, | 737 | * The "10% effect" is relative and cumulative: from _any_ nice level, |
| 738 | * if you go up 1 level, it's -10% CPU usage, if you go down 1 level | 738 | * if you go up 1 level, it's -10% CPU usage, if you go down 1 level |
| 739 | * it's +10% CPU usage. | 739 | * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25. |
| 740 | * If a task goes up by ~10% and another task goes down by ~10% then | ||
| 741 | * the relative distance between them is ~25%.) | ||
| 740 | */ | 742 | */ |
| 741 | static const int prio_to_weight[40] = { | 743 | static const int prio_to_weight[40] = { |
| 742 | /* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, | 744 | /* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, |
| @@ -746,15 +748,22 @@ static const int prio_to_weight[40] = { | |||
| 746 | /* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, | 748 | /* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, |
| 747 | }; | 749 | }; |
| 748 | 750 | ||
| 751 | /* | ||
| 752 | * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated. | ||
| 753 | * | ||
| 754 | * In cases where the weight does not change often, we can use the | ||
| 755 | * precalculated inverse to speed up arithmetics by turning divisions | ||
| 756 | * into multiplications: | ||
| 757 | */ | ||
| 749 | static const u32 prio_to_wmult[40] = { | 758 | static const u32 prio_to_wmult[40] = { |
| 750 | 48356, 60446, 75558, 94446, 118058, 147573, | 759 | /* -20 */ 48356, 60446, 75558, 94446, 118058, |
| 751 | 184467, 230589, 288233, 360285, 450347, | 760 | /* -15 */ 147573, 184467, 230589, 288233, 360285, |
| 752 | 562979, 703746, 879575, 1099582, 1374389, | 761 | /* -10 */ 450347, 562979, 703746, 879575, 1099582, |
| 753 | 1717986, 2147483, 2684354, 3355443, 4194304, | 762 | /* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443, |
| 754 | 5244160, 6557201, 8196502, 10250518, 12782640, | 763 | /* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518, |
| 755 | 16025997, 19976592, 24970740, 31350126, 39045157, | 764 | /* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126, |
| 756 | 49367440, 61356675, 76695844, 95443717, 119304647, | 765 | /* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717, |
| 757 | 148102320, 186737708, 238609294, 286331153, | 766 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, |
| 758 | }; | 767 | }; |
| 759 | 768 | ||
| 760 | static inline void | 769 | static inline void |
| @@ -4903,8 +4912,6 @@ static int migration_thread(void *data) | |||
| 4903 | struct migration_req *req; | 4912 | struct migration_req *req; |
| 4904 | struct list_head *head; | 4913 | struct list_head *head; |
| 4905 | 4914 | ||
| 4906 | try_to_freeze(); | ||
| 4907 | |||
| 4908 | spin_lock_irq(&rq->lock); | 4915 | spin_lock_irq(&rq->lock); |
| 4909 | 4916 | ||
| 4910 | if (cpu_is_offline(cpu)) { | 4917 | if (cpu_is_offline(cpu)) { |
| @@ -5138,7 +5145,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5138 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); | 5145 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); |
| 5139 | if (IS_ERR(p)) | 5146 | if (IS_ERR(p)) |
| 5140 | return NOTIFY_BAD; | 5147 | return NOTIFY_BAD; |
| 5141 | p->flags |= PF_NOFREEZE; | ||
| 5142 | kthread_bind(p, cpu); | 5148 | kthread_bind(p, cpu); |
| 5143 | /* Must be high prio: stop_machine expects to yield to it. */ | 5149 | /* Must be high prio: stop_machine expects to yield to it. */ |
| 5144 | rq = task_rq_lock(p, &flags); | 5150 | rq = task_rq_lock(p, &flags); |
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index c3391b6020e8..ad64fcb731f2 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
| 11 | 11 | ||
| 12 | /* #define SECCOMP_DEBUG 1 */ | 12 | /* #define SECCOMP_DEBUG 1 */ |
| 13 | #define NR_SECCOMP_MODES 1 | ||
| 13 | 14 | ||
| 14 | /* | 15 | /* |
| 15 | * Secure computing mode 1 allows only read/write/exit/sigreturn. | 16 | * Secure computing mode 1 allows only read/write/exit/sigreturn. |
| @@ -54,3 +55,31 @@ void __secure_computing(int this_syscall) | |||
| 54 | #endif | 55 | #endif |
| 55 | do_exit(SIGKILL); | 56 | do_exit(SIGKILL); |
| 56 | } | 57 | } |
| 58 | |||
| 59 | long prctl_get_seccomp(void) | ||
| 60 | { | ||
| 61 | return current->seccomp.mode; | ||
| 62 | } | ||
| 63 | |||
| 64 | long prctl_set_seccomp(unsigned long seccomp_mode) | ||
| 65 | { | ||
| 66 | long ret; | ||
| 67 | |||
| 68 | /* can set it only once to be even more secure */ | ||
| 69 | ret = -EPERM; | ||
| 70 | if (unlikely(current->seccomp.mode)) | ||
| 71 | goto out; | ||
| 72 | |||
| 73 | ret = -EINVAL; | ||
| 74 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | ||
| 75 | current->seccomp.mode = seccomp_mode; | ||
| 76 | set_thread_flag(TIF_SECCOMP); | ||
| 77 | #ifdef TIF_NOTSC | ||
| 78 | disable_TSC(); | ||
| 79 | #endif | ||
| 80 | ret = 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | out: | ||
| 84 | return ret; | ||
| 85 | } | ||
diff --git a/kernel/signal.c b/kernel/signal.c index f9405609774e..39d122753bac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -718,6 +718,37 @@ out_set: | |||
| 718 | #define LEGACY_QUEUE(sigptr, sig) \ | 718 | #define LEGACY_QUEUE(sigptr, sig) \ |
| 719 | (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) | 719 | (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) |
| 720 | 720 | ||
| 721 | int print_fatal_signals; | ||
| 722 | |||
| 723 | static void print_fatal_signal(struct pt_regs *regs, int signr) | ||
| 724 | { | ||
| 725 | printk("%s/%d: potentially unexpected fatal signal %d.\n", | ||
| 726 | current->comm, current->pid, signr); | ||
| 727 | |||
| 728 | #ifdef __i386__ | ||
| 729 | printk("code at %08lx: ", regs->eip); | ||
| 730 | { | ||
| 731 | int i; | ||
| 732 | for (i = 0; i < 16; i++) { | ||
| 733 | unsigned char insn; | ||
| 734 | |||
| 735 | __get_user(insn, (unsigned char *)(regs->eip + i)); | ||
| 736 | printk("%02x ", insn); | ||
| 737 | } | ||
| 738 | } | ||
| 739 | #endif | ||
| 740 | printk("\n"); | ||
| 741 | show_regs(regs); | ||
| 742 | } | ||
| 743 | |||
| 744 | static int __init setup_print_fatal_signals(char *str) | ||
| 745 | { | ||
| 746 | get_option (&str, &print_fatal_signals); | ||
| 747 | |||
| 748 | return 1; | ||
| 749 | } | ||
| 750 | |||
| 751 | __setup("print-fatal-signals=", setup_print_fatal_signals); | ||
| 721 | 752 | ||
| 722 | static int | 753 | static int |
| 723 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) | 754 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| @@ -1855,6 +1886,8 @@ relock: | |||
| 1855 | * Anything else is fatal, maybe with a core dump. | 1886 | * Anything else is fatal, maybe with a core dump. |
| 1856 | */ | 1887 | */ |
| 1857 | current->flags |= PF_SIGNALED; | 1888 | current->flags |= PF_SIGNALED; |
| 1889 | if ((signr != SIGKILL) && print_fatal_signals) | ||
| 1890 | print_fatal_signal(regs, signr); | ||
| 1858 | if (sig_kernel_coredump(signr)) { | 1891 | if (sig_kernel_coredump(signr)) { |
| 1859 | /* | 1892 | /* |
| 1860 | * If it was able to dump core, this kills all | 1893 | * If it was able to dump core, this kills all |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 73217a9e2875..0f546ddea43d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/notifier.h> | 14 | #include <linux/notifier.h> |
| 15 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
| 16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
| 17 | #include <linux/freezer.h> | ||
| 17 | #include <linux/kthread.h> | 18 | #include <linux/kthread.h> |
| 18 | #include <linux/rcupdate.h> | 19 | #include <linux/rcupdate.h> |
| 19 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
| @@ -488,8 +489,6 @@ void __init softirq_init(void) | |||
| 488 | 489 | ||
| 489 | static int ksoftirqd(void * __bind_cpu) | 490 | static int ksoftirqd(void * __bind_cpu) |
| 490 | { | 491 | { |
| 491 | current->flags |= PF_NOFREEZE; | ||
| 492 | |||
| 493 | set_current_state(TASK_INTERRUPTIBLE); | 492 | set_current_state(TASK_INTERRUPTIBLE); |
| 494 | 493 | ||
| 495 | while (!kthread_should_stop()) { | 494 | while (!kthread_should_stop()) { |
| @@ -614,12 +613,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
| 614 | kthread_bind(per_cpu(ksoftirqd, hotcpu), | 613 | kthread_bind(per_cpu(ksoftirqd, hotcpu), |
| 615 | any_online_cpu(cpu_online_map)); | 614 | any_online_cpu(cpu_online_map)); |
| 616 | case CPU_DEAD: | 615 | case CPU_DEAD: |
| 617 | case CPU_DEAD_FROZEN: | 616 | case CPU_DEAD_FROZEN: { |
| 617 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 618 | |||
| 618 | p = per_cpu(ksoftirqd, hotcpu); | 619 | p = per_cpu(ksoftirqd, hotcpu); |
| 619 | per_cpu(ksoftirqd, hotcpu) = NULL; | 620 | per_cpu(ksoftirqd, hotcpu) = NULL; |
| 621 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
| 620 | kthread_stop(p); | 622 | kthread_stop(p); |
| 621 | takeover_tasklets(hotcpu); | 623 | takeover_tasklets(hotcpu); |
| 622 | break; | 624 | break; |
| 625 | } | ||
| 623 | #endif /* CONFIG_HOTPLUG_CPU */ | 626 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 624 | } | 627 | } |
| 625 | return NOTIFY_OK; | 628 | return NOTIFY_OK; |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 0131e296ffb4..708d4882c0c3 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/cpu.h> | 10 | #include <linux/cpu.h> |
| 11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
| 12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
| 13 | #include <linux/freezer.h> | ||
| 13 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
| 14 | #include <linux/notifier.h> | 15 | #include <linux/notifier.h> |
| 15 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| @@ -116,7 +117,6 @@ static int watchdog(void * __bind_cpu) | |||
| 116 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 117 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
| 117 | 118 | ||
| 118 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 119 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
| 119 | current->flags |= PF_NOFREEZE; | ||
| 120 | 120 | ||
| 121 | /* initialize timestamp */ | 121 | /* initialize timestamp */ |
| 122 | touch_softlockup_watchdog(); | 122 | touch_softlockup_watchdog(); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index fcee2a8e6da3..319821ef78af 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -93,10 +93,6 @@ static void stopmachine_set_state(enum stopmachine_state state) | |||
| 93 | static int stop_machine(void) | 93 | static int stop_machine(void) |
| 94 | { | 94 | { |
| 95 | int i, ret = 0; | 95 | int i, ret = 0; |
| 96 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 97 | |||
| 98 | /* One high-prio thread per cpu. We'll do this one. */ | ||
| 99 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
| 100 | 96 | ||
| 101 | atomic_set(&stopmachine_thread_ack, 0); | 97 | atomic_set(&stopmachine_thread_ack, 0); |
| 102 | stopmachine_num_threads = 0; | 98 | stopmachine_num_threads = 0; |
| @@ -189,6 +185,10 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, | |||
| 189 | 185 | ||
| 190 | p = kthread_create(do_stop, &smdata, "kstopmachine"); | 186 | p = kthread_create(do_stop, &smdata, "kstopmachine"); |
| 191 | if (!IS_ERR(p)) { | 187 | if (!IS_ERR(p)) { |
| 188 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 189 | |||
| 190 | /* One high-prio thread per cpu. We'll do this one. */ | ||
| 191 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
| 192 | kthread_bind(p, cpu); | 192 | kthread_bind(p, cpu); |
| 193 | wake_up_process(p); | 193 | wake_up_process(p); |
| 194 | wait_for_completion(&smdata.done); | 194 | wait_for_completion(&smdata.done); |
diff --git a/kernel/sys.c b/kernel/sys.c index 872271ccc384..4d141ae3e802 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -31,10 +31,12 @@ | |||
| 31 | #include <linux/cn_proc.h> | 31 | #include <linux/cn_proc.h> |
| 32 | #include <linux/getcpu.h> | 32 | #include <linux/getcpu.h> |
| 33 | #include <linux/task_io_accounting_ops.h> | 33 | #include <linux/task_io_accounting_ops.h> |
| 34 | #include <linux/seccomp.h> | ||
| 34 | 35 | ||
| 35 | #include <linux/compat.h> | 36 | #include <linux/compat.h> |
| 36 | #include <linux/syscalls.h> | 37 | #include <linux/syscalls.h> |
| 37 | #include <linux/kprobes.h> | 38 | #include <linux/kprobes.h> |
| 39 | #include <linux/user_namespace.h> | ||
| 38 | 40 | ||
| 39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
| 40 | #include <asm/io.h> | 42 | #include <asm/io.h> |
| @@ -1078,13 +1080,13 @@ static int set_user(uid_t new_ruid, int dumpclear) | |||
| 1078 | { | 1080 | { |
| 1079 | struct user_struct *new_user; | 1081 | struct user_struct *new_user; |
| 1080 | 1082 | ||
| 1081 | new_user = alloc_uid(new_ruid); | 1083 | new_user = alloc_uid(current->nsproxy->user_ns, new_ruid); |
| 1082 | if (!new_user) | 1084 | if (!new_user) |
| 1083 | return -EAGAIN; | 1085 | return -EAGAIN; |
| 1084 | 1086 | ||
| 1085 | if (atomic_read(&new_user->processes) >= | 1087 | if (atomic_read(&new_user->processes) >= |
| 1086 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && | 1088 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && |
| 1087 | new_user != &root_user) { | 1089 | new_user != current->nsproxy->user_ns->root_user) { |
| 1088 | free_uid(new_user); | 1090 | free_uid(new_user); |
| 1089 | return -EAGAIN; | 1091 | return -EAGAIN; |
| 1090 | } | 1092 | } |
| @@ -2241,6 +2243,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
| 2241 | error = SET_ENDIAN(current, arg2); | 2243 | error = SET_ENDIAN(current, arg2); |
| 2242 | break; | 2244 | break; |
| 2243 | 2245 | ||
| 2246 | case PR_GET_SECCOMP: | ||
| 2247 | error = prctl_get_seccomp(); | ||
| 2248 | break; | ||
| 2249 | case PR_SET_SECCOMP: | ||
| 2250 | error = prctl_set_seccomp(arg2); | ||
| 2251 | break; | ||
| 2252 | |||
| 2244 | default: | 2253 | default: |
| 2245 | error = -EINVAL; | 2254 | error = -EINVAL; |
| 2246 | break; | 2255 | break; |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7e11e2c98bf9..b0ec498a18d9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
| @@ -14,6 +14,7 @@ asmlinkage long sys_ni_syscall(void) | |||
| 14 | 14 | ||
| 15 | cond_syscall(sys_nfsservctl); | 15 | cond_syscall(sys_nfsservctl); |
| 16 | cond_syscall(sys_quotactl); | 16 | cond_syscall(sys_quotactl); |
| 17 | cond_syscall(sys32_quotactl); | ||
| 17 | cond_syscall(sys_acct); | 18 | cond_syscall(sys_acct); |
| 18 | cond_syscall(sys_lookup_dcookie); | 19 | cond_syscall(sys_lookup_dcookie); |
| 19 | cond_syscall(sys_swapon); | 20 | cond_syscall(sys_swapon); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d93e13d93f24..7063ebc6db05 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/utsname.h> | 29 | #include <linux/utsname.h> |
| 30 | #include <linux/capability.h> | 30 | #include <linux/capability.h> |
| 31 | #include <linux/smp_lock.h> | 31 | #include <linux/smp_lock.h> |
| 32 | #include <linux/fs.h> | ||
| 32 | #include <linux/init.h> | 33 | #include <linux/init.h> |
| 33 | #include <linux/kernel.h> | 34 | #include <linux/kernel.h> |
| 34 | #include <linux/kobject.h> | 35 | #include <linux/kobject.h> |
| @@ -49,9 +50,6 @@ | |||
| 49 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
| 50 | #include <asm/processor.h> | 51 | #include <asm/processor.h> |
| 51 | 52 | ||
| 52 | extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | ||
| 53 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
| 54 | |||
| 55 | #ifdef CONFIG_X86 | 53 | #ifdef CONFIG_X86 |
| 56 | #include <asm/nmi.h> | 54 | #include <asm/nmi.h> |
| 57 | #include <asm/stacktrace.h> | 55 | #include <asm/stacktrace.h> |
| @@ -61,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | |||
| 61 | 59 | ||
| 62 | /* External variables not in a header file. */ | 60 | /* External variables not in a header file. */ |
| 63 | extern int C_A_D; | 61 | extern int C_A_D; |
| 62 | extern int print_fatal_signals; | ||
| 64 | extern int sysctl_overcommit_memory; | 63 | extern int sysctl_overcommit_memory; |
| 65 | extern int sysctl_overcommit_ratio; | 64 | extern int sysctl_overcommit_ratio; |
| 66 | extern int sysctl_panic_on_oom; | 65 | extern int sysctl_panic_on_oom; |
| @@ -202,7 +201,10 @@ static ctl_table root_table[] = { | |||
| 202 | .mode = 0555, | 201 | .mode = 0555, |
| 203 | .child = dev_table, | 202 | .child = dev_table, |
| 204 | }, | 203 | }, |
| 205 | 204 | /* | |
| 205 | * NOTE: do not add new entries to this table unless you have read | ||
| 206 | * Documentation/sysctl/ctl_unnumbered.txt | ||
| 207 | */ | ||
| 206 | { .ctl_name = 0 } | 208 | { .ctl_name = 0 } |
| 207 | }; | 209 | }; |
| 208 | 210 | ||
| @@ -340,6 +342,14 @@ static ctl_table kern_table[] = { | |||
| 340 | .proc_handler = &proc_dointvec, | 342 | .proc_handler = &proc_dointvec, |
| 341 | }, | 343 | }, |
| 342 | #endif | 344 | #endif |
| 345 | { | ||
| 346 | .ctl_name = CTL_UNNUMBERED, | ||
| 347 | .procname = "print-fatal-signals", | ||
| 348 | .data = &print_fatal_signals, | ||
| 349 | .maxlen = sizeof(int), | ||
| 350 | .mode = 0644, | ||
| 351 | .proc_handler = &proc_dointvec, | ||
| 352 | }, | ||
| 343 | #ifdef __sparc__ | 353 | #ifdef __sparc__ |
| 344 | { | 354 | { |
| 345 | .ctl_name = KERN_SPARC_REBOOT, | 355 | .ctl_name = KERN_SPARC_REBOOT, |
| @@ -814,6 +824,14 @@ static ctl_table vm_table[] = { | |||
| 814 | .mode = 0644, | 824 | .mode = 0644, |
| 815 | .proc_handler = &proc_dointvec, | 825 | .proc_handler = &proc_dointvec, |
| 816 | }, | 826 | }, |
| 827 | { | ||
| 828 | .ctl_name = CTL_UNNUMBERED, | ||
| 829 | .procname = "hugepages_treat_as_movable", | ||
| 830 | .data = &hugepages_treat_as_movable, | ||
| 831 | .maxlen = sizeof(int), | ||
| 832 | .mode = 0644, | ||
| 833 | .proc_handler = &hugetlb_treat_movable_handler, | ||
| 834 | }, | ||
| 817 | #endif | 835 | #endif |
| 818 | { | 836 | { |
| 819 | .ctl_name = VM_LOWMEM_RESERVE_RATIO, | 837 | .ctl_name = VM_LOWMEM_RESERVE_RATIO, |
| @@ -958,6 +976,17 @@ static ctl_table vm_table[] = { | |||
| 958 | .mode = 0644, | 976 | .mode = 0644, |
| 959 | .proc_handler = &proc_doulongvec_minmax, | 977 | .proc_handler = &proc_doulongvec_minmax, |
| 960 | }, | 978 | }, |
| 979 | #ifdef CONFIG_NUMA | ||
| 980 | { | ||
| 981 | .ctl_name = CTL_UNNUMBERED, | ||
| 982 | .procname = "numa_zonelist_order", | ||
| 983 | .data = &numa_zonelist_order, | ||
| 984 | .maxlen = NUMA_ZONELIST_ORDER_LEN, | ||
| 985 | .mode = 0644, | ||
| 986 | .proc_handler = &numa_zonelist_order_handler, | ||
| 987 | .strategy = &sysctl_string, | ||
| 988 | }, | ||
| 989 | #endif | ||
| 961 | #endif | 990 | #endif |
| 962 | #if defined(CONFIG_X86_32) || \ | 991 | #if defined(CONFIG_X86_32) || \ |
| 963 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) | 992 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) |
| @@ -972,6 +1001,10 @@ static ctl_table vm_table[] = { | |||
| 972 | .extra1 = &zero, | 1001 | .extra1 = &zero, |
| 973 | }, | 1002 | }, |
| 974 | #endif | 1003 | #endif |
| 1004 | /* | ||
| 1005 | * NOTE: do not add new entries to this table unless you have read | ||
| 1006 | * Documentation/sysctl/ctl_unnumbered.txt | ||
| 1007 | */ | ||
| 975 | { .ctl_name = 0 } | 1008 | { .ctl_name = 0 } |
| 976 | }; | 1009 | }; |
| 977 | 1010 | ||
| @@ -1112,6 +1145,10 @@ static ctl_table fs_table[] = { | |||
| 1112 | .child = binfmt_misc_table, | 1145 | .child = binfmt_misc_table, |
| 1113 | }, | 1146 | }, |
| 1114 | #endif | 1147 | #endif |
| 1148 | /* | ||
| 1149 | * NOTE: do not add new entries to this table unless you have read | ||
| 1150 | * Documentation/sysctl/ctl_unnumbered.txt | ||
| 1151 | */ | ||
| 1115 | { .ctl_name = 0 } | 1152 | { .ctl_name = 0 } |
| 1116 | }; | 1153 | }; |
| 1117 | 1154 | ||
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 906cae771585..059431ed67db 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
| 196 | 196 | ||
| 197 | /* fill in basic acct fields */ | 197 | /* fill in basic acct fields */ |
| 198 | stats->version = TASKSTATS_VERSION; | 198 | stats->version = TASKSTATS_VERSION; |
| 199 | stats->nvcsw = tsk->nvcsw; | ||
| 200 | stats->nivcsw = tsk->nivcsw; | ||
| 199 | bacct_add_tsk(stats, tsk); | 201 | bacct_add_tsk(stats, tsk); |
| 200 | 202 | ||
| 201 | /* fill in extended acct fields */ | 203 | /* fill in extended acct fields */ |
| @@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
| 242 | */ | 244 | */ |
| 243 | delayacct_add_tsk(stats, tsk); | 245 | delayacct_add_tsk(stats, tsk); |
| 244 | 246 | ||
| 247 | stats->nvcsw += tsk->nvcsw; | ||
| 248 | stats->nivcsw += tsk->nivcsw; | ||
| 245 | } while_each_thread(first, tsk); | 249 | } while_each_thread(first, tsk); |
| 246 | 250 | ||
| 247 | unlock_task_sighand(first, &flags); | 251 | unlock_task_sighand(first, &flags); |
diff --git a/kernel/time.c b/kernel/time.c index f04791f69408..ffe19149d770 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz); | |||
| 57 | */ | 57 | */ |
| 58 | asmlinkage long sys_time(time_t __user * tloc) | 58 | asmlinkage long sys_time(time_t __user * tloc) |
| 59 | { | 59 | { |
| 60 | time_t i; | 60 | /* |
| 61 | struct timeval tv; | 61 | * We read xtime.tv_sec atomically - it's updated |
| 62 | * atomically by update_wall_time(), so no need to | ||
| 63 | * even read-lock the xtime seqlock: | ||
| 64 | */ | ||
| 65 | time_t i = xtime.tv_sec; | ||
| 62 | 66 | ||
| 63 | do_gettimeofday(&tv); | 67 | smp_rmb(); /* sys_time() results are coherent */ |
| 64 | i = tv.tv_sec; | ||
| 65 | 68 | ||
| 66 | if (tloc) { | 69 | if (tloc) { |
| 67 | if (put_user(i,tloc)) | 70 | if (put_user(i, tloc)) |
| 68 | i = -EFAULT; | 71 | i = -EFAULT; |
| 69 | } | 72 | } |
| 70 | return i; | 73 | return i; |
| @@ -373,12 +376,25 @@ void do_gettimeofday (struct timeval *tv) | |||
| 373 | 376 | ||
| 374 | tv->tv_sec = sec; | 377 | tv->tv_sec = sec; |
| 375 | tv->tv_usec = usec; | 378 | tv->tv_usec = usec; |
| 376 | } | ||
| 377 | 379 | ||
| 380 | /* | ||
| 381 | * Make sure xtime.tv_sec [returned by sys_time()] always | ||
| 382 | * follows the gettimeofday() result precisely. This | ||
| 383 | * condition is extremely unlikely, it can hit at most | ||
| 384 | * once per second: | ||
| 385 | */ | ||
| 386 | if (unlikely(xtime.tv_sec != tv->tv_sec)) { | ||
| 387 | unsigned long flags; | ||
| 388 | |||
| 389 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 390 | update_wall_time(); | ||
| 391 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 392 | } | ||
| 393 | } | ||
| 378 | EXPORT_SYMBOL(do_gettimeofday); | 394 | EXPORT_SYMBOL(do_gettimeofday); |
| 379 | 395 | ||
| 396 | #else /* CONFIG_TIME_INTERPOLATION */ | ||
| 380 | 397 | ||
| 381 | #else | ||
| 382 | #ifndef CONFIG_GENERIC_TIME | 398 | #ifndef CONFIG_GENERIC_TIME |
| 383 | /* | 399 | /* |
| 384 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval | 400 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval |
| @@ -394,7 +410,7 @@ void getnstimeofday(struct timespec *tv) | |||
| 394 | } | 410 | } |
| 395 | EXPORT_SYMBOL_GPL(getnstimeofday); | 411 | EXPORT_SYMBOL_GPL(getnstimeofday); |
| 396 | #endif | 412 | #endif |
| 397 | #endif | 413 | #endif /* CONFIG_TIME_INTERPOLATION */ |
| 398 | 414 | ||
| 399 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. | 415 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. |
| 400 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 | 416 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 76212b2a99de..2ad1c37b8dfe 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -205,47 +205,6 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | /** | 207 | /** |
| 208 | * clockevents_request_device | ||
| 209 | */ | ||
| 210 | struct clock_event_device *clockevents_request_device(unsigned int features, | ||
| 211 | cpumask_t cpumask) | ||
| 212 | { | ||
| 213 | struct clock_event_device *cur, *dev = NULL; | ||
| 214 | struct list_head *tmp; | ||
| 215 | |||
| 216 | spin_lock(&clockevents_lock); | ||
| 217 | |||
| 218 | list_for_each(tmp, &clockevent_devices) { | ||
| 219 | cur = list_entry(tmp, struct clock_event_device, list); | ||
| 220 | |||
| 221 | if ((cur->features & features) == features && | ||
| 222 | cpus_equal(cpumask, cur->cpumask)) { | ||
| 223 | if (!dev || dev->rating < cur->rating) | ||
| 224 | dev = cur; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | |||
| 228 | clockevents_exchange_device(NULL, dev); | ||
| 229 | |||
| 230 | spin_unlock(&clockevents_lock); | ||
| 231 | |||
| 232 | return dev; | ||
| 233 | } | ||
| 234 | |||
| 235 | /** | ||
| 236 | * clockevents_release_device | ||
| 237 | */ | ||
| 238 | void clockevents_release_device(struct clock_event_device *dev) | ||
| 239 | { | ||
| 240 | spin_lock(&clockevents_lock); | ||
| 241 | |||
| 242 | clockevents_exchange_device(dev, NULL); | ||
| 243 | clockevents_notify_released(); | ||
| 244 | |||
| 245 | spin_unlock(&clockevents_lock); | ||
| 246 | } | ||
| 247 | |||
| 248 | /** | ||
| 249 | * clockevents_notify - notification about relevant events | 208 | * clockevents_notify - notification about relevant events |
| 250 | */ | 209 | */ |
| 251 | void clockevents_notify(unsigned long reason, void *arg) | 210 | void clockevents_notify(unsigned long reason, void *arg) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index cf53bb5814cb..438c6b723ee2 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
| 14 | #include <linux/jiffies.h> | 14 | #include <linux/jiffies.h> |
| 15 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
| 16 | 16 | #include <linux/capability.h> | |
| 17 | #include <asm/div64.h> | 17 | #include <asm/div64.h> |
| 18 | #include <asm/timex.h> | 18 | #include <asm/timex.h> |
| 19 | 19 | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3d1042f82a68..728cedfd3cbd 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -36,9 +36,17 @@ EXPORT_SYMBOL(xtime_lock); | |||
| 36 | * at zero at system boot time, so wall_to_monotonic will be negative, | 36 | * at zero at system boot time, so wall_to_monotonic will be negative, |
| 37 | * however, we will ALWAYS keep the tv_nsec part positive so we can use | 37 | * however, we will ALWAYS keep the tv_nsec part positive so we can use |
| 38 | * the usual normalization. | 38 | * the usual normalization. |
| 39 | * | ||
| 40 | * wall_to_monotonic is moved after resume from suspend for the monotonic | ||
| 41 | * time not to jump. We need to add total_sleep_time to wall_to_monotonic | ||
| 42 | * to get the real boot based time offset. | ||
| 43 | * | ||
| 44 | * - wall_to_monotonic is no longer the boot time, getboottime must be | ||
| 45 | * used instead. | ||
| 39 | */ | 46 | */ |
| 40 | struct timespec xtime __attribute__ ((aligned (16))); | 47 | struct timespec xtime __attribute__ ((aligned (16))); |
| 41 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); | 48 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); |
| 49 | static unsigned long total_sleep_time; /* seconds */ | ||
| 42 | 50 | ||
| 43 | EXPORT_SYMBOL(xtime); | 51 | EXPORT_SYMBOL(xtime); |
| 44 | 52 | ||
| @@ -251,6 +259,7 @@ void __init timekeeping_init(void) | |||
| 251 | xtime.tv_nsec = 0; | 259 | xtime.tv_nsec = 0; |
| 252 | set_normalized_timespec(&wall_to_monotonic, | 260 | set_normalized_timespec(&wall_to_monotonic, |
| 253 | -xtime.tv_sec, -xtime.tv_nsec); | 261 | -xtime.tv_sec, -xtime.tv_nsec); |
| 262 | total_sleep_time = 0; | ||
| 254 | 263 | ||
| 255 | write_sequnlock_irqrestore(&xtime_lock, flags); | 264 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 256 | } | 265 | } |
| @@ -282,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev) | |||
| 282 | 291 | ||
| 283 | xtime.tv_sec += sleep_length; | 292 | xtime.tv_sec += sleep_length; |
| 284 | wall_to_monotonic.tv_sec -= sleep_length; | 293 | wall_to_monotonic.tv_sec -= sleep_length; |
| 294 | total_sleep_time += sleep_length; | ||
| 285 | } | 295 | } |
| 286 | /* re-base the last cycle value */ | 296 | /* re-base the last cycle value */ |
| 287 | clock->cycle_last = clocksource_read(clock); | 297 | clock->cycle_last = clocksource_read(clock); |
| @@ -476,3 +486,30 @@ void update_wall_time(void) | |||
| 476 | change_clocksource(); | 486 | change_clocksource(); |
| 477 | update_vsyscall(&xtime, clock); | 487 | update_vsyscall(&xtime, clock); |
| 478 | } | 488 | } |
| 489 | |||
| 490 | /** | ||
| 491 | * getboottime - Return the real time of system boot. | ||
| 492 | * @ts: pointer to the timespec to be set | ||
| 493 | * | ||
| 494 | * Returns the time of day in a timespec. | ||
| 495 | * | ||
| 496 | * This is based on the wall_to_monotonic offset and the total suspend | ||
| 497 | * time. Calls to settimeofday will affect the value returned (which | ||
| 498 | * basically means that however wrong your real time clock is at boot time, | ||
| 499 | * you get the right time here). | ||
| 500 | */ | ||
| 501 | void getboottime(struct timespec *ts) | ||
| 502 | { | ||
| 503 | set_normalized_timespec(ts, | ||
| 504 | - (wall_to_monotonic.tv_sec + total_sleep_time), | ||
| 505 | - wall_to_monotonic.tv_nsec); | ||
| 506 | } | ||
| 507 | |||
| 508 | /** | ||
| 509 | * monotonic_to_bootbased - Convert the monotonic time to boot based. | ||
| 510 | * @ts: pointer to the timespec to be converted | ||
| 511 | */ | ||
| 512 | void monotonic_to_bootbased(struct timespec *ts) | ||
| 513 | { | ||
| 514 | ts->tv_sec += total_sleep_time; | ||
| 515 | } | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 8bbcfb77f7d2..e5edc3a22a08 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
| @@ -38,7 +38,7 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); | |||
| 38 | 38 | ||
| 39 | static void print_name_offset(struct seq_file *m, void *sym) | 39 | static void print_name_offset(struct seq_file *m, void *sym) |
| 40 | { | 40 | { |
| 41 | char symname[KSYM_NAME_LEN+1]; | 41 | char symname[KSYM_NAME_LEN]; |
| 42 | 42 | ||
| 43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) | 43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) |
| 44 | SEQ_printf(m, "<%p>", sym); | 44 | SEQ_printf(m, "<%p>", sym); |
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 321693724ad7..8ed62fda16c6 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
| @@ -68,6 +68,7 @@ struct entry { | |||
| 68 | * Number of timeout events: | 68 | * Number of timeout events: |
| 69 | */ | 69 | */ |
| 70 | unsigned long count; | 70 | unsigned long count; |
| 71 | unsigned int timer_flag; | ||
| 71 | 72 | ||
| 72 | /* | 73 | /* |
| 73 | * We save the command-line string to preserve | 74 | * We save the command-line string to preserve |
| @@ -231,7 +232,8 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
| 231 | * incremented. Otherwise the timer is registered in a free slot. | 232 | * incremented. Otherwise the timer is registered in a free slot. |
| 232 | */ | 233 | */ |
| 233 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | 234 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, |
| 234 | void *timerf, char * comm) | 235 | void *timerf, char *comm, |
| 236 | unsigned int timer_flag) | ||
| 235 | { | 237 | { |
| 236 | /* | 238 | /* |
| 237 | * It doesnt matter which lock we take: | 239 | * It doesnt matter which lock we take: |
| @@ -249,6 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
| 249 | input.start_func = startf; | 251 | input.start_func = startf; |
| 250 | input.expire_func = timerf; | 252 | input.expire_func = timerf; |
| 251 | input.pid = pid; | 253 | input.pid = pid; |
| 254 | input.timer_flag = timer_flag; | ||
| 252 | 255 | ||
| 253 | spin_lock_irqsave(lock, flags); | 256 | spin_lock_irqsave(lock, flags); |
| 254 | if (!active) | 257 | if (!active) |
| @@ -266,7 +269,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
| 266 | 269 | ||
| 267 | static void print_name_offset(struct seq_file *m, unsigned long addr) | 270 | static void print_name_offset(struct seq_file *m, unsigned long addr) |
| 268 | { | 271 | { |
| 269 | char symname[KSYM_NAME_LEN+1]; | 272 | char symname[KSYM_NAME_LEN]; |
| 270 | 273 | ||
| 271 | if (lookup_symbol_name(addr, symname) < 0) | 274 | if (lookup_symbol_name(addr, symname) < 0) |
| 272 | seq_printf(m, "<%p>", (void *)addr); | 275 | seq_printf(m, "<%p>", (void *)addr); |
| @@ -295,7 +298,7 @@ static int tstats_show(struct seq_file *m, void *v) | |||
| 295 | period = ktime_to_timespec(time); | 298 | period = ktime_to_timespec(time); |
| 296 | ms = period.tv_nsec / 1000000; | 299 | ms = period.tv_nsec / 1000000; |
| 297 | 300 | ||
| 298 | seq_puts(m, "Timer Stats Version: v0.1\n"); | 301 | seq_puts(m, "Timer Stats Version: v0.2\n"); |
| 299 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | 302 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); |
| 300 | if (atomic_read(&overflow_count)) | 303 | if (atomic_read(&overflow_count)) |
| 301 | seq_printf(m, "Overflow: %d entries\n", | 304 | seq_printf(m, "Overflow: %d entries\n", |
| @@ -303,8 +306,13 @@ static int tstats_show(struct seq_file *m, void *v) | |||
| 303 | 306 | ||
| 304 | for (i = 0; i < nr_entries; i++) { | 307 | for (i = 0; i < nr_entries; i++) { |
| 305 | entry = entries + i; | 308 | entry = entries + i; |
| 306 | seq_printf(m, "%4lu, %5d %-16s ", | 309 | if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) { |
| 310 | seq_printf(m, "%4luD, %5d %-16s ", | ||
| 307 | entry->count, entry->pid, entry->comm); | 311 | entry->count, entry->pid, entry->comm); |
| 312 | } else { | ||
| 313 | seq_printf(m, " %4lu, %5d %-16s ", | ||
| 314 | entry->count, entry->pid, entry->comm); | ||
| 315 | } | ||
| 308 | 316 | ||
| 309 | print_name_offset(m, (unsigned long)entry->start_func); | 317 | print_name_offset(m, (unsigned long)entry->start_func); |
| 310 | seq_puts(m, " ("); | 318 | seq_puts(m, " ("); |
diff --git a/kernel/timer.c b/kernel/timer.c index 1a69705c2fb9..b7792fb03387 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -305,6 +305,20 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | |||
| 305 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | 305 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); |
| 306 | timer->start_pid = current->pid; | 306 | timer->start_pid = current->pid; |
| 307 | } | 307 | } |
| 308 | |||
| 309 | static void timer_stats_account_timer(struct timer_list *timer) | ||
| 310 | { | ||
| 311 | unsigned int flag = 0; | ||
| 312 | |||
| 313 | if (unlikely(tbase_get_deferrable(timer->base))) | ||
| 314 | flag |= TIMER_STATS_FLAG_DEFERRABLE; | ||
| 315 | |||
| 316 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | ||
| 317 | timer->function, timer->start_comm, flag); | ||
| 318 | } | ||
| 319 | |||
| 320 | #else | ||
| 321 | static void timer_stats_account_timer(struct timer_list *timer) {} | ||
| 308 | #endif | 322 | #endif |
| 309 | 323 | ||
| 310 | /** | 324 | /** |
| @@ -1114,6 +1128,7 @@ int do_sysinfo(struct sysinfo *info) | |||
| 1114 | getnstimeofday(&tp); | 1128 | getnstimeofday(&tp); |
| 1115 | tp.tv_sec += wall_to_monotonic.tv_sec; | 1129 | tp.tv_sec += wall_to_monotonic.tv_sec; |
| 1116 | tp.tv_nsec += wall_to_monotonic.tv_nsec; | 1130 | tp.tv_nsec += wall_to_monotonic.tv_nsec; |
| 1131 | monotonic_to_bootbased(&tp); | ||
| 1117 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { | 1132 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { |
| 1118 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; | 1133 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; |
| 1119 | tp.tv_sec++; | 1134 | tp.tv_sec++; |
| @@ -1206,7 +1221,8 @@ static int __devinit init_timers_cpu(int cpu) | |||
| 1206 | /* | 1221 | /* |
| 1207 | * The APs use this path later in boot | 1222 | * The APs use this path later in boot |
| 1208 | */ | 1223 | */ |
| 1209 | base = kmalloc_node(sizeof(*base), GFP_KERNEL, | 1224 | base = kmalloc_node(sizeof(*base), |
| 1225 | GFP_KERNEL | __GFP_ZERO, | ||
| 1210 | cpu_to_node(cpu)); | 1226 | cpu_to_node(cpu)); |
| 1211 | if (!base) | 1227 | if (!base) |
| 1212 | return -ENOMEM; | 1228 | return -ENOMEM; |
| @@ -1217,7 +1233,6 @@ static int __devinit init_timers_cpu(int cpu) | |||
| 1217 | kfree(base); | 1233 | kfree(base); |
| 1218 | return -ENOMEM; | 1234 | return -ENOMEM; |
| 1219 | } | 1235 | } |
| 1220 | memset(base, 0, sizeof(*base)); | ||
| 1221 | per_cpu(tvec_bases, cpu) = base; | 1236 | per_cpu(tvec_bases, cpu) = base; |
| 1222 | } else { | 1237 | } else { |
| 1223 | /* | 1238 | /* |
diff --git a/kernel/user.c b/kernel/user.c index 4869563080e9..98b82507797a 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
| @@ -14,20 +14,19 @@ | |||
| 14 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
| 15 | #include <linux/key.h> | 15 | #include <linux/key.h> |
| 16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/user_namespace.h> | ||
| 17 | 19 | ||
| 18 | /* | 20 | /* |
| 19 | * UID task count cache, to get fast user lookup in "alloc_uid" | 21 | * UID task count cache, to get fast user lookup in "alloc_uid" |
| 20 | * when changing user ID's (ie setuid() and friends). | 22 | * when changing user ID's (ie setuid() and friends). |
| 21 | */ | 23 | */ |
| 22 | 24 | ||
| 23 | #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) | ||
| 24 | #define UIDHASH_SZ (1 << UIDHASH_BITS) | ||
| 25 | #define UIDHASH_MASK (UIDHASH_SZ - 1) | 25 | #define UIDHASH_MASK (UIDHASH_SZ - 1) |
| 26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) | 26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) |
| 27 | #define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) | 27 | #define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid))) |
| 28 | 28 | ||
| 29 | static struct kmem_cache *uid_cachep; | 29 | static struct kmem_cache *uid_cachep; |
| 30 | static struct list_head uidhash_table[UIDHASH_SZ]; | ||
| 31 | 30 | ||
| 32 | /* | 31 | /* |
| 33 | * The uidhash_lock is mostly taken from process context, but it is | 32 | * The uidhash_lock is mostly taken from process context, but it is |
| @@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid) | |||
| 94 | { | 93 | { |
| 95 | struct user_struct *ret; | 94 | struct user_struct *ret; |
| 96 | unsigned long flags; | 95 | unsigned long flags; |
| 96 | struct user_namespace *ns = current->nsproxy->user_ns; | ||
| 97 | 97 | ||
| 98 | spin_lock_irqsave(&uidhash_lock, flags); | 98 | spin_lock_irqsave(&uidhash_lock, flags); |
| 99 | ret = uid_hash_find(uid, uidhashentry(uid)); | 99 | ret = uid_hash_find(uid, uidhashentry(ns, uid)); |
| 100 | spin_unlock_irqrestore(&uidhash_lock, flags); | 100 | spin_unlock_irqrestore(&uidhash_lock, flags); |
| 101 | return ret; | 101 | return ret; |
| 102 | } | 102 | } |
| @@ -120,9 +120,9 @@ void free_uid(struct user_struct *up) | |||
| 120 | } | 120 | } |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | struct user_struct * alloc_uid(uid_t uid) | 123 | struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) |
| 124 | { | 124 | { |
| 125 | struct list_head *hashent = uidhashentry(uid); | 125 | struct list_head *hashent = uidhashentry(ns, uid); |
| 126 | struct user_struct *up; | 126 | struct user_struct *up; |
| 127 | 127 | ||
| 128 | spin_lock_irq(&uidhash_lock); | 128 | spin_lock_irq(&uidhash_lock); |
| @@ -211,11 +211,11 @@ static int __init uid_cache_init(void) | |||
| 211 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | 211 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
| 212 | 212 | ||
| 213 | for(n = 0; n < UIDHASH_SZ; ++n) | 213 | for(n = 0; n < UIDHASH_SZ; ++n) |
| 214 | INIT_LIST_HEAD(uidhash_table + n); | 214 | INIT_LIST_HEAD(init_user_ns.uidhash_table + n); |
| 215 | 215 | ||
| 216 | /* Insert the root user immediately (init already runs as root) */ | 216 | /* Insert the root user immediately (init already runs as root) */ |
| 217 | spin_lock_irq(&uidhash_lock); | 217 | spin_lock_irq(&uidhash_lock); |
| 218 | uid_hash_insert(&root_user, uidhashentry(0)); | 218 | uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0)); |
| 219 | spin_unlock_irq(&uidhash_lock); | 219 | spin_unlock_irq(&uidhash_lock); |
| 220 | 220 | ||
| 221 | return 0; | 221 | return 0; |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c new file mode 100644 index 000000000000..d055d987850c --- /dev/null +++ b/kernel/user_namespace.c | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or | ||
| 3 | * modify it under the terms of the GNU General Public License as | ||
| 4 | * published by the Free Software Foundation, version 2 of the | ||
| 5 | * License. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/version.h> | ||
| 10 | #include <linux/nsproxy.h> | ||
| 11 | #include <linux/user_namespace.h> | ||
| 12 | |||
| 13 | struct user_namespace init_user_ns = { | ||
| 14 | .kref = { | ||
| 15 | .refcount = ATOMIC_INIT(2), | ||
| 16 | }, | ||
| 17 | .root_user = &root_user, | ||
| 18 | }; | ||
| 19 | |||
| 20 | EXPORT_SYMBOL_GPL(init_user_ns); | ||
| 21 | |||
| 22 | #ifdef CONFIG_USER_NS | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Clone a new ns copying an original user ns, setting refcount to 1 | ||
| 26 | * @old_ns: namespace to clone | ||
| 27 | * Return NULL on error (failure to kmalloc), new ns otherwise | ||
| 28 | */ | ||
| 29 | static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) | ||
| 30 | { | ||
| 31 | struct user_namespace *ns; | ||
| 32 | struct user_struct *new_user; | ||
| 33 | int n; | ||
| 34 | |||
| 35 | ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); | ||
| 36 | if (!ns) | ||
| 37 | return ERR_PTR(-ENOMEM); | ||
| 38 | |||
| 39 | kref_init(&ns->kref); | ||
| 40 | |||
| 41 | for (n = 0; n < UIDHASH_SZ; ++n) | ||
| 42 | INIT_LIST_HEAD(ns->uidhash_table + n); | ||
| 43 | |||
| 44 | /* Insert new root user. */ | ||
| 45 | ns->root_user = alloc_uid(ns, 0); | ||
| 46 | if (!ns->root_user) { | ||
| 47 | kfree(ns); | ||
| 48 | return ERR_PTR(-ENOMEM); | ||
| 49 | } | ||
| 50 | |||
| 51 | /* Reset current->user with a new one */ | ||
| 52 | new_user = alloc_uid(ns, current->uid); | ||
| 53 | if (!new_user) { | ||
| 54 | free_uid(ns->root_user); | ||
| 55 | kfree(ns); | ||
| 56 | return ERR_PTR(-ENOMEM); | ||
| 57 | } | ||
| 58 | |||
| 59 | switch_uid(new_user); | ||
| 60 | return ns; | ||
| 61 | } | ||
| 62 | |||
| 63 | struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) | ||
| 64 | { | ||
| 65 | struct user_namespace *new_ns; | ||
| 66 | |||
| 67 | BUG_ON(!old_ns); | ||
| 68 | get_user_ns(old_ns); | ||
| 69 | |||
| 70 | if (!(flags & CLONE_NEWUSER)) | ||
| 71 | return old_ns; | ||
| 72 | |||
| 73 | new_ns = clone_user_ns(old_ns); | ||
| 74 | |||
| 75 | put_user_ns(old_ns); | ||
| 76 | return new_ns; | ||
| 77 | } | ||
| 78 | |||
| 79 | void free_user_ns(struct kref *kref) | ||
| 80 | { | ||
| 81 | struct user_namespace *ns; | ||
| 82 | |||
| 83 | ns = container_of(kref, struct user_namespace, kref); | ||
| 84 | kfree(ns); | ||
| 85 | } | ||
| 86 | |||
| 87 | #endif /* CONFIG_USER_NS */ | ||
diff --git a/kernel/utsname.c b/kernel/utsname.c index 160c8c5136bd..9d8180a0f0d8 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/uts.h> | 13 | #include <linux/uts.h> |
| 14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
| 15 | #include <linux/version.h> | 15 | #include <linux/version.h> |
| 16 | #include <linux/err.h> | ||
| 16 | 17 | ||
| 17 | /* | 18 | /* |
| 18 | * Clone a new ns copying an original utsname, setting refcount to 1 | 19 | * Clone a new ns copying an original utsname, setting refcount to 1 |
| @@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
| 24 | struct uts_namespace *ns; | 25 | struct uts_namespace *ns; |
| 25 | 26 | ||
| 26 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); | 27 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); |
| 27 | if (ns) { | 28 | if (!ns) |
| 28 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 29 | return ERR_PTR(-ENOMEM); |
| 29 | kref_init(&ns->kref); | 30 | |
| 30 | } | 31 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
| 32 | kref_init(&ns->kref); | ||
| 31 | return ns; | 33 | return ns; |
| 32 | } | 34 | } |
| 33 | 35 | ||
| @@ -37,7 +39,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
| 37 | * utsname of this process won't be seen by parent, and vice | 39 | * utsname of this process won't be seen by parent, and vice |
| 38 | * versa. | 40 | * versa. |
| 39 | */ | 41 | */ |
| 40 | struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) | 42 | struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) |
| 41 | { | 43 | { |
| 42 | struct uts_namespace *new_ns; | 44 | struct uts_namespace *new_ns; |
| 43 | 45 | ||
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index f22b9dbd2a9c..c76c06466bfd 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c | |||
| @@ -18,10 +18,7 @@ | |||
| 18 | static void *get_uts(ctl_table *table, int write) | 18 | static void *get_uts(ctl_table *table, int write) |
| 19 | { | 19 | { |
| 20 | char *which = table->data; | 20 | char *which = table->data; |
| 21 | #ifdef CONFIG_UTS_NS | 21 | |
| 22 | struct uts_namespace *uts_ns = current->nsproxy->uts_ns; | ||
| 23 | which = (which - (char *)&init_uts_ns) + (char *)uts_ns; | ||
| 24 | #endif | ||
| 25 | if (!write) | 22 | if (!write) |
| 26 | down_read(&uts_sem); | 23 | down_read(&uts_sem); |
| 27 | else | 24 | else |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3bebf73be976..58e5c152a6bb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -282,8 +282,8 @@ static int worker_thread(void *__cwq) | |||
| 282 | struct cpu_workqueue_struct *cwq = __cwq; | 282 | struct cpu_workqueue_struct *cwq = __cwq; |
| 283 | DEFINE_WAIT(wait); | 283 | DEFINE_WAIT(wait); |
| 284 | 284 | ||
| 285 | if (!cwq->wq->freezeable) | 285 | if (cwq->wq->freezeable) |
| 286 | current->flags |= PF_NOFREEZE; | 286 | set_freezable(); |
| 287 | 287 | ||
| 288 | set_user_nice(current, -5); | 288 | set_user_nice(current, -5); |
| 289 | 289 | ||
| @@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) | |||
| 382 | EXPORT_SYMBOL_GPL(flush_workqueue); | 382 | EXPORT_SYMBOL_GPL(flush_workqueue); |
| 383 | 383 | ||
| 384 | /* | 384 | /* |
| 385 | * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, | 385 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, |
| 386 | * so this work can't be re-armed in any way. | 386 | * so this work can't be re-armed in any way. |
| 387 | */ | 387 | */ |
| 388 | static int try_to_grab_pending(struct work_struct *work) | 388 | static int try_to_grab_pending(struct work_struct *work) |
| 389 | { | 389 | { |
| 390 | struct cpu_workqueue_struct *cwq; | 390 | struct cpu_workqueue_struct *cwq; |
| 391 | int ret = 0; | 391 | int ret = -1; |
| 392 | 392 | ||
| 393 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) | 393 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) |
| 394 | return 1; | 394 | return 0; |
| 395 | 395 | ||
| 396 | /* | 396 | /* |
| 397 | * The queueing is in progress, or it is already queued. Try to | 397 | * The queueing is in progress, or it is already queued. Try to |
| @@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work) | |||
| 457 | wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); | 457 | wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); |
| 458 | } | 458 | } |
| 459 | 459 | ||
| 460 | static int __cancel_work_timer(struct work_struct *work, | ||
| 461 | struct timer_list* timer) | ||
| 462 | { | ||
| 463 | int ret; | ||
| 464 | |||
| 465 | do { | ||
| 466 | ret = (timer && likely(del_timer(timer))); | ||
| 467 | if (!ret) | ||
| 468 | ret = try_to_grab_pending(work); | ||
| 469 | wait_on_work(work); | ||
| 470 | } while (unlikely(ret < 0)); | ||
| 471 | |||
| 472 | work_clear_pending(work); | ||
| 473 | return ret; | ||
| 474 | } | ||
| 475 | |||
| 460 | /** | 476 | /** |
| 461 | * cancel_work_sync - block until a work_struct's callback has terminated | 477 | * cancel_work_sync - block until a work_struct's callback has terminated |
| 462 | * @work: the work which is to be flushed | 478 | * @work: the work which is to be flushed |
| 463 | * | 479 | * |
| 480 | * Returns true if @work was pending. | ||
| 481 | * | ||
| 464 | * cancel_work_sync() will cancel the work if it is queued. If the work's | 482 | * cancel_work_sync() will cancel the work if it is queued. If the work's |
| 465 | * callback appears to be running, cancel_work_sync() will block until it | 483 | * callback appears to be running, cancel_work_sync() will block until it |
| 466 | * has completed. | 484 | * has completed. |
| @@ -476,31 +494,26 @@ static void wait_on_work(struct work_struct *work) | |||
| 476 | * The caller must ensure that workqueue_struct on which this work was last | 494 | * The caller must ensure that workqueue_struct on which this work was last |
| 477 | * queued can't be destroyed before this function returns. | 495 | * queued can't be destroyed before this function returns. |
| 478 | */ | 496 | */ |
| 479 | void cancel_work_sync(struct work_struct *work) | 497 | int cancel_work_sync(struct work_struct *work) |
| 480 | { | 498 | { |
| 481 | while (!try_to_grab_pending(work)) | 499 | return __cancel_work_timer(work, NULL); |
| 482 | cpu_relax(); | ||
| 483 | wait_on_work(work); | ||
| 484 | work_clear_pending(work); | ||
| 485 | } | 500 | } |
| 486 | EXPORT_SYMBOL_GPL(cancel_work_sync); | 501 | EXPORT_SYMBOL_GPL(cancel_work_sync); |
| 487 | 502 | ||
| 488 | /** | 503 | /** |
| 489 | * cancel_rearming_delayed_work - reliably kill off a delayed work. | 504 | * cancel_delayed_work_sync - reliably kill off a delayed work. |
| 490 | * @dwork: the delayed work struct | 505 | * @dwork: the delayed work struct |
| 491 | * | 506 | * |
| 507 | * Returns true if @dwork was pending. | ||
| 508 | * | ||
| 492 | * It is possible to use this function if @dwork rearms itself via queue_work() | 509 | * It is possible to use this function if @dwork rearms itself via queue_work() |
| 493 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). | 510 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). |
| 494 | */ | 511 | */ |
| 495 | void cancel_rearming_delayed_work(struct delayed_work *dwork) | 512 | int cancel_delayed_work_sync(struct delayed_work *dwork) |
| 496 | { | 513 | { |
| 497 | while (!del_timer(&dwork->timer) && | 514 | return __cancel_work_timer(&dwork->work, &dwork->timer); |
| 498 | !try_to_grab_pending(&dwork->work)) | ||
| 499 | cpu_relax(); | ||
| 500 | wait_on_work(&dwork->work); | ||
| 501 | work_clear_pending(&dwork->work); | ||
| 502 | } | 515 | } |
| 503 | EXPORT_SYMBOL(cancel_rearming_delayed_work); | 516 | EXPORT_SYMBOL(cancel_delayed_work_sync); |
| 504 | 517 | ||
| 505 | static struct workqueue_struct *keventd_wq __read_mostly; | 518 | static struct workqueue_struct *keventd_wq __read_mostly; |
| 506 | 519 | ||
| @@ -739,18 +752,17 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
| 739 | if (cwq->thread == NULL) | 752 | if (cwq->thread == NULL) |
| 740 | return; | 753 | return; |
| 741 | 754 | ||
| 755 | flush_cpu_workqueue(cwq); | ||
| 742 | /* | 756 | /* |
| 743 | * If the caller is CPU_DEAD the single flush_cpu_workqueue() | 757 | * If the caller is CPU_DEAD and cwq->worklist was not empty, |
| 744 | * is not enough, a concurrent flush_workqueue() can insert a | 758 | * a concurrent flush_workqueue() can insert a barrier after us. |
| 745 | * barrier after us. | 759 | * However, in that case run_workqueue() won't return and check |
| 760 | * kthread_should_stop() until it flushes all work_struct's. | ||
| 746 | * When ->worklist becomes empty it is safe to exit because no | 761 | * When ->worklist becomes empty it is safe to exit because no |
| 747 | * more work_structs can be queued on this cwq: flush_workqueue | 762 | * more work_structs can be queued on this cwq: flush_workqueue |
| 748 | * checks list_empty(), and a "normal" queue_work() can't use | 763 | * checks list_empty(), and a "normal" queue_work() can't use |
| 749 | * a dead CPU. | 764 | * a dead CPU. |
| 750 | */ | 765 | */ |
| 751 | while (flush_cpu_workqueue(cwq)) | ||
| 752 | ; | ||
| 753 | |||
| 754 | kthread_stop(cwq->thread); | 766 | kthread_stop(cwq->thread); |
| 755 | cwq->thread = NULL; | 767 | cwq->thread = NULL; |
| 756 | } | 768 | } |
