Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile            |   6
-rw-r--r--  kernel/audit.c             |  97
-rw-r--r--  kernel/audit.h             |   1
-rw-r--r--  kernel/auditsc.c           |   5
-rw-r--r--  kernel/cpuset.c            |   6
-rw-r--r--  kernel/exit.c              |  37
-rw-r--r--  kernel/fork.c              |  11
-rw-r--r--  kernel/futex.c             | 138
-rw-r--r--  kernel/hrtimer.c           |   2
-rw-r--r--  kernel/irq/spurious.c      |  12
-rw-r--r--  kernel/kallsyms.c          |  27
-rw-r--r--  kernel/kfifo.c             |   3
-rw-r--r--  kernel/kthread.c           |   2
-rw-r--r--  kernel/lockdep.c           |   4
-rw-r--r--  kernel/module.c            |  63
-rw-r--r--  kernel/nsproxy.c           |  72
-rw-r--r--  kernel/panic.c             |   5
-rw-r--r--  kernel/pid.c               |   2
-rw-r--r--  kernel/printk.c            |  55
-rw-r--r--  kernel/ptrace.c            |  26
-rw-r--r--  kernel/rcutorture.c        |   4
-rw-r--r--  kernel/rtmutex-debug.c     |   6
-rw-r--r--  kernel/rtmutex-tester.c    |   1
-rw-r--r--  kernel/rtmutex.c           |   6
-rw-r--r--  kernel/rtmutex_common.h    |   9
-rw-r--r--  kernel/sched.c             |  30
-rw-r--r--  kernel/seccomp.c           |  29
-rw-r--r--  kernel/signal.c            |  33
-rw-r--r--  kernel/softirq.c           |   9
-rw-r--r--  kernel/softlockup.c        |   2
-rw-r--r--  kernel/stop_machine.c      |   8
-rw-r--r--  kernel/sys.c               |  13
-rw-r--r--  kernel/sys_ni.c            |   1
-rw-r--r--  kernel/sysctl.c            |  45
-rw-r--r--  kernel/taskstats.c         |   4
-rw-r--r--  kernel/time.c              |  32
-rw-r--r--  kernel/time/clockevents.c  |  41
-rw-r--r--  kernel/time/ntp.c          |   2
-rw-r--r--  kernel/time/timekeeping.c  |  37
-rw-r--r--  kernel/time/timer_list.c   |   2
-rw-r--r--  kernel/time/timer_stats.c  |  16
-rw-r--r--  kernel/timer.c             |  19
-rw-r--r--  kernel/user.c              |  18
-rw-r--r--  kernel/user_namespace.c    |  87
-rw-r--r--  kernel/utsname.c           |  12
-rw-r--r--  kernel/utsname_sysctl.c    |   5
-rw-r--r--  kernel/workqueue.c         |  60
47 files changed, 766 insertions, 339 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 642d4277c2ea..2a999836ca18 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,11 +4,12 @@
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    exit.o itimer.o time.o softirq.o resource.o \
-	    sysctl.o capability.o ptrace.o timer.o user.o \
+	    sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o
+	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
+	    utsname.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
@@ -48,7 +49,6 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
-obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 
diff --git a/kernel/audit.c b/kernel/audit.c
index d13276d41410..eb0f9165b401 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -58,6 +58,7 @@
 #include <linux/selinux.h>
 #include <linux/inotify.h>
 #include <linux/freezer.h>
+#include <linux/tty.h>
 
 #include "audit.h"
 
@@ -391,6 +392,7 @@ static int kauditd_thread(void *dummy)
 {
 	struct sk_buff *skb;
 
+	set_freezable();
 	while (!kthread_should_stop()) {
 		skb = skb_dequeue(&audit_skb_queue);
 		wake_up(&audit_backlog_wait);
@@ -423,6 +425,31 @@ static int kauditd_thread(void *dummy)
 	return 0;
 }
 
+static int audit_prepare_user_tty(pid_t pid, uid_t loginuid)
+{
+	struct task_struct *tsk;
+	int err;
+
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid(pid);
+	err = -ESRCH;
+	if (!tsk)
+		goto out;
+	err = 0;
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (!tsk->signal->audit_tty)
+		err = -EPERM;
+	spin_unlock_irq(&tsk->sighand->siglock);
+	if (err)
+		goto out;
+
+	tty_audit_push_task(tsk, loginuid);
+out:
+	read_unlock(&tasklist_lock);
+	return err;
+}
+
 int audit_send_list(void *_dest)
 {
 	struct audit_netlink_list *dest = _dest;
@@ -511,6 +538,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	case AUDIT_DEL:
 	case AUDIT_DEL_RULE:
 	case AUDIT_SIGNAL_INFO:
+	case AUDIT_TTY_GET:
+	case AUDIT_TTY_SET:
 		if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
 			err = -EPERM;
 		break;
@@ -622,6 +651,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		err = audit_filter_user(&NETLINK_CB(skb), msg_type);
 		if (err == 1) {
 			err = 0;
+			if (msg_type == AUDIT_USER_TTY) {
+				err = audit_prepare_user_tty(pid, loginuid);
+				if (err)
+					break;
+			}
 			ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
 			if (ab) {
 				audit_log_format(ab,
@@ -638,8 +672,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 						 " subj=%s", ctx);
 					kfree(ctx);
 				}
-				audit_log_format(ab, " msg='%.1024s'",
-						 (char *)data);
+				if (msg_type != AUDIT_USER_TTY)
+					audit_log_format(ab, " msg='%.1024s'",
+							 (char *)data);
+				else {
+					int size;
+
+					audit_log_format(ab, " msg=");
+					size = nlmsg_len(nlh);
+					audit_log_n_untrustedstring(ab, size,
+								    data);
+				}
 				audit_set_pid(ab, pid);
 				audit_log_end(ab);
 			}
@@ -730,6 +773,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				0, 0, sig_data, sizeof(*sig_data) + len);
 		kfree(sig_data);
 		break;
+	case AUDIT_TTY_GET: {
+		struct audit_tty_status s;
+		struct task_struct *tsk;
+
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid(pid);
+		if (!tsk)
+			err = -ESRCH;
+		else {
+			spin_lock_irq(&tsk->sighand->siglock);
+			s.enabled = tsk->signal->audit_tty != 0;
+			spin_unlock_irq(&tsk->sighand->siglock);
+		}
+		read_unlock(&tasklist_lock);
+		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0,
+				 &s, sizeof(s));
+		break;
+	}
+	case AUDIT_TTY_SET: {
+		struct audit_tty_status *s;
+		struct task_struct *tsk;
+
+		if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
+			return -EINVAL;
+		s = data;
+		if (s->enabled != 0 && s->enabled != 1)
+			return -EINVAL;
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid(pid);
+		if (!tsk)
+			err = -ESRCH;
+		else {
+			spin_lock_irq(&tsk->sighand->siglock);
+			tsk->signal->audit_tty = s->enabled != 0;
+			spin_unlock_irq(&tsk->sighand->siglock);
+		}
+		read_unlock(&tasklist_lock);
+		break;
+	}
 	default:
 		err = -EINVAL;
 		break;
@@ -1185,7 +1267,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 }
 
 /**
- * audit_log_n_unstrustedstring - log a string that may contain random characters
+ * audit_log_n_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
  * @len: lenth of string (not including trailing null)
 * @string: string to be logged
@@ -1201,25 +1283,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
 					const char *string)
 {
-	const unsigned char *p = string;
+	const unsigned char *p;
 
-	while (*p) {
+	for (p = string; p < (const unsigned char *)string + len && *p; p++) {
 		if (*p == '"' || *p < 0x21 || *p > 0x7f) {
 			audit_log_hex(ab, string, len);
 			return string + len + 1;
 		}
-		p++;
 	}
 	audit_log_n_string(ab, len, string);
 	return p + 1;
 }
 
 /**
- * audit_log_unstrustedstring - log a string that may contain random characters
+ * audit_log_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
  * @string: string to be logged
 *
- * Same as audit_log_n_unstrustedstring(), except that strlen is used to
+ * Same as audit_log_n_untrustedstring(), except that strlen is used to
 * determine string length.
 */
 const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
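
The bounded scan above is the substantive fix: audit_log_n_untrustedstring() now examines at most len bytes before deciding whether the data may be logged verbatim or must fall back to hex encoding, where the old `while (*p)` walk could run past the end of a buffer that is not NUL-terminated (exactly what an AUDIT_USER_TTY payload can look like). A minimal userspace sketch of the same decide-then-encode policy; the names here are illustrative, not the kernel's:

    #include <stdio.h>

    /* Print a caller-supplied buffer of at most len bytes: quote it
     * verbatim when every byte is printable and unquotable, otherwise
     * fall back to hex so untrusted bytes cannot forge log fields.
     * Mirrors the bounded loop in audit_log_n_untrustedstring(). */
    static void log_untrusted(const unsigned char *s, size_t len)
    {
            size_t i;

            for (i = 0; i < len && s[i]; i++) {
                    if (s[i] == '"' || s[i] < 0x21 || s[i] > 0x7f) {
                            for (i = 0; i < len; i++)
                                    printf("%02X", s[i]); /* like audit_log_hex() */
                            putchar('\n');
                            return;
                    }
            }
            printf("\"%.*s\"\n", (int)len, s);
    }

    int main(void)
    {
            log_untrusted((const unsigned char *)"hello", 5);  /* quoted */
            log_untrusted((const unsigned char *)"a\"b\n", 4); /* hex */
            return 0;
    }
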
diff --git a/kernel/audit.h b/kernel/audit.h
index 815d6f5c04ee..95877435c347 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
 extern void		    audit_send_reply(int pid, int seq, int type,
					     int done, int multi,
					     void *payload, int size);
-extern void		    audit_log_lost(const char *message);
 extern void		    audit_panic(const char *message);
 
 struct audit_netlink_list {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e36481ed61b4..b7640a5f382a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -71,9 +71,6 @@
 
 extern struct list_head audit_filter_list[];
 
-/* No syscall auditing will take place unless audit_enabled != 0. */
-extern int audit_enabled;
-
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname(). */
 #define AUDIT_NAMES    20
@@ -2040,7 +2037,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 
 /**
  * audit_core_dumps - record information about processes that end abnormally
- * @sig: signal value
+ * @signr: signal value
 *
 * If a process ends with a core dump, something fishy is going on and we
 * should record the event for investigation.
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c4d123f74bd3..b4796d850140 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -981,10 +981,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
 		if (!mmarray)
 			goto done;
-		write_lock_irq(&tasklist_lock);		/* block fork */
+		read_lock(&tasklist_lock);		/* block fork */
 		if (atomic_read(&cs->count) <= ntasks)
 			break;				/* got enough */
-		write_unlock_irq(&tasklist_lock);	/* try again */
+		read_unlock(&tasklist_lock);		/* try again */
 		kfree(mmarray);
 	}
 
@@ -1006,7 +1006,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 			continue;
 		mmarray[n++] = mm;
 	} while_each_thread(g, p);
-	write_unlock_irq(&tasklist_lock);
+	read_unlock(&tasklist_lock);
 
 	/*
 	 * Now that we've dropped the tasklist spinlock, we can
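
The cpuset change downgrades tasklist_lock from write_lock_irq() to read_lock(): the scan only reads the task list and merely needs to exclude fork() (a writer), so the reader side suffices and no longer serializes against other readers or disables interrupts. A toy pthreads sketch of the same reader/writer split, purely illustrative:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t tasklist_lock = PTHREAD_RWLOCK_INITIALIZER;
    static int ntasks; /* stand-in for the task list */

    /* fork-side: adding a task modifies the list, so take it exclusively */
    static void add_task(void)
    {
            pthread_rwlock_wrlock(&tasklist_lock);
            ntasks++;
            pthread_rwlock_unlock(&tasklist_lock);
    }

    /* scan-side: counting tasks only reads, so a shared lock is enough;
     * it still blocks add_task() for its duration, which is all the
     * cpuset code needs ("block fork") */
    static int count_tasks(void)
    {
            int n;

            pthread_rwlock_rdlock(&tasklist_lock);
            n = ntasks;
            pthread_rwlock_unlock(&tasklist_lock);
            return n;
    }

    int main(void)
    {
            add_task();
            printf("%d task(s)\n", count_tasks());
            return 0;
    }
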
diff --git a/kernel/exit.c b/kernel/exit.c
index ca6a11b73023..e8af8d0c2483 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,6 +31,7 @@
 #include <linux/mempolicy.h>
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
+#include <linux/freezer.h>
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
@@ -387,6 +388,11 @@ void daemonize(const char *name, ...)
 	 * they would be locked into memory.
 	 */
 	exit_mm(current);
+	/*
+	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
+	 * or suspend transition begins right now.
+	 */
+	current->flags |= PF_NOFREEZE;
 
 	set_special_pids(1, 1);
 	proc_clear_tty(current);
@@ -858,6 +864,34 @@ static void exit_notify(struct task_struct *tsk)
 	release_task(tsk);
 }
 
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static void check_stack_usage(void)
+{
+	static DEFINE_SPINLOCK(low_water_lock);
+	static int lowest_to_date = THREAD_SIZE;
+	unsigned long *n = end_of_stack(current);
+	unsigned long free;
+
+	while (*n == 0)
+		n++;
+	free = (unsigned long)n - (unsigned long)end_of_stack(current);
+
+	if (free >= lowest_to_date)
+		return;
+
+	spin_lock(&low_water_lock);
+	if (free < lowest_to_date) {
+		printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
+				"left\n",
+				current->comm, free);
+		lowest_to_date = free;
+	}
+	spin_unlock(&low_water_lock);
+}
+#else
+static inline void check_stack_usage(void) {}
+#endif
+
 fastcall NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -937,6 +971,8 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->compat_robust_list))
 		compat_exit_robust_list(tsk);
 #endif
+	if (group_dead)
+		tty_audit_exit();
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 
@@ -949,6 +985,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	exit_sem(tsk);
 	__exit_files(tsk);
 	__exit_fs(tsk);
+	check_stack_usage();
 	exit_thread();
 	cpuset_exit(tsk);
 	exit_keys(tsk);
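
The new check_stack_usage() relies on task stacks being zero-filled when allocated: scanning upward from end_of_stack() to the first nonzero word yields a high-water mark of how deep the task ever went. A userspace model of that watermark scan on a simulated zero-initialized stack; names and sizes here are hypothetical, and like the kernel version the scan assumes at least one dirtied word exists above it:

    #include <stdio.h>
    #include <string.h>

    #define THREAD_SIZE 8192

    /* simulated kernel stack: grows down from the top of the buffer,
     * so bytes near the bottom stay zero until the task goes deep */
    static unsigned long stack[THREAD_SIZE / sizeof(unsigned long)];

    static unsigned long stack_bytes_free(void)
    {
            unsigned long *n = stack; /* end_of_stack(): lowest address */

            while (*n == 0)
                    n++;              /* first word ever dirtied */
            return (unsigned long)n - (unsigned long)stack;
    }

    int main(void)
    {
            size_t words = THREAD_SIZE / sizeof(unsigned long);

            /* pretend the task dirtied the top 1000 bytes of its stack */
            memset(&stack[words - 1000 / sizeof(unsigned long)], 0xff, 1000);

            printf("greatest stack depth: %lu bytes left\n",
                   stack_bytes_free());
            return 0;
    }
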
diff --git a/kernel/fork.c b/kernel/fork.c
index da3a155bba0d..ba39bdb2a7b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
+#include <linux/tty.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -897,6 +898,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	}
 	acct_init_pacct(&sig->pacct);
 
+	tty_audit_fork(sig);
+
 	return 0;
 }
 
@@ -920,7 +923,7 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
 
-	new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
+	new_flags &= ~PF_SUPERPRIV;
 	new_flags |= PF_FORKNOEXEC;
 	if (!(clone_flags & CLONE_PTRACE))
 		p->ptrace = 0;
@@ -999,7 +1002,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (atomic_read(&p->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-				p->user != &root_user)
+				p->user != current->nsproxy->user_ns->root_user)
 			goto bad_fork_free;
 	}
 
@@ -1059,6 +1062,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
+	p->real_start_time = p->start_time;
+	monotonic_to_bootbased(&p->real_start_time);
 	p->security = NULL;
 	p->io_context = NULL;
 	p->io_wait = NULL;
@@ -1601,7 +1606,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC))
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
 		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
diff --git a/kernel/futex.c b/kernel/futex.c
index 45490bec5831..5c3f45d07c53 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -121,6 +121,24 @@ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 static struct vfsmount *futex_mnt;
 
 /*
+ * Take mm->mmap_sem, when futex is shared
+ */
+static inline void futex_lock_mm(struct rw_semaphore *fshared)
+{
+	if (fshared)
+		down_read(fshared);
+}
+
+/*
+ * Release mm->mmap_sem, when the futex is shared
+ */
+static inline void futex_unlock_mm(struct rw_semaphore *fshared)
+{
+	if (fshared)
+		up_read(fshared);
+}
+
+/*
  * We hash on the keys returned from get_futex_key (see below).
  */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -287,7 +305,18 @@ void drop_futex_key_refs(union futex_key *key)
 }
 EXPORT_SYMBOL_GPL(drop_futex_key_refs);
 
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+{
+	u32 curval;
+
+	pagefault_disable();
+	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+	pagefault_enable();
+
+	return curval;
+}
+
+static int get_futex_value_locked(u32 *dest, u32 __user *from)
 {
 	int ret;
 
@@ -620,9 +649,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 
 	newval = FUTEX_WAITERS | new_owner->pid;
 
-	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	pagefault_enable();
+	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
 	if (curval == -EFAULT)
 		ret = -EFAULT;
@@ -659,9 +686,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
-	pagefault_disable();
-	oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
-	pagefault_enable();
+	oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
 
 	if (oldval == -EFAULT)
 		return oldval;
@@ -700,8 +725,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 	union futex_key key;
 	int ret;
 
-	if (fshared)
-		down_read(fshared);
+	futex_lock_mm(fshared);
 
 	ret = get_futex_key(uaddr, fshared, &key);
 	if (unlikely(ret != 0))
@@ -725,8 +749,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	spin_unlock(&hb->lock);
 out:
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 	return ret;
 }
 
@@ -746,8 +769,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
 	int ret, op_ret, attempt = 0;
 
 retryfull:
-	if (fshared)
-		down_read(fshared);
+	futex_lock_mm(fshared);
 
 	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
@@ -793,7 +815,7 @@ retry:
 	 */
 	if (attempt++) {
 		ret = futex_handle_fault((unsigned long)uaddr2,
-					fshared, attempt);
+					 fshared, attempt);
 		if (ret)
 			goto out;
 		goto retry;
@@ -803,8 +825,7 @@ retry:
 	 * If we would have faulted, release mmap_sem,
 	 * fault it in and start all over again.
 	 */
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	ret = get_user(dummy, uaddr2);
 	if (ret)
@@ -841,8 +862,8 @@ retry:
 	if (hb1 != hb2)
 		spin_unlock(&hb2->lock);
 out:
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
+
 	return ret;
 }
 
@@ -861,8 +882,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 	int ret, drop_count = 0;
 
  retry:
-	if (fshared)
-		down_read(fshared);
+	futex_lock_mm(fshared);
 
 	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
@@ -890,8 +910,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 		 * If we would have faulted, release mmap_sem, fault
 		 * it in and start all over again.
 		 */
-		if (fshared)
-			up_read(fshared);
+		futex_unlock_mm(fshared);
 
 		ret = get_user(curval, uaddr1);
 
@@ -944,8 +963,7 @@ out_unlock:
 	drop_futex_key_refs(&key1);
 
 out:
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 	return ret;
 }
 
@@ -1113,10 +1131,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	while (!ret) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 
-		pagefault_disable();
-		curval = futex_atomic_cmpxchg_inatomic(uaddr,
-						       uval, newval);
-		pagefault_enable();
+		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
 		if (curval == -EFAULT)
 			ret = -EFAULT;
@@ -1134,6 +1149,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 #define ARG3_SHARED	1
 
 static long futex_wait_restart(struct restart_block *restart);
+
 static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 		      u32 val, ktime_t *abs_time)
 {
@@ -1148,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	q.pi_state = NULL;
  retry:
-	if (fshared)
-		down_read(fshared);
+	futex_lock_mm(fshared);
 
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
@@ -1186,8 +1201,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * If we would have faulted, release mmap_sem, fault it in and
 	 * start all over again.
 	 */
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	ret = get_user(uval, uaddr);
 
@@ -1206,8 +1220,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * Now the futex is queued and we have checked the data, we
 	 * don't want to hold mmap_sem while we sleep.
 	 */
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	/*
 	 * There might have been scheduling since the queue_me(), as we
@@ -1285,8 +1298,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 	return ret;
 }
 
@@ -1333,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	q.pi_state = NULL;
  retry:
-	if (fshared)
-		down_read(fshared);
+	futex_lock_mm(fshared);
 
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
@@ -1353,9 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 */
 	newval = current->pid;
 
-	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
-	pagefault_enable();
+	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
 
 	if (unlikely(curval == -EFAULT))
 		goto uaddr_faulted;
@@ -1398,9 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		lock_taken = 1;
 	}
 
-	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	pagefault_enable();
+	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
 	if (unlikely(curval == -EFAULT))
 		goto uaddr_faulted;
@@ -1428,8 +1435,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		 * exit to complete.
 		 */
 		queue_unlock(&q, hb);
-		if (fshared)
-			up_read(fshared);
+		futex_unlock_mm(fshared);
 		cond_resched();
 		goto retry;
 
@@ -1465,8 +1471,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * Now the futex is queued and we have checked the data, we
 	 * don't want to hold mmap_sem while we sleep.
 	 */
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	WARN_ON(!q.pi_state);
 	/*
@@ -1480,8 +1485,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		ret = ret ? 0 : -EWOULDBLOCK;
 	}
 
-	if (fshared)
-		down_read(fshared);
+	futex_lock_mm(fshared);
 	spin_lock(q.lock_ptr);
 
 	if (!ret) {
@@ -1518,8 +1522,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
@@ -1527,8 +1530,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 	return ret;
 
  uaddr_faulted:
@@ -1550,8 +1552,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		goto retry_unlocked;
 	}
 
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
@@ -1585,8 +1586,7 @@ retry:
 	/*
 	 * First take all the futex related locks:
 	 */
-	if (fshared)
-		down_read(fshared);
+	futex_lock_mm(fshared);
 
 	ret = get_futex_key(uaddr, fshared, &key);
 	if (unlikely(ret != 0))
@@ -1601,11 +1601,9 @@ retry_unlocked:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	if (!(uval & FUTEX_OWNER_DIED)) {
-		pagefault_disable();
-		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-		pagefault_enable();
-	}
+	if (!(uval & FUTEX_OWNER_DIED))
+		uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
+
 
 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
@@ -1647,8 +1645,7 @@ retry_unlocked:
 out_unlock:
 	spin_unlock(&hb->lock);
 out:
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	return ret;
 
@@ -1671,8 +1668,7 @@ pi_faulted:
 		goto retry_unlocked;
 	}
 
-	if (fshared)
-		up_read(fshared);
+	futex_unlock_mm(fshared);
 
 	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
@@ -1729,8 +1725,8 @@ static int futex_fd(u32 __user *uaddr, int signal)
 
 	if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
 		printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
-			"will be removed from the kernel in June 2007\n",
-			current->comm);
+		       "will be removed from the kernel in June 2007\n",
+		       current->comm);
 	}
 
 	ret = -EINVAL;
@@ -1908,10 +1904,8 @@ retry:
 		 * Wake robust non-PI futexes here. The wakeup of
 		 * PI futexes happens in exit_pi_state():
 		 */
-		if (!pi) {
-			if (uval & FUTEX_WAITERS)
-				futex_wake(uaddr, &curr->mm->mmap_sem, 1);
-		}
+		if (!pi && (uval & FUTEX_WAITERS))
+			futex_wake(uaddr, &curr->mm->mmap_sem, 1);
 	}
 	return 0;
 }
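
Most of the futex diff is mechanical: every open-coded `if (fshared) down_read(fshared);` pair becomes futex_lock_mm()/futex_unlock_mm(), and each pagefault_disable()/futex_atomic_cmpxchg_inatomic()/pagefault_enable() triple becomes cmpxchg_futex_value_locked(). Factoring the guarded acquire into helpers keeps the private and shared futex paths from drifting apart. The shape of the pattern, reduced to a userspace sketch with pthread names (a sketch only, not kernel code):

    #include <pthread.h>
    #include <stdio.h>

    /* Take the lock only when the object is shared; private objects are
     * touched by a single thread and skip locking entirely. This mirrors
     * futex_lock_mm(fshared)/futex_unlock_mm(fshared). */
    static inline void shared_lock(pthread_mutex_t *maybe)
    {
            if (maybe)
                    pthread_mutex_lock(maybe);
    }

    static inline void shared_unlock(pthread_mutex_t *maybe)
    {
            if (maybe)
                    pthread_mutex_unlock(maybe);
    }

    static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
    static int counter;

    static void bump(pthread_mutex_t *maybe_shared)
    {
            shared_lock(maybe_shared);
            counter++;              /* one call site, both locking policies */
            shared_unlock(maybe_shared);
    }

    int main(void)
    {
            bump(&mtx);     /* shared: locked */
            bump(NULL);     /* private: lock elided */
            printf("%d\n", counter);
            return 0;
    }
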
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 23c03f43e196..72d034258ba1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1406,7 +1406,7 @@ static void migrate_hrtimers(int cpu)
 static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
-	long cpu = (long)hcpu;
+	unsigned int cpu = (long)hcpu;
 
 	switch (action) {
 
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index bd9e272d55e9..32b161972fad 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -172,7 +172,17 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		      irqreturn_t action_ret)
 {
 	if (unlikely(action_ret != IRQ_HANDLED)) {
-		desc->irqs_unhandled++;
+		/*
+		 * If we are seeing only the odd spurious IRQ caused by
+		 * bus asynchronicity then don't eventually trigger an error,
+		 * otherwise the couter becomes a doomsday timer for otherwise
+		 * working systems
+		 */
+		if (jiffies - desc->last_unhandled > HZ/10)
+			desc->irqs_unhandled = 1;
+		else
+			desc->irqs_unhandled++;
+		desc->last_unhandled = jiffies;
 		if (unlikely(action_ret != IRQ_NONE))
 			report_bad_irq(irq, desc, action_ret);
 	}
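
The spurious.c fix replaces a monotonically growing irqs_unhandled counter with one that decays: if the previous unhandled interrupt was more than HZ/10 (100 ms) ago, the count restarts at 1, so only genuine bursts of unhandled IRQs can ever reach the threshold that disables the line. A userspace sketch of that rate-gated counter; the clock plumbing is illustrative:

    #include <stdio.h>
    #include <time.h>

    /* Count "bad" events, but only while they arrive in bursts: a quiet
     * gap longer than 100 ms resets the count to 1, so the occasional
     * stray event can never accumulate into a doomsday counter. */
    struct burst_counter {
            unsigned int count;
            struct timespec last;
    };

    static long ms_since(const struct timespec *then, const struct timespec *now)
    {
            return (now->tv_sec - then->tv_sec) * 1000 +
                   (now->tv_nsec - then->tv_nsec) / 1000000;
    }

    static unsigned int note_bad_event(struct burst_counter *bc)
    {
            struct timespec now;

            clock_gettime(CLOCK_MONOTONIC, &now);
            if (bc->count == 0 || ms_since(&bc->last, &now) > 100)
                    bc->count = 1;          /* isolated event: restart */
            else
                    bc->count++;            /* burst: keep counting */
            bc->last = now;
            return bc->count;
    }

    int main(void)
    {
            struct burst_counter bc = { 0 };

            printf("count=%u\n", note_bad_event(&bc));   /* 1 */
            printf("count=%u\n", note_bad_event(&bc));   /* 2: same burst */
            return 0;
    }
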
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index fed54418626c..474219a41929 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -152,7 +152,7 @@ static unsigned int get_symbol_offset(unsigned long pos)
 /* Lookup the address for this symbol. Returns 0 if not found. */
 unsigned long kallsyms_lookup_name(const char *name)
 {
-	char namebuf[KSYM_NAME_LEN+1];
+	char namebuf[KSYM_NAME_LEN];
 	unsigned long i;
 	unsigned int off;
 
@@ -248,7 +248,7 @@ const char *kallsyms_lookup(unsigned long addr,
 {
 	const char *msym;
 
-	namebuf[KSYM_NAME_LEN] = 0;
+	namebuf[KSYM_NAME_LEN - 1] = 0;
 	namebuf[0] = 0;
 
 	if (is_ksym_addr(addr)) {
@@ -265,7 +265,7 @@ const char *kallsyms_lookup(unsigned long addr,
 	/* see if it's in a module */
 	msym = module_address_lookup(addr, symbolsize, offset, modname);
 	if (msym)
-		return strncpy(namebuf, msym, KSYM_NAME_LEN);
+		return strncpy(namebuf, msym, KSYM_NAME_LEN - 1);
 
 	return NULL;
 }
@@ -273,7 +273,7 @@ const char *kallsyms_lookup(unsigned long addr,
 int lookup_symbol_name(unsigned long addr, char *symname)
 {
 	symname[0] = '\0';
-	symname[KSYM_NAME_LEN] = '\0';
+	symname[KSYM_NAME_LEN - 1] = '\0';
 
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
@@ -291,7 +291,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 			unsigned long *offset, char *modname, char *name)
 {
 	name[0] = '\0';
-	name[KSYM_NAME_LEN] = '\0';
+	name[KSYM_NAME_LEN - 1] = '\0';
 
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
@@ -312,18 +312,17 @@ int sprint_symbol(char *buffer, unsigned long address)
 	char *modname;
 	const char *name;
 	unsigned long offset, size;
-	char namebuf[KSYM_NAME_LEN+1];
+	char namebuf[KSYM_NAME_LEN];
 
 	name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
 	if (!name)
 		return sprintf(buffer, "0x%lx", address);
-	else {
-		if (modname)
-			return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
-				size, modname);
-		else
-			return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
-	}
+
+	if (modname)
+		return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
+			       size, modname);
+	else
+		return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
 }
 
 /* Look up a kernel symbol and print it to the kernel messages. */
@@ -343,8 +342,8 @@ struct kallsym_iter
 	unsigned long value;
 	unsigned int nameoff; /* If iterating in core kernel symbols */
 	char type;
-	char name[KSYM_NAME_LEN+1];
-	char module_name[MODULE_NAME_LEN + 1];
+	char name[KSYM_NAME_LEN];
+	char module_name[MODULE_NAME_LEN];
 	int exported;
 };
 
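
The KSYM_NAME_LEN churn across kallsyms.c, lockdep.c and module.c is one convention change: the constant now names the full buffer size, NUL included, instead of "characters plus one extra byte". So `char buf[KSYM_NAME_LEN+1]` becomes `char buf[KSYM_NAME_LEN]`, the terminating store moves to index LEN-1, and strlcpy() is passed the plain constant since strlcpy's bound is the whole destination. A compact reminder of the convention in userspace C (glibc has no strlcpy, so a local equivalent stands in):

    #include <stdio.h>
    #include <string.h>

    #define KSYM_NAME_LEN 128       /* full buffer size, NUL included */

    /* Bounded copy that always NUL-terminates; size is the whole
     * destination buffer, which is why callers now pass KSYM_NAME_LEN
     * rather than KSYM_NAME_LEN + 1. */
    static size_t my_strlcpy(char *dst, const char *src, size_t size)
    {
            size_t len = strlen(src);

            if (size) {
                    size_t n = len >= size ? size - 1 : len;
                    memcpy(dst, src, n);
                    dst[n] = '\0';
            }
            return len;
    }

    int main(void)
    {
            char namebuf[KSYM_NAME_LEN];

            my_strlcpy(namebuf, "kallsyms_lookup_name", sizeof(namebuf));
            namebuf[KSYM_NAME_LEN - 1] = '\0';  /* last valid index */
            printf("%s\n", namebuf);
            return 0;
    }
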
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index cee419143fd4..bc41ad0f24f8 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/kfifo.h>
+#include <linux/log2.h>
 
 /**
  * kfifo_init - allocates a new FIFO using a preallocated buffer
@@ -41,7 +42,7 @@ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
 	struct kfifo *fifo;
 
 	/* size must be a power of 2 */
-	BUG_ON(size & (size - 1));
+	BUG_ON(!is_power_of_2(size));
 
 	fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
 	if (!fifo)
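
The kfifo change is subtler than it looks: `size & (size - 1)` is zero for powers of two, but also for zero itself, so the open-coded test silently accepted a zero size even though kfifo's index masking (`size - 1` used as a mask) breaks for it. is_power_of_2() from <linux/log2.h> rejects 0 as well. The equivalent check in plain C:

    #include <stdbool.h>
    #include <stdio.h>

    /* Matches the kernel's is_power_of_2(): true for 1, 2, 4, ...;
     * false for 0, unlike the bare (n & (n - 1)) == 0 test. */
    static bool is_power_of_2(unsigned long n)
    {
            return n != 0 && (n & (n - 1)) == 0;
    }

    int main(void)
    {
            unsigned long sizes[] = { 0, 1, 3, 4096 };
            size_t i;

            for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                    printf("%lu -> %s\n", sizes[i],
                           is_power_of_2(sizes[i]) ? "ok" : "rejected");
            return 0;
    }
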
diff --git a/kernel/kthread.c b/kernel/kthread.c
index bbd51b81a3e8..a404f7ee7395 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -215,7 +215,7 @@ int kthread_stop(struct task_struct *k)
 EXPORT_SYMBOL(kthread_stop);
 
 
-static __init void kthreadd_setup(void)
+static noinline __init_refok void kthreadd_setup(void)
 {
 	struct task_struct *tsk = current;
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 1a5ff2211d88..edba2ffb43de 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -379,7 +379,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4
 
 static void print_lock_name(struct lock_class *class)
 {
-	char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4;
+	char str[KSYM_NAME_LEN], c1, c2, c3, c4;
 	const char *name;
 
 	get_usage_chars(class, &c1, &c2, &c3, &c4);
@@ -401,7 +401,7 @@ static void print_lockdep_cache(struct lockdep_map *lock)
 static void print_lockdep_cache(struct lockdep_map *lock)
 {
 	const char *name;
-	char str[KSYM_NAME_LEN + 1];
+	char str[KSYM_NAME_LEN];
 
 	name = lock->name;
 	if (!name)
diff --git a/kernel/module.c b/kernel/module.c
index 015d60cfd90e..33c04ad51175 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -61,10 +61,8 @@ extern int module_sysfs_initialized;
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
-/* Protects module list */
-static DEFINE_SPINLOCK(modlist_lock);
-
-/* List of modules, protected by module_mutex AND modlist_lock */
+/* List of modules, protected by module_mutex or preempt_disable
+ * (add/delete uses stop_machine). */
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
@@ -760,14 +758,13 @@ static void print_unload_info(struct seq_file *m, struct module *mod)
 void __symbol_put(const char *symbol)
 {
 	struct module *owner;
-	unsigned long flags;
 	const unsigned long *crc;
 
-	spin_lock_irqsave(&modlist_lock, flags);
+	preempt_disable();
 	if (!__find_symbol(symbol, &owner, &crc, 1))
 		BUG();
 	module_put(owner);
-	spin_unlock_irqrestore(&modlist_lock, flags);
+	preempt_enable();
 }
 EXPORT_SYMBOL(__symbol_put);
 
@@ -1228,14 +1225,14 @@ static void free_module(struct module *mod)
 void *__symbol_get(const char *symbol)
 {
 	struct module *owner;
-	unsigned long value, flags;
+	unsigned long value;
 	const unsigned long *crc;
 
-	spin_lock_irqsave(&modlist_lock, flags);
+	preempt_disable();
 	value = __find_symbol(symbol, &owner, &crc, 1);
 	if (value && !strong_try_module_get(owner))
 		value = 0;
-	spin_unlock_irqrestore(&modlist_lock, flags);
+	preempt_enable();
 
 	return (void *)value;
 }
@@ -2136,7 +2133,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 		sym = get_ksymbol(mod, addr, NULL, NULL);
 		if (!sym)
 			goto out;
-		strlcpy(symname, sym, KSYM_NAME_LEN + 1);
+		strlcpy(symname, sym, KSYM_NAME_LEN);
 		mutex_unlock(&module_mutex);
 		return 0;
 	}
@@ -2161,9 +2158,9 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
 		if (!sym)
 			goto out;
 		if (modname)
-			strlcpy(modname, mod->name, MODULE_NAME_LEN + 1);
+			strlcpy(modname, mod->name, MODULE_NAME_LEN);
 		if (name)
-			strlcpy(name, sym, KSYM_NAME_LEN + 1);
+			strlcpy(name, sym, KSYM_NAME_LEN);
 		mutex_unlock(&module_mutex);
 		return 0;
 	}
@@ -2184,8 +2181,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		*value = mod->symtab[symnum].st_value;
 		*type = mod->symtab[symnum].st_info;
 		strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
-			KSYM_NAME_LEN + 1);
-		strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1);
+			KSYM_NAME_LEN);
+		strlcpy(module_name, mod->name, MODULE_NAME_LEN);
 		*exported = is_exported(name, mod);
 		mutex_unlock(&module_mutex);
 		return 0;
@@ -2232,26 +2229,13 @@ unsigned long module_kallsyms_lookup_name(const char *name)
 /* Called by the /proc file system to return a list of modules. */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct list_head *i;
-	loff_t n = 0;
-
 	mutex_lock(&module_mutex);
-	list_for_each(i, &modules) {
-		if (n++ == *pos)
-			break;
-	}
-	if (i == &modules)
-		return NULL;
-	return i;
+	return seq_list_start(&modules, *pos);
 }
 
 static void *m_next(struct seq_file *m, void *p, loff_t *pos)
 {
-	struct list_head *i = p;
-	(*pos)++;
-	if (i->next == &modules)
-		return NULL;
-	return i->next;
+	return seq_list_next(p, &modules, pos);
 }
 
 static void m_stop(struct seq_file *m, void *p)
@@ -2321,11 +2305,10 @@ const struct seq_operations modules_op = {
 /* Given an address, look for it in the module exception tables. */
 const struct exception_table_entry *search_module_extables(unsigned long addr)
 {
-	unsigned long flags;
 	const struct exception_table_entry *e = NULL;
 	struct module *mod;
 
-	spin_lock_irqsave(&modlist_lock, flags);
+	preempt_disable();
 	list_for_each_entry(mod, &modules, list) {
 		if (mod->num_exentries == 0)
 			continue;
@@ -2336,7 +2319,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 		if (e)
 			break;
 	}
-	spin_unlock_irqrestore(&modlist_lock, flags);
+	preempt_enable();
 
 	/* Now, if we found one, we are running inside it now, hence
 	   we cannot unload the module, hence no refcnt needed. */
@@ -2348,25 +2331,24 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
  */
 int is_module_address(unsigned long addr)
 {
-	unsigned long flags;
 	struct module *mod;
 
-	spin_lock_irqsave(&modlist_lock, flags);
+	preempt_disable();
 
 	list_for_each_entry(mod, &modules, list) {
 		if (within(addr, mod->module_core, mod->core_size)) {
-			spin_unlock_irqrestore(&modlist_lock, flags);
+			preempt_enable();
 			return 1;
 		}
 	}
 
-	spin_unlock_irqrestore(&modlist_lock, flags);
+	preempt_enable();
 
 	return 0;
 }
 
 
-/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
+/* Is this a valid kernel address? */
 struct module *__module_text_address(unsigned long addr)
 {
 	struct module *mod;
@@ -2381,11 +2363,10 @@ struct module *__module_text_address(unsigned long addr)
 struct module *module_text_address(unsigned long addr)
 {
 	struct module *mod;
-	unsigned long flags;
 
-	spin_lock_irqsave(&modlist_lock, flags);
+	preempt_disable();
 	mod = __module_text_address(addr);
-	spin_unlock_irqrestore(&modlist_lock, flags);
+	preempt_enable();
 
 	return mod;
 }
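
m_start() and m_next() shrink to one-liners because seq_list_start()/seq_list_next() implement exactly the cursor the old code open-coded: walk a list_head to the *pos'th node, return NULL past the end, and advance while bumping *pos. A userspace rendering of those two helpers over a bare doubly linked list; this is a simplification of the real ones in fs/seq_file.c:

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    /* seq_list_start(): return the pos'th element, or NULL past the end */
    static struct list_head *seq_list_start(struct list_head *head, long pos)
    {
            struct list_head *lh;

            for (lh = head->next; lh != head; lh = lh->next)
                    if (pos-- == 0)
                            return lh;
            return NULL;
    }

    /* seq_list_next(): advance the cursor and the position together */
    static struct list_head *seq_list_next(struct list_head *v,
                                           struct list_head *head, long *ppos)
    {
            struct list_head *lh = v->next;

            ++*ppos;
            return lh == head ? NULL : lh;
    }

    int main(void)
    {
            struct list_head head, a, b, *p;
            long pos = 0;

            head.next = &a; a.next = &b; b.next = &head; /* head -> a -> b */
            b.prev = &a; a.prev = &head; head.prev = &b;

            for (p = seq_list_start(&head, pos); p;
                 p = seq_list_next(p, &head, &pos))
                    printf("element at pos %ld\n", pos);
            return 0;
    }
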
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 9e83b589f754..10f0bbba382b 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -21,6 +21,8 @@
 #include <linux/utsname.h>
 #include <linux/pid_namespace.h>
 
+static struct kmem_cache *nsproxy_cachep;
+
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 static inline void get_nsproxy(struct nsproxy *ns)
@@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
 {
 	struct nsproxy *ns;
 
-	ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
-	if (ns)
+	ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
+	if (ns) {
+		memcpy(ns, orig, sizeof(struct nsproxy));
 		atomic_set(&ns->count, 1);
+	}
 	return ns;
 }
 
@@ -54,33 +58,51 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
  * Return the newly created nsproxy. Do not attach this to the task,
  * leave it to the caller to do proper locking and attach it to task.
  */
-static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk,
-			struct fs_struct *new_fs)
+static struct nsproxy *create_new_namespaces(unsigned long flags,
+			struct task_struct *tsk, struct fs_struct *new_fs)
 {
 	struct nsproxy *new_nsp;
+	int err;
 
 	new_nsp = clone_nsproxy(tsk->nsproxy);
 	if (!new_nsp)
 		return ERR_PTR(-ENOMEM);
 
 	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
-	if (IS_ERR(new_nsp->mnt_ns))
+	if (IS_ERR(new_nsp->mnt_ns)) {
+		err = PTR_ERR(new_nsp->mnt_ns);
 		goto out_ns;
+	}
 
 	new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
-	if (IS_ERR(new_nsp->uts_ns))
+	if (IS_ERR(new_nsp->uts_ns)) {
+		err = PTR_ERR(new_nsp->uts_ns);
 		goto out_uts;
+	}
 
 	new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
-	if (IS_ERR(new_nsp->ipc_ns))
+	if (IS_ERR(new_nsp->ipc_ns)) {
+		err = PTR_ERR(new_nsp->ipc_ns);
 		goto out_ipc;
+	}
 
 	new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns);
-	if (IS_ERR(new_nsp->pid_ns))
+	if (IS_ERR(new_nsp->pid_ns)) {
+		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
+	}
+
+	new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
+	if (IS_ERR(new_nsp->user_ns)) {
+		err = PTR_ERR(new_nsp->user_ns);
+		goto out_user;
+	}
 
 	return new_nsp;
 
+out_user:
+	if (new_nsp->pid_ns)
+		put_pid_ns(new_nsp->pid_ns);
 out_pid:
 	if (new_nsp->ipc_ns)
 		put_ipc_ns(new_nsp->ipc_ns);
@@ -91,15 +113,15 @@ out_uts:
 	if (new_nsp->mnt_ns)
 		put_mnt_ns(new_nsp->mnt_ns);
 out_ns:
-	kfree(new_nsp);
-	return ERR_PTR(-ENOMEM);
+	kmem_cache_free(nsproxy_cachep, new_nsp);
+	return ERR_PTR(err);
 }
 
 /*
  * called from clone. This now handles copy for nsproxy and all
  * namespaces therein.
  */
-int copy_namespaces(int flags, struct task_struct *tsk)
+int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 {
 	struct nsproxy *old_ns = tsk->nsproxy;
 	struct nsproxy *new_ns;
@@ -110,7 +132,7 @@ int copy_namespaces(int flags, struct task_struct *tsk)
 
 	get_nsproxy(old_ns);
 
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -140,7 +162,9 @@ void free_nsproxy(struct nsproxy *ns)
 		put_ipc_ns(ns->ipc_ns);
 	if (ns->pid_ns)
 		put_pid_ns(ns->pid_ns);
-	kfree(ns);
+	if (ns->user_ns)
+		put_user_ns(ns->user_ns);
+	kmem_cache_free(nsproxy_cachep, ns);
 }
 
 /*
@@ -152,19 +176,10 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 {
 	int err = 0;
 
-	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+			       CLONE_NEWUSER)))
 		return 0;
 
-#ifndef CONFIG_IPC_NS
-	if (unshare_flags & CLONE_NEWIPC)
-		return -EINVAL;
-#endif
-
-#ifndef CONFIG_UTS_NS
-	if (unshare_flags & CLONE_NEWUTS)
-		return -EINVAL;
-#endif
-
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
@@ -174,3 +189,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 		err = PTR_ERR(*new_nsp);
 	return err;
 }
+
+static int __init nsproxy_cache_init(void)
+{
+	nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
+					   0, SLAB_PANIC, NULL, NULL);
+	return 0;
+}
+
+module_init(nsproxy_cache_init);
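
Besides moving nsproxy allocation to a dedicated slab cache, the rewrite makes create_new_namespaces() propagate the real PTR_ERR() from whichever copy_*() call failed instead of a blanket -ENOMEM, unwinding the already-taken references through stacked goto labels in reverse order. The shape of that idiom, reduced to two resources in a userspace sketch with illustrative names:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Acquire A then B; on failure release whatever was already taken,
     * in reverse order, and hand the original error back to the caller.
     * This is the pattern behind create_new_namespaces()'s out_* labels. */
    static int setup(char **a, char **b)
    {
            int err;

            *a = malloc(16);
            if (!*a) {
                    err = -ENOMEM;
                    goto out;
            }

            *b = malloc(16);
            if (!*b) {
                    err = -ENOMEM;
                    goto out_a;     /* unwind only what exists */
            }

            return 0;

    out_a:
            free(*a);
    out:
            return err;
    }

    int main(void)
    {
            char *a, *b;
            int err = setup(&a, &b);

            if (err) {
                    fprintf(stderr, "setup failed: %d\n", err);
                    return 1;
            }
            puts("both resources acquired");
            free(b);
            free(a);
            return 0;
    }
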
diff --git a/kernel/panic.c b/kernel/panic.c
index 623d1828259a..f64f4c1ac11f 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -159,14 +159,15 @@ const char *print_tainted(void)
159{ 159{
160 static char buf[20]; 160 static char buf[20];
161 if (tainted) { 161 if (tainted) {
162 snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c", 162 snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c",
163 tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', 163 tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
164 tainted & TAINT_FORCED_MODULE ? 'F' : ' ', 164 tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
165 tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', 165 tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
166 tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', 166 tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
167 tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', 167 tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
168 tainted & TAINT_BAD_PAGE ? 'B' : ' ', 168 tainted & TAINT_BAD_PAGE ? 'B' : ' ',
169 tainted & TAINT_USER ? 'U' : ' '); 169 tainted & TAINT_USER ? 'U' : ' ',
170 tainted & TAINT_DIE ? 'D' : ' ');
170 } 171 }
171 else 172 else
172 snprintf(buf, sizeof(buf), "Not tainted"); 173 snprintf(buf, sizeof(buf), "Not tainted");
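The hunk widens the format string from seven to eight %c conversions for the new TAINT_DIE 'D' flag; buf[20] still fits, since "Tainted: " plus eight flag characters and a NUL is 18 bytes. A small userspace rendering of the same mask-to-letters mapping; the bit values are assumptions taken from the era's include/linux/kernel.h:

#include <stdio.h>

/* assumed bit assignments (include/linux/kernel.h of this era) */
#define TAINT_PROPRIETARY_MODULE	(1 << 0)
#define TAINT_FORCED_MODULE		(1 << 1)
#define TAINT_UNSAFE_SMP		(1 << 2)
#define TAINT_FORCED_RMMOD		(1 << 3)
#define TAINT_MACHINE_CHECK		(1 << 4)
#define TAINT_BAD_PAGE			(1 << 5)
#define TAINT_USER			(1 << 6)
#define TAINT_DIE			(1 << 7)

int main(void)
{
	unsigned long tainted = TAINT_USER | TAINT_DIE;
	char buf[20];

	snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c",
		 tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
		 tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
		 tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
		 tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
		 tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
		 tainted & TAINT_BAD_PAGE ? 'B' : ' ',
		 tainted & TAINT_USER ? 'U' : ' ',
		 tainted & TAINT_DIE ? 'D' : ' ');
	puts(buf);	/* prints "Tainted: G     UD" */
	return 0;
}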
diff --git a/kernel/pid.c b/kernel/pid.c
index eb66bd2953ab..c6e3f9ffff87 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -365,7 +365,7 @@ struct pid *find_ge_pid(int nr)
365} 365}
366EXPORT_SYMBOL_GPL(find_get_pid); 366EXPORT_SYMBOL_GPL(find_get_pid);
367 367
368struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns) 368struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
369{ 369{
370 BUG_ON(!old_ns); 370 BUG_ON(!old_ns);
371 get_pid_ns(old_ns); 371 get_pid_ns(old_ns);
diff --git a/kernel/printk.c b/kernel/printk.c
index 0bbdeac2810c..051d27e36a6c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -449,13 +449,16 @@ static int printk_time = 1;
449#else 449#else
450static int printk_time = 0; 450static int printk_time = 0;
451#endif 451#endif
452module_param(printk_time, int, S_IRUGO | S_IWUSR); 452module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
453 453
454static int __init printk_time_setup(char *str) 454static int __init printk_time_setup(char *str)
455{ 455{
456 if (*str) 456 if (*str)
457 return 0; 457 return 0;
458 printk_time = 1; 458 printk_time = 1;
459 printk(KERN_NOTICE "The 'time' option is deprecated and "
460 "is scheduled for removal in early 2008\n");
461 printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n");
459 return 1; 462 return 1;
460} 463}
461 464
@@ -483,6 +486,9 @@ static int have_callable_console(void)
483 * @fmt: format string 486 * @fmt: format string
484 * 487 *
485 * This is printk(). It can be called from any context. We want it to work. 488 * This is printk(). It can be called from any context. We want it to work.
489 * Be aware of the fact that if oops_in_progress is not set, we might try to
490 * wake klogd up which could deadlock on runqueue lock if printk() is called
491 * from scheduler code.
486 * 492 *
487 * We try to grab the console_sem. If we succeed, it's easy - we log the output and 493 * We try to grab the console_sem. If we succeed, it's easy - we log the output and
488 * call the console drivers. If we fail to get the semaphore we place the output 494 * call the console drivers. If we fail to get the semaphore we place the output
@@ -654,7 +660,7 @@ static void call_console_drivers(unsigned long start, unsigned long end)
654 */ 660 */
655static int __init console_setup(char *str) 661static int __init console_setup(char *str)
656{ 662{
657 char name[sizeof(console_cmdline[0].name)]; 663 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
658 char *s, *options; 664 char *s, *options;
659 int idx; 665 int idx;
660 666
@@ -662,27 +668,27 @@ static int __init console_setup(char *str)
662 * Decode str into name, index, options. 668 * Decode str into name, index, options.
663 */ 669 */
664 if (str[0] >= '0' && str[0] <= '9') { 670 if (str[0] >= '0' && str[0] <= '9') {
665 strcpy(name, "ttyS"); 671 strcpy(buf, "ttyS");
666 strncpy(name + 4, str, sizeof(name) - 5); 672 strncpy(buf + 4, str, sizeof(buf) - 5);
667 } else { 673 } else {
668 strncpy(name, str, sizeof(name) - 1); 674 strncpy(buf, str, sizeof(buf) - 1);
669 } 675 }
670 name[sizeof(name) - 1] = 0; 676 buf[sizeof(buf) - 1] = 0;
671 if ((options = strchr(str, ',')) != NULL) 677 if ((options = strchr(str, ',')) != NULL)
672 *(options++) = 0; 678 *(options++) = 0;
673#ifdef __sparc__ 679#ifdef __sparc__
674 if (!strcmp(str, "ttya")) 680 if (!strcmp(str, "ttya"))
675 strcpy(name, "ttyS0"); 681 strcpy(buf, "ttyS0");
676 if (!strcmp(str, "ttyb")) 682 if (!strcmp(str, "ttyb"))
677 strcpy(name, "ttyS1"); 683 strcpy(buf, "ttyS1");
678#endif 684#endif
679 for (s = name; *s; s++) 685 for (s = buf; *s; s++)
680 if ((*s >= '0' && *s <= '9') || *s == ',') 686 if ((*s >= '0' && *s <= '9') || *s == ',')
681 break; 687 break;
682 idx = simple_strtoul(s, NULL, 10); 688 idx = simple_strtoul(s, NULL, 10);
683 *s = 0; 689 *s = 0;
684 690
685 add_preferred_console(name, idx, options); 691 add_preferred_console(buf, idx, options);
686 return 1; 692 return 1;
687} 693}
688__setup("console=", console_setup); 694__setup("console=", console_setup);
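The rename from name[] to buf[] with four extra bytes leaves room for a numeric index after a driver name of maximal length, which the old buffer could not hold. A standalone sketch of the same decode logic for the string following "console=", simplified in that the comma scan runs over the copy rather than the original string:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char str[] = "ttyS0,115200n8";	/* what follows "console=" */
	char buf[16];			/* driver name + 4 for the index */
	char *s, *options;
	int idx;

	if (str[0] >= '0' && str[0] <= '9') {
		strcpy(buf, "ttyS");	/* bare number means a serial port */
		strncpy(buf + 4, str, sizeof(buf) - 5);
	} else {
		strncpy(buf, str, sizeof(buf) - 1);
	}
	buf[sizeof(buf) - 1] = 0;
	if ((options = strchr(buf, ',')) != NULL)
		*(options++) = 0;	/* split off the option string */
	for (s = buf; *s; s++)		/* find where the index starts */
		if ((*s >= '0' && *s <= '9') || *s == ',')
			break;
	idx = strtoul(s, NULL, 10);
	*s = 0;				/* truncate buf to the bare name */
	printf("name=%s idx=%d options=%s\n",
	       buf, idx, options ? options : "");
	return 0;			/* prints: name=ttyS idx=0 options=115200n8 */
}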
@@ -709,7 +715,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
709 * See if this tty is not yet registered, and 715 * See if this tty is not yet registered, and
710 * if we have a slot free. 716 * if we have a slot free.
711 */ 717 */
712 for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) 718 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
713 if (strcmp(console_cmdline[i].name, name) == 0 && 719 if (strcmp(console_cmdline[i].name, name) == 0 &&
714 console_cmdline[i].index == idx) { 720 console_cmdline[i].index == idx) {
715 selected_console = i; 721 selected_console = i;
@@ -726,6 +732,25 @@ int __init add_preferred_console(char *name, int idx, char *options)
726 return 0; 732 return 0;
727} 733}
728 734
735int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
736{
737 struct console_cmdline *c;
738 int i;
739
740 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
741 if (strcmp(console_cmdline[i].name, name) == 0 &&
742 console_cmdline[i].index == idx) {
743 c = &console_cmdline[i];
744 memcpy(c->name, name_new, sizeof(c->name));
745 c->name[sizeof(c->name) - 1] = 0;
746 c->options = options;
747 c->index = idx_new;
748 return i;
749 }
750 /* not found */
751 return -1;
752}
753
729#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND 754#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
730/** 755/**
731 * suspend_console - suspend the console subsystem 756 * suspend_console - suspend the console subsystem
@@ -942,6 +967,9 @@ void register_console(struct console *console)
942 if (preferred_console < 0 || bootconsole || !console_drivers) 967 if (preferred_console < 0 || bootconsole || !console_drivers)
943 preferred_console = selected_console; 968 preferred_console = selected_console;
944 969
970 if (console->early_setup)
971 console->early_setup();
972
945 /* 973 /*
946 * See if we want to use this console driver. If we 974 * See if we want to use this console driver. If we
947 * didn't select a console we take the first one 975 * didn't select a console we take the first one
@@ -985,12 +1013,15 @@ void register_console(struct console *console)
985 if (!(console->flags & CON_ENABLED)) 1013 if (!(console->flags & CON_ENABLED))
986 return; 1014 return;
987 1015
988 if (bootconsole) { 1016 if (bootconsole && (console->flags & CON_CONSDEV)) {
989 printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", 1017 printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n",
990 bootconsole->name, bootconsole->index, 1018 bootconsole->name, bootconsole->index,
991 console->name, console->index); 1019 console->name, console->index);
992 unregister_console(bootconsole); 1020 unregister_console(bootconsole);
993 console->flags &= ~CON_PRINTBUFFER; 1021 console->flags &= ~CON_PRINTBUFFER;
1022 } else {
1023 printk(KERN_INFO "console [%s%d] enabled\n",
1024 console->name, console->index);
994 } 1025 }
995 1026
996 /* 1027 /*
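Two behavioural changes land in register_console(): the boot-to-real handover now happens only for CON_CONSDEV consoles, and every other newly enabled console announces itself. The new update_console_cmdline() exists so early code can migrate a legacy command-line name in place; a hedged sketch of a caller, with the fixup function itself hypothetical:

/* hypothetical early-boot fixup: treat a legacy "console=ttya" as ttyS0 */
static int __init legacy_console_fixup(void)
{
	if (update_console_cmdline("ttya", 0, "ttyS", 0, NULL) >= 0)
		printk(KERN_INFO "console: ttya remapped to ttyS0\n");
	return 0;
}
console_initcall(legacy_console_fixup);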
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ad7949a589dd..4a1745f1dadf 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -161,6 +161,7 @@ int ptrace_may_attach(struct task_struct *task)
161int ptrace_attach(struct task_struct *task) 161int ptrace_attach(struct task_struct *task)
162{ 162{
163 int retval; 163 int retval;
164 unsigned long flags;
164 165
165 audit_ptrace(task); 166 audit_ptrace(task);
166 167
@@ -181,9 +182,7 @@ repeat:
181 * cpu's that may have task_lock). 182 * cpu's that may have task_lock).
182 */ 183 */
183 task_lock(task); 184 task_lock(task);
184 local_irq_disable(); 185 if (!write_trylock_irqsave(&tasklist_lock, flags)) {
185 if (!write_trylock(&tasklist_lock)) {
186 local_irq_enable();
187 task_unlock(task); 186 task_unlock(task);
188 do { 187 do {
189 cpu_relax(); 188 cpu_relax();
@@ -211,7 +210,7 @@ repeat:
211 force_sig_specific(SIGSTOP, task); 210 force_sig_specific(SIGSTOP, task);
212 211
213bad: 212bad:
214 write_unlock_irq(&tasklist_lock); 213 write_unlock_irqrestore(&tasklist_lock, flags);
215 task_unlock(task); 214 task_unlock(task);
216out: 215out:
217 return retval; 216 return retval;
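ptrace_attach() previously open-coded local_irq_disable() plus write_trylock(); the combined write_trylock_irqsave() leaves interrupts enabled when the lock is busy and remembers the flags on success. The usage pattern, sketched with a hypothetical lock:

static DEFINE_RWLOCK(my_lock);

static void my_write_side(void)
{
	unsigned long flags;

	while (!write_trylock_irqsave(&my_lock, flags)) {
		/* lock busy: back off with interrupts still enabled */
		cpu_relax();
	}
	/* ... critical section runs with irqs off ... */
	write_unlock_irqrestore(&my_lock, flags);
}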
@@ -491,3 +490,22 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
491 return ret; 490 return ret;
492} 491}
493#endif /* __ARCH_SYS_PTRACE */ 492#endif /* __ARCH_SYS_PTRACE */
493
494int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data)
495{
496 unsigned long tmp;
497 int copied;
498
499 copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0);
500 if (copied != sizeof(tmp))
501 return -EIO;
502 return put_user(tmp, (unsigned long __user *)data);
503}
504
505int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
506{
507 int copied;
508
509 copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
510 return (copied == sizeof(data)) ? 0 : -EIO;
511}
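generic_ptrace_peekdata()/generic_ptrace_pokedata() consolidate the per-arch copies of the PTRACE_PEEKDATA/PTRACE_POKEDATA word transfer: read or write exactly one word, or fail with -EIO. The userspace-visible semantics are unchanged; a minimal demo of the word-sized transfer they implement:

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

static long word = 0x1234;	/* same virtual address in parent and child */

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);		/* let the parent inspect us */
		_exit(0);
	}
	waitpid(child, NULL, 0);

	/* transfers exactly sizeof(long) bytes, or fails as a whole */
	long val = ptrace(PTRACE_PEEKDATA, child, &word, NULL);
	printf("child's word: %#lx\n", val);

	ptrace(PTRACE_POKEDATA, child, &word, (void *)0x5678L);
	ptrace(PTRACE_KILL, child, NULL, NULL);
	waitpid(child, NULL, 0);
	return 0;
}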
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 55ba82a85a66..ddff33247785 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -40,6 +40,7 @@
40#include <linux/moduleparam.h> 40#include <linux/moduleparam.h>
41#include <linux/percpu.h> 41#include <linux/percpu.h>
42#include <linux/notifier.h> 42#include <linux/notifier.h>
43#include <linux/freezer.h>
43#include <linux/cpu.h> 44#include <linux/cpu.h>
44#include <linux/random.h> 45#include <linux/random.h>
45#include <linux/delay.h> 46#include <linux/delay.h>
@@ -518,7 +519,6 @@ rcu_torture_writer(void *arg)
518 519
519 VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); 520 VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
520 set_user_nice(current, 19); 521 set_user_nice(current, 19);
521 current->flags |= PF_NOFREEZE;
522 522
523 do { 523 do {
524 schedule_timeout_uninterruptible(1); 524 schedule_timeout_uninterruptible(1);
@@ -558,7 +558,6 @@ rcu_torture_fakewriter(void *arg)
558 558
559 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); 559 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
560 set_user_nice(current, 19); 560 set_user_nice(current, 19);
561 current->flags |= PF_NOFREEZE;
562 561
563 do { 562 do {
564 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 563 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
@@ -589,7 +588,6 @@ rcu_torture_reader(void *arg)
589 588
590 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 589 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
591 set_user_nice(current, 19); 590 set_user_nice(current, 19);
592 current->flags |= PF_NOFREEZE;
593 591
594 do { 592 do {
595 idx = cur_ops->readlock(); 593 idx = cur_ops->readlock();
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index da8d6bf46457..5aedbee014df 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -29,12 +29,6 @@
29 29
30#include "rtmutex_common.h" 30#include "rtmutex_common.h"
31 31
32#ifdef CONFIG_DEBUG_RT_MUTEXES
33# include "rtmutex-debug.h"
34#else
35# include "rtmutex.h"
36#endif
37
38# define TRACE_WARN_ON(x) WARN_ON(x) 32# define TRACE_WARN_ON(x) WARN_ON(x)
39# define TRACE_BUG_ON(x) BUG_ON(x) 33# define TRACE_BUG_ON(x) BUG_ON(x)
40 34
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 015fc633c96c..e3055ba69159 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -260,6 +260,7 @@ static int test_func(void *data)
260 int ret; 260 int ret;
261 261
262 current->flags |= PF_MUTEX_TESTER; 262 current->flags |= PF_MUTEX_TESTER;
263 set_freezable();
263 allow_signal(SIGHUP); 264 allow_signal(SIGHUP);
264 265
265 for(;;) { 266 for(;;) {
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 17d28ce20300..8cd9bd2cdb34 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -17,12 +17,6 @@
17 17
18#include "rtmutex_common.h" 18#include "rtmutex_common.h"
19 19
20#ifdef CONFIG_DEBUG_RT_MUTEXES
21# include "rtmutex-debug.h"
22#else
23# include "rtmutex.h"
24#endif
25
26/* 20/*
27 * lock->owner state tracking: 21 * lock->owner state tracking:
28 * 22 *
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 9c75856e791e..2d3b83593ca3 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -103,7 +103,7 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
103 103
104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) 104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
105{ 105{
106 return (struct task_struct *) 106 return (struct task_struct *)
107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
108} 108}
109 109
@@ -120,4 +120,11 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
120 struct task_struct *proxy_owner); 120 struct task_struct *proxy_owner);
121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, 121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
122 struct task_struct *proxy_owner); 122 struct task_struct *proxy_owner);
123
124#ifdef CONFIG_DEBUG_RT_MUTEXES
125# include "rtmutex-debug.h"
126#else
127# include "rtmutex.h"
128#endif
129
123#endif 130#endif
diff --git a/kernel/sched.c b/kernel/sched.c
index 3332bbb5d5cf..cb31fb4a1379 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -736,7 +736,9 @@ static void update_curr_load(struct rq *rq, u64 now)
736 * 736 *
737 * The "10% effect" is relative and cumulative: from _any_ nice level, 737 * The "10% effect" is relative and cumulative: from _any_ nice level,
738 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level 738 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
739 * it's +10% CPU usage. 739 * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
740 * If a task goes up by ~10% and another task goes down by ~10% then
741 * the relative distance between them is ~25%.)
740 */ 742 */
741static const int prio_to_weight[40] = { 743static const int prio_to_weight[40] = {
742/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, 744/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
@@ -746,15 +748,22 @@ static const int prio_to_weight[40] = {
746/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, 748/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
747}; 749};
748 750
751/*
752 * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
753 *
754 * In cases where the weight does not change often, we can use the
 755 * precalculated inverse to speed up arithmetic by turning divisions
756 * into multiplications:
757 */
749static const u32 prio_to_wmult[40] = { 758static const u32 prio_to_wmult[40] = {
750 48356, 60446, 75558, 94446, 118058, 147573, 759/* -20 */ 48356, 60446, 75558, 94446, 118058,
751 184467, 230589, 288233, 360285, 450347, 760/* -15 */ 147573, 184467, 230589, 288233, 360285,
752 562979, 703746, 879575, 1099582, 1374389, 761/* -10 */ 450347, 562979, 703746, 879575, 1099582,
753 1717986, 2147483, 2684354, 3355443, 4194304, 762/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443,
754 5244160, 6557201, 8196502, 10250518, 12782640, 763/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518,
755 16025997, 19976592, 24970740, 31350126, 39045157, 764/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126,
756 49367440, 61356675, 76695844, 95443717, 119304647, 765/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717,
757 148102320, 186737708, 238609294, 286331153, 766/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
758}; 767};
759 768
760static inline void 769static inline void
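The reshuffled prio_to_wmult[] table stores 2^32/weight for each nice level so the scheduler can replace a 64-bit division with a multiply and a shift. Checking the identity for nice 0 (weight 1024, inverse 4194304):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t delta = 123456789ULL;
	uint32_t weight = 1024;		/* prio_to_weight[] at nice 0 */
	uint32_t wmult = 4194304;	/* prio_to_wmult[] at nice 0: 2^32/1024 */

	uint64_t by_div  = delta / weight;
	uint64_t by_mult = (delta * wmult) >> 32;	/* division-free form */

	/* both print 120563 */
	printf("div=%llu mult=%llu\n",
	       (unsigned long long)by_div, (unsigned long long)by_mult);
	return 0;
}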
@@ -4903,8 +4912,6 @@ static int migration_thread(void *data)
4903 struct migration_req *req; 4912 struct migration_req *req;
4904 struct list_head *head; 4913 struct list_head *head;
4905 4914
4906 try_to_freeze();
4907
4908 spin_lock_irq(&rq->lock); 4915 spin_lock_irq(&rq->lock);
4909 4916
4910 if (cpu_is_offline(cpu)) { 4917 if (cpu_is_offline(cpu)) {
@@ -5138,7 +5145,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5138 p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); 5145 p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
5139 if (IS_ERR(p)) 5146 if (IS_ERR(p))
5140 return NOTIFY_BAD; 5147 return NOTIFY_BAD;
5141 p->flags |= PF_NOFREEZE;
5142 kthread_bind(p, cpu); 5148 kthread_bind(p, cpu);
5143 /* Must be high prio: stop_machine expects to yield to it. */ 5149 /* Must be high prio: stop_machine expects to yield to it. */
5144 rq = task_rq_lock(p, &flags); 5150 rq = task_rq_lock(p, &flags);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index c3391b6020e8..ad64fcb731f2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -10,6 +10,7 @@
10#include <linux/sched.h> 10#include <linux/sched.h>
11 11
12/* #define SECCOMP_DEBUG 1 */ 12/* #define SECCOMP_DEBUG 1 */
13#define NR_SECCOMP_MODES 1
13 14
14/* 15/*
15 * Secure computing mode 1 allows only read/write/exit/sigreturn. 16 * Secure computing mode 1 allows only read/write/exit/sigreturn.
@@ -54,3 +55,31 @@ void __secure_computing(int this_syscall)
54#endif 55#endif
55 do_exit(SIGKILL); 56 do_exit(SIGKILL);
56} 57}
58
59long prctl_get_seccomp(void)
60{
61 return current->seccomp.mode;
62}
63
64long prctl_set_seccomp(unsigned long seccomp_mode)
65{
66 long ret;
67
68 /* can set it only once to be even more secure */
69 ret = -EPERM;
70 if (unlikely(current->seccomp.mode))
71 goto out;
72
73 ret = -EINVAL;
74 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
75 current->seccomp.mode = seccomp_mode;
76 set_thread_flag(TIF_SECCOMP);
77#ifdef TIF_NOTSC
78 disable_TSC();
79#endif
80 ret = 0;
81 }
82
83 out:
84 return ret;
85}
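prctl_set_seccomp() lets a task enter mode 1 exactly once and never leave it; prctl_get_seccomp() reports the current mode. A userspace sketch, assuming the PR_GET_SECCOMP/PR_SET_SECCOMP values (21/22) this series assigns in case the headers at hand lack them:

#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SET_SECCOMP
#define PR_GET_SECCOMP	21	/* assumed values from this patch series */
#define PR_SET_SECCOMP	22
#endif

int main(void)
{
	if (prctl(PR_SET_SECCOMP, 1, 0, 0, 0) != 0)
		perror("PR_SET_SECCOMP");
	/* from here on only read/write/exit/sigreturn are allowed;
	 * any other syscall kills us with SIGKILL */
	write(1, "still alive\n", 12);
	_exit(0);
}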
diff --git a/kernel/signal.c b/kernel/signal.c
index f9405609774e..39d122753bac 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -718,6 +718,37 @@ out_set:
718#define LEGACY_QUEUE(sigptr, sig) \ 718#define LEGACY_QUEUE(sigptr, sig) \
719 (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) 719 (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig)))
720 720
721int print_fatal_signals;
722
723static void print_fatal_signal(struct pt_regs *regs, int signr)
724{
725 printk("%s/%d: potentially unexpected fatal signal %d.\n",
726 current->comm, current->pid, signr);
727
728#ifdef __i386__
729 printk("code at %08lx: ", regs->eip);
730 {
731 int i;
732 for (i = 0; i < 16; i++) {
733 unsigned char insn;
734
735 __get_user(insn, (unsigned char *)(regs->eip + i));
736 printk("%02x ", insn);
737 }
738 }
739#endif
740 printk("\n");
741 show_regs(regs);
742}
743
744static int __init setup_print_fatal_signals(char *str)
745{
746 get_option (&str, &print_fatal_signals);
747
748 return 1;
749}
750
751__setup("print-fatal-signals=", setup_print_fatal_signals);
721 752
722static int 753static int
723specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) 754specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
@@ -1855,6 +1886,8 @@ relock:
1855 * Anything else is fatal, maybe with a core dump. 1886 * Anything else is fatal, maybe with a core dump.
1856 */ 1887 */
1857 current->flags |= PF_SIGNALED; 1888 current->flags |= PF_SIGNALED;
1889 if ((signr != SIGKILL) && print_fatal_signals)
1890 print_fatal_signal(regs, signr);
1858 if (sig_kernel_coredump(signr)) { 1891 if (sig_kernel_coredump(signr)) {
1859 /* 1892 /*
1860 * If it was able to dump core, this kills all 1893 * If it was able to dump core, this kills all
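print_fatal_signal() fires only when print_fatal_signals is set, either at boot via print-fatal-signals=1 or at runtime through the sysctl added later in this series. A runtime toggle, sketched:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/print-fatal-signals", "w");

	if (!f) {
		perror("print-fatal-signals");
		return 1;
	}
	fputs("1\n", f);	/* log every fatal signal to the kernel log */
	return fclose(f) ? 1 : 0;
}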
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 73217a9e2875..0f546ddea43d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -14,6 +14,7 @@
14#include <linux/notifier.h> 14#include <linux/notifier.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/cpu.h> 16#include <linux/cpu.h>
17#include <linux/freezer.h>
17#include <linux/kthread.h> 18#include <linux/kthread.h>
18#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
19#include <linux/smp.h> 20#include <linux/smp.h>
@@ -488,8 +489,6 @@ void __init softirq_init(void)
488 489
489static int ksoftirqd(void * __bind_cpu) 490static int ksoftirqd(void * __bind_cpu)
490{ 491{
491 current->flags |= PF_NOFREEZE;
492
493 set_current_state(TASK_INTERRUPTIBLE); 492 set_current_state(TASK_INTERRUPTIBLE);
494 493
495 while (!kthread_should_stop()) { 494 while (!kthread_should_stop()) {
@@ -614,12 +613,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
614 kthread_bind(per_cpu(ksoftirqd, hotcpu), 613 kthread_bind(per_cpu(ksoftirqd, hotcpu),
615 any_online_cpu(cpu_online_map)); 614 any_online_cpu(cpu_online_map));
616 case CPU_DEAD: 615 case CPU_DEAD:
617 case CPU_DEAD_FROZEN: 616 case CPU_DEAD_FROZEN: {
617 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
618
618 p = per_cpu(ksoftirqd, hotcpu); 619 p = per_cpu(ksoftirqd, hotcpu);
619 per_cpu(ksoftirqd, hotcpu) = NULL; 620 per_cpu(ksoftirqd, hotcpu) = NULL;
621 sched_setscheduler(p, SCHED_FIFO, &param);
620 kthread_stop(p); 622 kthread_stop(p);
621 takeover_tasklets(hotcpu); 623 takeover_tasklets(hotcpu);
622 break; 624 break;
625 }
623#endif /* CONFIG_HOTPLUG_CPU */ 626#endif /* CONFIG_HOTPLUG_CPU */
624 } 627 }
625 return NOTIFY_OK; 628 return NOTIFY_OK;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 0131e296ffb4..708d4882c0c3 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -10,6 +10,7 @@
10#include <linux/cpu.h> 10#include <linux/cpu.h>
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/freezer.h>
13#include <linux/kthread.h> 14#include <linux/kthread.h>
14#include <linux/notifier.h> 15#include <linux/notifier.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -116,7 +117,6 @@ static int watchdog(void * __bind_cpu)
116 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 117 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
117 118
118 sched_setscheduler(current, SCHED_FIFO, &param); 119 sched_setscheduler(current, SCHED_FIFO, &param);
119 current->flags |= PF_NOFREEZE;
120 120
121 /* initialize timestamp */ 121 /* initialize timestamp */
122 touch_softlockup_watchdog(); 122 touch_softlockup_watchdog();
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index fcee2a8e6da3..319821ef78af 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -93,10 +93,6 @@ static void stopmachine_set_state(enum stopmachine_state state)
93static int stop_machine(void) 93static int stop_machine(void)
94{ 94{
95 int i, ret = 0; 95 int i, ret = 0;
96 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
97
98 /* One high-prio thread per cpu. We'll do this one. */
99 sched_setscheduler(current, SCHED_FIFO, &param);
100 96
101 atomic_set(&stopmachine_thread_ack, 0); 97 atomic_set(&stopmachine_thread_ack, 0);
102 stopmachine_num_threads = 0; 98 stopmachine_num_threads = 0;
@@ -189,6 +185,10 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
189 185
190 p = kthread_create(do_stop, &smdata, "kstopmachine"); 186 p = kthread_create(do_stop, &smdata, "kstopmachine");
191 if (!IS_ERR(p)) { 187 if (!IS_ERR(p)) {
188 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
189
190 /* One high-prio thread per cpu. We'll do this one. */
191 sched_setscheduler(p, SCHED_FIFO, &param);
192 kthread_bind(p, cpu); 192 kthread_bind(p, cpu);
193 wake_up_process(p); 193 wake_up_process(p);
194 wait_for_completion(&smdata.done); 194 wait_for_completion(&smdata.done);
diff --git a/kernel/sys.c b/kernel/sys.c
index 872271ccc384..4d141ae3e802 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -31,10 +31,12 @@
31#include <linux/cn_proc.h> 31#include <linux/cn_proc.h>
32#include <linux/getcpu.h> 32#include <linux/getcpu.h>
33#include <linux/task_io_accounting_ops.h> 33#include <linux/task_io_accounting_ops.h>
34#include <linux/seccomp.h>
34 35
35#include <linux/compat.h> 36#include <linux/compat.h>
36#include <linux/syscalls.h> 37#include <linux/syscalls.h>
37#include <linux/kprobes.h> 38#include <linux/kprobes.h>
39#include <linux/user_namespace.h>
38 40
39#include <asm/uaccess.h> 41#include <asm/uaccess.h>
40#include <asm/io.h> 42#include <asm/io.h>
@@ -1078,13 +1080,13 @@ static int set_user(uid_t new_ruid, int dumpclear)
1078{ 1080{
1079 struct user_struct *new_user; 1081 struct user_struct *new_user;
1080 1082
1081 new_user = alloc_uid(new_ruid); 1083 new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
1082 if (!new_user) 1084 if (!new_user)
1083 return -EAGAIN; 1085 return -EAGAIN;
1084 1086
1085 if (atomic_read(&new_user->processes) >= 1087 if (atomic_read(&new_user->processes) >=
1086 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 1088 current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
1087 new_user != &root_user) { 1089 new_user != current->nsproxy->user_ns->root_user) {
1088 free_uid(new_user); 1090 free_uid(new_user);
1089 return -EAGAIN; 1091 return -EAGAIN;
1090 } 1092 }
@@ -2241,6 +2243,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
2241 error = SET_ENDIAN(current, arg2); 2243 error = SET_ENDIAN(current, arg2);
2242 break; 2244 break;
2243 2245
2246 case PR_GET_SECCOMP:
2247 error = prctl_get_seccomp();
2248 break;
2249 case PR_SET_SECCOMP:
2250 error = prctl_set_seccomp(arg2);
2251 break;
2252
2244 default: 2253 default:
2245 error = -EINVAL; 2254 error = -EINVAL;
2246 break; 2255 break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7e11e2c98bf9..b0ec498a18d9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -14,6 +14,7 @@ asmlinkage long sys_ni_syscall(void)
14 14
15cond_syscall(sys_nfsservctl); 15cond_syscall(sys_nfsservctl);
16cond_syscall(sys_quotactl); 16cond_syscall(sys_quotactl);
17cond_syscall(sys32_quotactl);
17cond_syscall(sys_acct); 18cond_syscall(sys_acct);
18cond_syscall(sys_lookup_dcookie); 19cond_syscall(sys_lookup_dcookie);
19cond_syscall(sys_swapon); 20cond_syscall(sys_swapon);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d93e13d93f24..7063ebc6db05 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -29,6 +29,7 @@
29#include <linux/utsname.h> 29#include <linux/utsname.h>
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/fs.h>
32#include <linux/init.h> 33#include <linux/init.h>
33#include <linux/kernel.h> 34#include <linux/kernel.h>
34#include <linux/kobject.h> 35#include <linux/kobject.h>
@@ -49,9 +50,6 @@
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50#include <asm/processor.h> 51#include <asm/processor.h>
51 52
52extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53 void __user *buffer, size_t *lenp, loff_t *ppos);
54
55#ifdef CONFIG_X86 53#ifdef CONFIG_X86
56#include <asm/nmi.h> 54#include <asm/nmi.h>
57#include <asm/stacktrace.h> 55#include <asm/stacktrace.h>
@@ -61,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
61 59
62/* External variables not in a header file. */ 60/* External variables not in a header file. */
63extern int C_A_D; 61extern int C_A_D;
62extern int print_fatal_signals;
64extern int sysctl_overcommit_memory; 63extern int sysctl_overcommit_memory;
65extern int sysctl_overcommit_ratio; 64extern int sysctl_overcommit_ratio;
66extern int sysctl_panic_on_oom; 65extern int sysctl_panic_on_oom;
@@ -202,7 +201,10 @@ static ctl_table root_table[] = {
202 .mode = 0555, 201 .mode = 0555,
203 .child = dev_table, 202 .child = dev_table,
204 }, 203 },
205 204/*
205 * NOTE: do not add new entries to this table unless you have read
206 * Documentation/sysctl/ctl_unnumbered.txt
207 */
206 { .ctl_name = 0 } 208 { .ctl_name = 0 }
207}; 209};
208 210
@@ -340,6 +342,14 @@ static ctl_table kern_table[] = {
340 .proc_handler = &proc_dointvec, 342 .proc_handler = &proc_dointvec,
341 }, 343 },
342#endif 344#endif
345 {
346 .ctl_name = CTL_UNNUMBERED,
347 .procname = "print-fatal-signals",
348 .data = &print_fatal_signals,
349 .maxlen = sizeof(int),
350 .mode = 0644,
351 .proc_handler = &proc_dointvec,
352 },
343#ifdef __sparc__ 353#ifdef __sparc__
344 { 354 {
345 .ctl_name = KERN_SPARC_REBOOT, 355 .ctl_name = KERN_SPARC_REBOOT,
@@ -814,6 +824,14 @@ static ctl_table vm_table[] = {
814 .mode = 0644, 824 .mode = 0644,
815 .proc_handler = &proc_dointvec, 825 .proc_handler = &proc_dointvec,
816 }, 826 },
827 {
828 .ctl_name = CTL_UNNUMBERED,
829 .procname = "hugepages_treat_as_movable",
830 .data = &hugepages_treat_as_movable,
831 .maxlen = sizeof(int),
832 .mode = 0644,
833 .proc_handler = &hugetlb_treat_movable_handler,
834 },
817#endif 835#endif
818 { 836 {
819 .ctl_name = VM_LOWMEM_RESERVE_RATIO, 837 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
@@ -958,6 +976,17 @@ static ctl_table vm_table[] = {
958 .mode = 0644, 976 .mode = 0644,
959 .proc_handler = &proc_doulongvec_minmax, 977 .proc_handler = &proc_doulongvec_minmax,
960 }, 978 },
979#ifdef CONFIG_NUMA
980 {
981 .ctl_name = CTL_UNNUMBERED,
982 .procname = "numa_zonelist_order",
983 .data = &numa_zonelist_order,
984 .maxlen = NUMA_ZONELIST_ORDER_LEN,
985 .mode = 0644,
986 .proc_handler = &numa_zonelist_order_handler,
987 .strategy = &sysctl_string,
988 },
989#endif
961#endif 990#endif
962#if defined(CONFIG_X86_32) || \ 991#if defined(CONFIG_X86_32) || \
963 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 992 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
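The new numa_zonelist_order entry takes a short string through sysctl_string; the accepted values ("default", "node", "zone", matched by their first letter) are an assumption based on the mm patch that supplies numa_zonelist_order_handler. A runtime write, sketched:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/numa_zonelist_order", "w");

	if (!f) {
		perror("numa_zonelist_order");
		return 1;
	}
	fputs("zone\n", f);	/* prefer zone order on this machine */
	return fclose(f) ? 1 : 0;
}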
@@ -972,6 +1001,10 @@ static ctl_table vm_table[] = {
972 .extra1 = &zero, 1001 .extra1 = &zero,
973 }, 1002 },
974#endif 1003#endif
1004/*
1005 * NOTE: do not add new entries to this table unless you have read
1006 * Documentation/sysctl/ctl_unnumbered.txt
1007 */
975 { .ctl_name = 0 } 1008 { .ctl_name = 0 }
976}; 1009};
977 1010
@@ -1112,6 +1145,10 @@ static ctl_table fs_table[] = {
1112 .child = binfmt_misc_table, 1145 .child = binfmt_misc_table,
1113 }, 1146 },
1114#endif 1147#endif
1148/*
1149 * NOTE: do not add new entries to this table unless you have read
1150 * Documentation/sysctl/ctl_unnumbered.txt
1151 */
1115 { .ctl_name = 0 } 1152 { .ctl_name = 0 }
1116}; 1153};
1117 1154
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 906cae771585..059431ed67db 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
196 196
197 /* fill in basic acct fields */ 197 /* fill in basic acct fields */
198 stats->version = TASKSTATS_VERSION; 198 stats->version = TASKSTATS_VERSION;
199 stats->nvcsw = tsk->nvcsw;
200 stats->nivcsw = tsk->nivcsw;
199 bacct_add_tsk(stats, tsk); 201 bacct_add_tsk(stats, tsk);
200 202
201 /* fill in extended acct fields */ 203 /* fill in extended acct fields */
@@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
242 */ 244 */
243 delayacct_add_tsk(stats, tsk); 245 delayacct_add_tsk(stats, tsk);
244 246
247 stats->nvcsw += tsk->nvcsw;
248 stats->nivcsw += tsk->nivcsw;
245 } while_each_thread(first, tsk); 249 } while_each_thread(first, tsk);
246 250
247 unlock_task_sighand(first, &flags); 251 unlock_task_sighand(first, &flags);
diff --git a/kernel/time.c b/kernel/time.c
index f04791f69408..ffe19149d770 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz);
57 */ 57 */
58asmlinkage long sys_time(time_t __user * tloc) 58asmlinkage long sys_time(time_t __user * tloc)
59{ 59{
60 time_t i; 60 /*
61 struct timeval tv; 61 * We read xtime.tv_sec atomically - it's updated
62 * atomically by update_wall_time(), so no need to
63 * even read-lock the xtime seqlock:
64 */
65 time_t i = xtime.tv_sec;
62 66
63 do_gettimeofday(&tv); 67 smp_rmb(); /* sys_time() results are coherent */
64 i = tv.tv_sec;
65 68
66 if (tloc) { 69 if (tloc) {
67 if (put_user(i,tloc)) 70 if (put_user(i, tloc))
68 i = -EFAULT; 71 i = -EFAULT;
69 } 72 }
70 return i; 73 return i;
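The rewrite turns sys_time() into a lock-free read of xtime.tv_sec, and the do_gettimeofday() hook in the next hunk keeps the two interfaces coherent: time() can no longer report an earlier second than a preceding gettimeofday(). Observable from userspace:

#include <stdio.h>
#include <sys/time.h>
#include <time.h>

int main(void)
{
	struct timeval tv;
	time_t t;

	gettimeofday(&tv, NULL);
	t = time(NULL);
	/* with this patch, t >= tv.tv_sec always holds */
	printf("time()=%ld gettimeofday()=%ld\n", (long)t, (long)tv.tv_sec);
	return 0;
}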
@@ -373,12 +376,25 @@ void do_gettimeofday (struct timeval *tv)
373 376
374 tv->tv_sec = sec; 377 tv->tv_sec = sec;
375 tv->tv_usec = usec; 378 tv->tv_usec = usec;
376}
377 379
380 /*
381 * Make sure xtime.tv_sec [returned by sys_time()] always
382 * follows the gettimeofday() result precisely. This
383 * condition is extremely unlikely, it can hit at most
384 * once per second:
385 */
386 if (unlikely(xtime.tv_sec != tv->tv_sec)) {
387 unsigned long flags;
388
389 write_seqlock_irqsave(&xtime_lock, flags);
390 update_wall_time();
391 write_sequnlock_irqrestore(&xtime_lock, flags);
392 }
393}
378EXPORT_SYMBOL(do_gettimeofday); 394EXPORT_SYMBOL(do_gettimeofday);
379 395
396#else /* CONFIG_TIME_INTERPOLATION */
380 397
381#else
382#ifndef CONFIG_GENERIC_TIME 398#ifndef CONFIG_GENERIC_TIME
383/* 399/*
384 * Simulate gettimeofday using do_gettimeofday which only allows a timeval 400 * Simulate gettimeofday using do_gettimeofday which only allows a timeval
@@ -394,7 +410,7 @@ void getnstimeofday(struct timespec *tv)
394} 410}
395EXPORT_SYMBOL_GPL(getnstimeofday); 411EXPORT_SYMBOL_GPL(getnstimeofday);
396#endif 412#endif
397#endif 413#endif /* CONFIG_TIME_INTERPOLATION */
398 414
399/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. 415/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
400 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 416 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 76212b2a99de..2ad1c37b8dfe 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -205,47 +205,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
205} 205}
206 206
207/** 207/**
208 * clockevents_request_device
209 */
210struct clock_event_device *clockevents_request_device(unsigned int features,
211 cpumask_t cpumask)
212{
213 struct clock_event_device *cur, *dev = NULL;
214 struct list_head *tmp;
215
216 spin_lock(&clockevents_lock);
217
218 list_for_each(tmp, &clockevent_devices) {
219 cur = list_entry(tmp, struct clock_event_device, list);
220
221 if ((cur->features & features) == features &&
222 cpus_equal(cpumask, cur->cpumask)) {
223 if (!dev || dev->rating < cur->rating)
224 dev = cur;
225 }
226 }
227
228 clockevents_exchange_device(NULL, dev);
229
230 spin_unlock(&clockevents_lock);
231
232 return dev;
233}
234
235/**
236 * clockevents_release_device
237 */
238void clockevents_release_device(struct clock_event_device *dev)
239{
240 spin_lock(&clockevents_lock);
241
242 clockevents_exchange_device(dev, NULL);
243 clockevents_notify_released();
244
245 spin_unlock(&clockevents_lock);
246}
247
248/**
249 * clockevents_notify - notification about relevant events 208 * clockevents_notify - notification about relevant events
250 */ 209 */
251void clockevents_notify(unsigned long reason, void *arg) 210void clockevents_notify(unsigned long reason, void *arg)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cf53bb5814cb..438c6b723ee2 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -13,7 +13,7 @@
13#include <linux/timex.h> 13#include <linux/timex.h>
14#include <linux/jiffies.h> 14#include <linux/jiffies.h>
15#include <linux/hrtimer.h> 15#include <linux/hrtimer.h>
16 16#include <linux/capability.h>
17#include <asm/div64.h> 17#include <asm/div64.h>
18#include <asm/timex.h> 18#include <asm/timex.h>
19 19
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3d1042f82a68..728cedfd3cbd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -36,9 +36,17 @@ EXPORT_SYMBOL(xtime_lock);
36 * at zero at system boot time, so wall_to_monotonic will be negative, 36 * at zero at system boot time, so wall_to_monotonic will be negative,
37 * however, we will ALWAYS keep the tv_nsec part positive so we can use 37 * however, we will ALWAYS keep the tv_nsec part positive so we can use
38 * the usual normalization. 38 * the usual normalization.
39 *
40 * wall_to_monotonic is moved after resume from suspend for the monotonic
41 * time not to jump. We need to add total_sleep_time to wall_to_monotonic
42 * to get the real boot based time offset.
43 *
44 * - wall_to_monotonic is no longer the boot time, getboottime must be
45 * used instead.
39 */ 46 */
40struct timespec xtime __attribute__ ((aligned (16))); 47struct timespec xtime __attribute__ ((aligned (16)));
41struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 48struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
49static unsigned long total_sleep_time; /* seconds */
42 50
43EXPORT_SYMBOL(xtime); 51EXPORT_SYMBOL(xtime);
44 52
@@ -251,6 +259,7 @@ void __init timekeeping_init(void)
251 xtime.tv_nsec = 0; 259 xtime.tv_nsec = 0;
252 set_normalized_timespec(&wall_to_monotonic, 260 set_normalized_timespec(&wall_to_monotonic,
253 -xtime.tv_sec, -xtime.tv_nsec); 261 -xtime.tv_sec, -xtime.tv_nsec);
262 total_sleep_time = 0;
254 263
255 write_sequnlock_irqrestore(&xtime_lock, flags); 264 write_sequnlock_irqrestore(&xtime_lock, flags);
256} 265}
@@ -282,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev)
282 291
283 xtime.tv_sec += sleep_length; 292 xtime.tv_sec += sleep_length;
284 wall_to_monotonic.tv_sec -= sleep_length; 293 wall_to_monotonic.tv_sec -= sleep_length;
294 total_sleep_time += sleep_length;
285 } 295 }
286 /* re-base the last cycle value */ 296 /* re-base the last cycle value */
287 clock->cycle_last = clocksource_read(clock); 297 clock->cycle_last = clocksource_read(clock);
@@ -476,3 +486,30 @@ void update_wall_time(void)
476 change_clocksource(); 486 change_clocksource();
477 update_vsyscall(&xtime, clock); 487 update_vsyscall(&xtime, clock);
478} 488}
489
490/**
491 * getboottime - Return the real time of system boot.
492 * @ts: pointer to the timespec to be set
493 *
494 * Returns the time of day in a timespec.
495 *
496 * This is based on the wall_to_monotonic offset and the total suspend
497 * time. Calls to settimeofday will affect the value returned (which
498 * basically means that however wrong your real time clock is at boot time,
499 * you get the right time here).
500 */
501void getboottime(struct timespec *ts)
502{
503 set_normalized_timespec(ts,
504 - (wall_to_monotonic.tv_sec + total_sleep_time),
505 - wall_to_monotonic.tv_nsec);
506}
507
508/**
509 * monotonic_to_bootbased - Convert the monotonic time to boot based.
510 * @ts: pointer to the timespec to be converted
511 */
512void monotonic_to_bootbased(struct timespec *ts)
513{
514 ts->tv_sec += total_sleep_time;
515}
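Because resume moves wall_to_monotonic back by the sleep length while total_sleep_time grows by the same amount, the sum inside getboottime() is suspend-invariant and always recovers the original boot wall time. Toy numbers:

#include <stdio.h>

int main(void)
{
	/* boot happened at wall time 1000000 s; one 30 s suspend since */
	long wall_to_mono = -1000000 - 30;	/* resume subtracted the sleep */
	long total_sleep  = 30;			/* ... and accumulated it here */
	long boot = -(wall_to_mono + total_sleep);

	printf("boot = %ld\n", boot);		/* 1000000: the sleep cancels out */
	return 0;
}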
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 8bbcfb77f7d2..e5edc3a22a08 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -38,7 +38,7 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
38 38
39static void print_name_offset(struct seq_file *m, void *sym) 39static void print_name_offset(struct seq_file *m, void *sym)
40{ 40{
41 char symname[KSYM_NAME_LEN+1]; 41 char symname[KSYM_NAME_LEN];
42 42
43 if (lookup_symbol_name((unsigned long)sym, symname) < 0) 43 if (lookup_symbol_name((unsigned long)sym, symname) < 0)
44 SEQ_printf(m, "<%p>", sym); 44 SEQ_printf(m, "<%p>", sym);
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 321693724ad7..8ed62fda16c6 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -68,6 +68,7 @@ struct entry {
68 * Number of timeout events: 68 * Number of timeout events:
69 */ 69 */
70 unsigned long count; 70 unsigned long count;
71 unsigned int timer_flag;
71 72
72 /* 73 /*
73 * We save the command-line string to preserve 74 * We save the command-line string to preserve
@@ -231,7 +232,8 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
231 * incremented. Otherwise the timer is registered in a free slot. 232 * incremented. Otherwise the timer is registered in a free slot.
232 */ 233 */
233void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 234void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
234 void *timerf, char * comm) 235 void *timerf, char *comm,
236 unsigned int timer_flag)
235{ 237{
236 /* 238 /*
 237 * It doesn't matter which lock we take: 239
@@ -249,6 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
249 input.start_func = startf; 251 input.start_func = startf;
250 input.expire_func = timerf; 252 input.expire_func = timerf;
251 input.pid = pid; 253 input.pid = pid;
254 input.timer_flag = timer_flag;
252 255
253 spin_lock_irqsave(lock, flags); 256 spin_lock_irqsave(lock, flags);
254 if (!active) 257 if (!active)
@@ -266,7 +269,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
266 269
267static void print_name_offset(struct seq_file *m, unsigned long addr) 270static void print_name_offset(struct seq_file *m, unsigned long addr)
268{ 271{
269 char symname[KSYM_NAME_LEN+1]; 272 char symname[KSYM_NAME_LEN];
270 273
271 if (lookup_symbol_name(addr, symname) < 0) 274 if (lookup_symbol_name(addr, symname) < 0)
272 seq_printf(m, "<%p>", (void *)addr); 275 seq_printf(m, "<%p>", (void *)addr);
@@ -295,7 +298,7 @@ static int tstats_show(struct seq_file *m, void *v)
295 period = ktime_to_timespec(time); 298 period = ktime_to_timespec(time);
296 ms = period.tv_nsec / 1000000; 299 ms = period.tv_nsec / 1000000;
297 300
298 seq_puts(m, "Timer Stats Version: v0.1\n"); 301 seq_puts(m, "Timer Stats Version: v0.2\n");
299 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); 302 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
300 if (atomic_read(&overflow_count)) 303 if (atomic_read(&overflow_count))
301 seq_printf(m, "Overflow: %d entries\n", 304 seq_printf(m, "Overflow: %d entries\n",
@@ -303,8 +306,13 @@ static int tstats_show(struct seq_file *m, void *v)
303 306
304 for (i = 0; i < nr_entries; i++) { 307 for (i = 0; i < nr_entries; i++) {
305 entry = entries + i; 308 entry = entries + i;
306 seq_printf(m, "%4lu, %5d %-16s ", 309 if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
310 seq_printf(m, "%4luD, %5d %-16s ",
307 entry->count, entry->pid, entry->comm); 311 entry->count, entry->pid, entry->comm);
312 } else {
313 seq_printf(m, " %4lu, %5d %-16s ",
314 entry->count, entry->pid, entry->comm);
315 }
308 316
309 print_name_offset(m, (unsigned long)entry->start_func); 317 print_name_offset(m, (unsigned long)entry->start_func);
310 seq_puts(m, " ("); 318 seq_puts(m, " (");
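With format v0.2 a deferrable timer's event count is suffixed with 'D'. Collection still has to be armed by writing 1 to /proc/timer_stats before reading; a plain reader:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f;

	/* arm collection first: echo 1 > /proc/timer_stats */
	f = fopen("/proc/timer_stats", "r");
	if (!f) {
		perror("/proc/timer_stats");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* deferrable entries carry a 'D' suffix */
	fclose(f);
	return 0;
}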
diff --git a/kernel/timer.c b/kernel/timer.c
index 1a69705c2fb9..b7792fb03387 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -305,6 +305,20 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
305 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); 305 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
306 timer->start_pid = current->pid; 306 timer->start_pid = current->pid;
307} 307}
308
309static void timer_stats_account_timer(struct timer_list *timer)
310{
311 unsigned int flag = 0;
312
313 if (unlikely(tbase_get_deferrable(timer->base)))
314 flag |= TIMER_STATS_FLAG_DEFERRABLE;
315
316 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
317 timer->function, timer->start_comm, flag);
318}
319
320#else
321static void timer_stats_account_timer(struct timer_list *timer) {}
308#endif 322#endif
309 323
310/** 324/**
@@ -1114,6 +1128,7 @@ int do_sysinfo(struct sysinfo *info)
1114 getnstimeofday(&tp); 1128 getnstimeofday(&tp);
1115 tp.tv_sec += wall_to_monotonic.tv_sec; 1129 tp.tv_sec += wall_to_monotonic.tv_sec;
1116 tp.tv_nsec += wall_to_monotonic.tv_nsec; 1130 tp.tv_nsec += wall_to_monotonic.tv_nsec;
1131 monotonic_to_bootbased(&tp);
1117 if (tp.tv_nsec - NSEC_PER_SEC >= 0) { 1132 if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
1118 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; 1133 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1119 tp.tv_sec++; 1134 tp.tv_sec++;
@@ -1206,7 +1221,8 @@ static int __devinit init_timers_cpu(int cpu)
1206 /* 1221 /*
1207 * The APs use this path later in boot 1222 * The APs use this path later in boot
1208 */ 1223 */
1209 base = kmalloc_node(sizeof(*base), GFP_KERNEL, 1224 base = kmalloc_node(sizeof(*base),
1225 GFP_KERNEL | __GFP_ZERO,
1210 cpu_to_node(cpu)); 1226 cpu_to_node(cpu));
1211 if (!base) 1227 if (!base)
1212 return -ENOMEM; 1228 return -ENOMEM;
@@ -1217,7 +1233,6 @@ static int __devinit init_timers_cpu(int cpu)
1217 kfree(base); 1233 kfree(base);
1218 return -ENOMEM; 1234 return -ENOMEM;
1219 } 1235 }
1220 memset(base, 0, sizeof(*base));
1221 per_cpu(tvec_bases, cpu) = base; 1236 per_cpu(tvec_bases, cpu) = base;
1222 } else { 1237 } else {
1223 /* 1238 /*
diff --git a/kernel/user.c b/kernel/user.c
index 4869563080e9..98b82507797a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -14,20 +14,19 @@
14#include <linux/bitops.h> 14#include <linux/bitops.h>
15#include <linux/key.h> 15#include <linux/key.h>
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/module.h>
18#include <linux/user_namespace.h>
17 19
18/* 20/*
19 * UID task count cache, to get fast user lookup in "alloc_uid" 21 * UID task count cache, to get fast user lookup in "alloc_uid"
20 * when changing user ID's (ie setuid() and friends). 22 * when changing user ID's (ie setuid() and friends).
21 */ 23 */
22 24
23#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
24#define UIDHASH_SZ (1 << UIDHASH_BITS)
25#define UIDHASH_MASK (UIDHASH_SZ - 1) 25#define UIDHASH_MASK (UIDHASH_SZ - 1)
26#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) 26#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
27#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) 27#define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid)))
28 28
29static struct kmem_cache *uid_cachep; 29static struct kmem_cache *uid_cachep;
30static struct list_head uidhash_table[UIDHASH_SZ];
31 30
32/* 31/*
33 * The uidhash_lock is mostly taken from process context, but it is 32 * The uidhash_lock is mostly taken from process context, but it is
@@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid)
94{ 93{
95 struct user_struct *ret; 94 struct user_struct *ret;
96 unsigned long flags; 95 unsigned long flags;
96 struct user_namespace *ns = current->nsproxy->user_ns;
97 97
98 spin_lock_irqsave(&uidhash_lock, flags); 98 spin_lock_irqsave(&uidhash_lock, flags);
99 ret = uid_hash_find(uid, uidhashentry(uid)); 99 ret = uid_hash_find(uid, uidhashentry(ns, uid));
100 spin_unlock_irqrestore(&uidhash_lock, flags); 100 spin_unlock_irqrestore(&uidhash_lock, flags);
101 return ret; 101 return ret;
102} 102}
@@ -120,9 +120,9 @@ void free_uid(struct user_struct *up)
120 } 120 }
121} 121}
122 122
123struct user_struct * alloc_uid(uid_t uid) 123struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
124{ 124{
125 struct list_head *hashent = uidhashentry(uid); 125 struct list_head *hashent = uidhashentry(ns, uid);
126 struct user_struct *up; 126 struct user_struct *up;
127 127
128 spin_lock_irq(&uidhash_lock); 128 spin_lock_irq(&uidhash_lock);
@@ -211,11 +211,11 @@ static int __init uid_cache_init(void)
211 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 211 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
212 212
213 for(n = 0; n < UIDHASH_SZ; ++n) 213 for(n = 0; n < UIDHASH_SZ; ++n)
214 INIT_LIST_HEAD(uidhash_table + n); 214 INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
215 215
216 /* Insert the root user immediately (init already runs as root) */ 216 /* Insert the root user immediately (init already runs as root) */
217 spin_lock_irq(&uidhash_lock); 217 spin_lock_irq(&uidhash_lock);
218 uid_hash_insert(&root_user, uidhashentry(0)); 218 uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
219 spin_unlock_irq(&uidhash_lock); 219 spin_unlock_irq(&uidhash_lock);
220 220
221 return 0; 221 return 0;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
new file mode 100644
index 000000000000..d055d987850c
--- /dev/null
+++ b/kernel/user_namespace.c
@@ -0,0 +1,87 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License as
4 * published by the Free Software Foundation, version 2 of the
5 * License.
6 */
7
8#include <linux/module.h>
9#include <linux/version.h>
10#include <linux/nsproxy.h>
11#include <linux/user_namespace.h>
12
13struct user_namespace init_user_ns = {
14 .kref = {
15 .refcount = ATOMIC_INIT(2),
16 },
17 .root_user = &root_user,
18};
19
20EXPORT_SYMBOL_GPL(init_user_ns);
21
22#ifdef CONFIG_USER_NS
23
24/*
25 * Clone a new ns copying an original user ns, setting refcount to 1
26 * @old_ns: namespace to clone
 27 * Return ERR_PTR(-ENOMEM) on allocation failure, new ns otherwise
28 */
29static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
30{
31 struct user_namespace *ns;
32 struct user_struct *new_user;
33 int n;
34
35 ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
36 if (!ns)
37 return ERR_PTR(-ENOMEM);
38
39 kref_init(&ns->kref);
40
41 for (n = 0; n < UIDHASH_SZ; ++n)
42 INIT_LIST_HEAD(ns->uidhash_table + n);
43
44 /* Insert new root user. */
45 ns->root_user = alloc_uid(ns, 0);
46 if (!ns->root_user) {
47 kfree(ns);
48 return ERR_PTR(-ENOMEM);
49 }
50
51 /* Reset current->user with a new one */
52 new_user = alloc_uid(ns, current->uid);
53 if (!new_user) {
54 free_uid(ns->root_user);
55 kfree(ns);
56 return ERR_PTR(-ENOMEM);
57 }
58
59 switch_uid(new_user);
60 return ns;
61}
62
63struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
64{
65 struct user_namespace *new_ns;
66
67 BUG_ON(!old_ns);
68 get_user_ns(old_ns);
69
70 if (!(flags & CLONE_NEWUSER))
71 return old_ns;
72
73 new_ns = clone_user_ns(old_ns);
74
75 put_user_ns(old_ns);
76 return new_ns;
77}
78
79void free_user_ns(struct kref *kref)
80{
81 struct user_namespace *ns;
82
83 ns = container_of(kref, struct user_namespace, kref);
84 kfree(ns);
85}
86
87#endif /* CONFIG_USER_NS */
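At this point in the series a new user namespace is entered with clone(CLONE_NEWUSER), gated by the capable(CAP_SYS_ADMIN) check in copy_namespaces() above, so the caller must be privileged. A minimal caller under these era semantics:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

static int child(void *arg)
{
	/* inside the new user namespace we run as its freshly minted root */
	printf("child uid: %d\n", (int)getuid());
	return 0;
}

int main(void)
{
	char *stack = malloc(64 * 1024);
	pid_t pid;

	if (!stack)
		return 1;
	/* stack grows down: pass the top of the allocation */
	pid = clone(child, stack + 64 * 1024, CLONE_NEWUSER | SIGCHLD, NULL);
	if (pid < 0) {
		perror("clone");
		return 1;
	}
	waitpid(pid, NULL, 0);
	return 0;
}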
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 160c8c5136bd..9d8180a0f0d8 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -13,6 +13,7 @@
13#include <linux/uts.h> 13#include <linux/uts.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/version.h> 15#include <linux/version.h>
16#include <linux/err.h>
16 17
17/* 18/*
18 * Clone a new ns copying an original utsname, setting refcount to 1 19 * Clone a new ns copying an original utsname, setting refcount to 1
@@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
24 struct uts_namespace *ns; 25 struct uts_namespace *ns;
25 26
26 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); 27 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
27 if (ns) { 28 if (!ns)
28 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 29 return ERR_PTR(-ENOMEM);
29 kref_init(&ns->kref); 30
30 } 31 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
32 kref_init(&ns->kref);
31 return ns; 33 return ns;
32} 34}
33 35
@@ -37,7 +39,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
37 * utsname of this process won't be seen by parent, and vice 39 * utsname of this process won't be seen by parent, and vice
38 * versa. 40 * versa.
39 */ 41 */
40struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) 42struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns)
41{ 43{
42 struct uts_namespace *new_ns; 44 struct uts_namespace *new_ns;
43 45
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index f22b9dbd2a9c..c76c06466bfd 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -18,10 +18,7 @@
18static void *get_uts(ctl_table *table, int write) 18static void *get_uts(ctl_table *table, int write)
19{ 19{
20 char *which = table->data; 20 char *which = table->data;
21#ifdef CONFIG_UTS_NS 21
22 struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
23 which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
24#endif
25 if (!write) 22 if (!write)
26 down_read(&uts_sem); 23 down_read(&uts_sem);
27 else 24 else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3bebf73be976..58e5c152a6bb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -282,8 +282,8 @@ static int worker_thread(void *__cwq)
282 struct cpu_workqueue_struct *cwq = __cwq; 282 struct cpu_workqueue_struct *cwq = __cwq;
283 DEFINE_WAIT(wait); 283 DEFINE_WAIT(wait);
284 284
285 if (!cwq->wq->freezeable) 285 if (cwq->wq->freezeable)
286 current->flags |= PF_NOFREEZE; 286 set_freezable();
287 287
288 set_user_nice(current, -5); 288 set_user_nice(current, -5);
289 289
@@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
382EXPORT_SYMBOL_GPL(flush_workqueue); 382EXPORT_SYMBOL_GPL(flush_workqueue);
383 383
384/* 384/*
385 * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, 385 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
386 * so this work can't be re-armed in any way. 386 * so this work can't be re-armed in any way.
387 */ 387 */
388static int try_to_grab_pending(struct work_struct *work) 388static int try_to_grab_pending(struct work_struct *work)
389{ 389{
390 struct cpu_workqueue_struct *cwq; 390 struct cpu_workqueue_struct *cwq;
391 int ret = 0; 391 int ret = -1;
392 392
393 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) 393 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
394 return 1; 394 return 0;
395 395
396 /* 396 /*
397 * The queueing is in progress, or it is already queued. Try to 397 * The queueing is in progress, or it is already queued. Try to
@@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work)
457 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 457 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
458} 458}
459 459
460static int __cancel_work_timer(struct work_struct *work,
461 struct timer_list* timer)
462{
463 int ret;
464
465 do {
466 ret = (timer && likely(del_timer(timer)));
467 if (!ret)
468 ret = try_to_grab_pending(work);
469 wait_on_work(work);
470 } while (unlikely(ret < 0));
471
472 work_clear_pending(work);
473 return ret;
474}
475
460/** 476/**
461 * cancel_work_sync - block until a work_struct's callback has terminated 477 * cancel_work_sync - block until a work_struct's callback has terminated
462 * @work: the work which is to be flushed 478 * @work: the work which is to be flushed
463 * 479 *
480 * Returns true if @work was pending.
481 *
464 * cancel_work_sync() will cancel the work if it is queued. If the work's 482 * cancel_work_sync() will cancel the work if it is queued. If the work's
465 * callback appears to be running, cancel_work_sync() will block until it 483 * callback appears to be running, cancel_work_sync() will block until it
466 * has completed. 484 * has completed.
@@ -476,31 +494,26 @@ static void wait_on_work(struct work_struct *work)
476 * The caller must ensure that workqueue_struct on which this work was last 494 * The caller must ensure that workqueue_struct on which this work was last
477 * queued can't be destroyed before this function returns. 495 * queued can't be destroyed before this function returns.
478 */ 496 */
479void cancel_work_sync(struct work_struct *work) 497int cancel_work_sync(struct work_struct *work)
480{ 498{
481 while (!try_to_grab_pending(work)) 499 return __cancel_work_timer(work, NULL);
482 cpu_relax();
483 wait_on_work(work);
484 work_clear_pending(work);
485} 500}
486EXPORT_SYMBOL_GPL(cancel_work_sync); 501EXPORT_SYMBOL_GPL(cancel_work_sync);
487 502
488/** 503/**
489 * cancel_rearming_delayed_work - reliably kill off a delayed work. 504 * cancel_delayed_work_sync - reliably kill off a delayed work.
490 * @dwork: the delayed work struct 505 * @dwork: the delayed work struct
491 * 506 *
507 * Returns true if @dwork was pending.
508 *
492 * It is possible to use this function if @dwork rearms itself via queue_work() 509 * It is possible to use this function if @dwork rearms itself via queue_work()
493 * or queue_delayed_work(). See also the comment for cancel_work_sync(). 510 * or queue_delayed_work(). See also the comment for cancel_work_sync().
494 */ 511 */
495void cancel_rearming_delayed_work(struct delayed_work *dwork) 512int cancel_delayed_work_sync(struct delayed_work *dwork)
496{ 513{
497 while (!del_timer(&dwork->timer) && 514 return __cancel_work_timer(&dwork->work, &dwork->timer);
498 !try_to_grab_pending(&dwork->work))
499 cpu_relax();
500 wait_on_work(&dwork->work);
501 work_clear_pending(&dwork->work);
502} 515}
503EXPORT_SYMBOL(cancel_rearming_delayed_work); 516EXPORT_SYMBOL(cancel_delayed_work_sync);
504 517
505static struct workqueue_struct *keventd_wq __read_mostly; 518static struct workqueue_struct *keventd_wq __read_mostly;
506 519
@@ -739,18 +752,17 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
739 if (cwq->thread == NULL) 752 if (cwq->thread == NULL)
740 return; 753 return;
741 754
755 flush_cpu_workqueue(cwq);
742 /* 756 /*
743 * If the caller is CPU_DEAD the single flush_cpu_workqueue() 757 * If the caller is CPU_DEAD and cwq->worklist was not empty,
744 * is not enough, a concurrent flush_workqueue() can insert a 758 * a concurrent flush_workqueue() can insert a barrier after us.
745 * barrier after us. 759 * However, in that case run_workqueue() won't return and check
760 * kthread_should_stop() until it flushes all work_struct's.
746 * When ->worklist becomes empty it is safe to exit because no 761 * When ->worklist becomes empty it is safe to exit because no
747 * more work_structs can be queued on this cwq: flush_workqueue 762 * more work_structs can be queued on this cwq: flush_workqueue
748 * checks list_empty(), and a "normal" queue_work() can't use 763 * checks list_empty(), and a "normal" queue_work() can't use
749 * a dead CPU. 764 * a dead CPU.
750 */ 765 */
751 while (flush_cpu_workqueue(cwq))
752 ;
753
754 kthread_stop(cwq->thread); 766 kthread_stop(cwq->thread);
755 cwq->thread = NULL; 767 cwq->thread = NULL;
756} 768}
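The conversion gives both cancel helpers a "was it pending" return and a shared __cancel_work_timer() slow path that retries until it either steals the pending bit or sees the handler finish. Driver-side usage, sketched with hypothetical work items:

#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
	/* ... */
}

static DECLARE_WORK(my_work, my_work_fn);
static DECLARE_DELAYED_WORK(my_dwork, my_work_fn);

static void my_teardown(void)
{
	/* both return nonzero iff the work was still pending */
	if (cancel_work_sync(&my_work))
		printk(KERN_DEBUG "my_work: cancelled before it ran\n");
	if (cancel_delayed_work_sync(&my_dwork))
		printk(KERN_DEBUG "my_dwork: cancelled before it ran\n");
	/* in either case the callback is guaranteed not to be running now */
}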