Diffstat (limited to 'kernel')
 kernel/Makefile           |   6
 kernel/audit.c            |  96
 kernel/audit.h            |   1
 kernel/auditsc.c          |   5
 kernel/cpuset.c           |   6
 kernel/exit.c             |  31
 kernel/fork.c             |   9
 kernel/futex.c            | 138
 kernel/irq/spurious.c     |  12
 kernel/kallsyms.c         |  11
 kernel/kfifo.c            |   3
 kernel/kthread.c          |   2
 kernel/module.c           |  53
 kernel/nsproxy.c          |  72
 kernel/pid.c              |   2
 kernel/printk.c           |  55
 kernel/ptrace.c           |   7
 kernel/rtmutex-debug.c    |   6
 kernel/rtmutex.c          |   6
 kernel/rtmutex_common.h   |   9
 kernel/seccomp.c          |  29
 kernel/signal.c           |  33
 kernel/softirq.c          |   6
 kernel/stop_machine.c     |   8
 kernel/sys.c              |  13
 kernel/sys_ni.c           |   1
 kernel/sysctl.c           |  41
 kernel/taskstats.c        |   4
 kernel/time.c             |  32
 kernel/time/clockevents.c |  41
 kernel/time/ntp.c         |   2
 kernel/time/timekeeping.c |  37
 kernel/time/timer_stats.c |  14
 kernel/timer.c            |  15
 kernel/user.c             |  18
 kernel/user_namespace.c   |  87
 kernel/utsname.c          |  12
 kernel/utsname_sysctl.c   |   5
 kernel/workqueue.c        |  45
 39 files changed, 686 insertions(+), 287 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 642d4277c2ea..2a999836ca18 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,11 +4,12 @@
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
             exit.o itimer.o time.o softirq.o resource.o \
-            sysctl.o capability.o ptrace.o timer.o user.o \
+            sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
             signal.o sys.o kmod.o workqueue.o pid.o \
             rcupdate.o extable.o params.o posix-timers.o \
             kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-            hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o
+            hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
+            utsname.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
@@ -48,7 +49,6 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
-obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 
diff --git a/kernel/audit.c b/kernel/audit.c
index d13276d41410..5ce8851facf7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -58,6 +58,7 @@
 #include <linux/selinux.h>
 #include <linux/inotify.h>
 #include <linux/freezer.h>
+#include <linux/tty.h>
 
 #include "audit.h"
 
@@ -423,6 +424,31 @@ static int kauditd_thread(void *dummy)
         return 0;
 }
 
+static int audit_prepare_user_tty(pid_t pid, uid_t loginuid)
+{
+        struct task_struct *tsk;
+        int err;
+
+        read_lock(&tasklist_lock);
+        tsk = find_task_by_pid(pid);
+        err = -ESRCH;
+        if (!tsk)
+                goto out;
+        err = 0;
+
+        spin_lock_irq(&tsk->sighand->siglock);
+        if (!tsk->signal->audit_tty)
+                err = -EPERM;
+        spin_unlock_irq(&tsk->sighand->siglock);
+        if (err)
+                goto out;
+
+        tty_audit_push_task(tsk, loginuid);
+out:
+        read_unlock(&tasklist_lock);
+        return err;
+}
+
 int audit_send_list(void *_dest)
 {
         struct audit_netlink_list *dest = _dest;
@@ -511,6 +537,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
         case AUDIT_DEL:
         case AUDIT_DEL_RULE:
         case AUDIT_SIGNAL_INFO:
+        case AUDIT_TTY_GET:
+        case AUDIT_TTY_SET:
                 if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
                         err = -EPERM;
                 break;
@@ -622,6 +650,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                 err = audit_filter_user(&NETLINK_CB(skb), msg_type);
                 if (err == 1) {
                         err = 0;
+                        if (msg_type == AUDIT_USER_TTY) {
+                                err = audit_prepare_user_tty(pid, loginuid);
+                                if (err)
+                                        break;
+                        }
                         ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
                         if (ab) {
                                 audit_log_format(ab,
@@ -638,8 +671,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
638 " subj=%s", ctx); 671 " subj=%s", ctx);
639 kfree(ctx); 672 kfree(ctx);
640 } 673 }
641 audit_log_format(ab, " msg='%.1024s'", 674 if (msg_type != AUDIT_USER_TTY)
642 (char *)data); 675 audit_log_format(ab, " msg='%.1024s'",
676 (char *)data);
677 else {
678 int size;
679
680 audit_log_format(ab, " msg=");
681 size = nlmsg_len(nlh);
682 audit_log_n_untrustedstring(ab, size,
683 data);
684 }
643 audit_set_pid(ab, pid); 685 audit_set_pid(ab, pid);
644 audit_log_end(ab); 686 audit_log_end(ab);
645 } 687 }
@@ -730,6 +772,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                  0, 0, sig_data, sizeof(*sig_data) + len);
                 kfree(sig_data);
                 break;
+        case AUDIT_TTY_GET: {
+                struct audit_tty_status s;
+                struct task_struct *tsk;
+
+                read_lock(&tasklist_lock);
+                tsk = find_task_by_pid(pid);
+                if (!tsk)
+                        err = -ESRCH;
+                else {
+                        spin_lock_irq(&tsk->sighand->siglock);
+                        s.enabled = tsk->signal->audit_tty != 0;
+                        spin_unlock_irq(&tsk->sighand->siglock);
+                }
+                read_unlock(&tasklist_lock);
+                audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0,
+                                 &s, sizeof(s));
+                break;
+        }
+        case AUDIT_TTY_SET: {
+                struct audit_tty_status *s;
+                struct task_struct *tsk;
+
+                if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
+                        return -EINVAL;
+                s = data;
+                if (s->enabled != 0 && s->enabled != 1)
+                        return -EINVAL;
+                read_lock(&tasklist_lock);
+                tsk = find_task_by_pid(pid);
+                if (!tsk)
+                        err = -ESRCH;
+                else {
+                        spin_lock_irq(&tsk->sighand->siglock);
+                        tsk->signal->audit_tty = s->enabled != 0;
+                        spin_unlock_irq(&tsk->sighand->siglock);
+                }
+                read_unlock(&tasklist_lock);
+                break;
+        }
         default:
                 err = -EINVAL;
                 break;
@@ -1185,7 +1266,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 }
 
 /**
- * audit_log_n_unstrustedstring - log a string that may contain random characters
+ * audit_log_n_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
  * @len: lenth of string (not including trailing null)
  * @string: string to be logged
@@ -1201,25 +1282,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
                                         const char *string)
 {
-        const unsigned char *p = string;
+        const unsigned char *p;
 
-        while (*p) {
+        for (p = string; p < (const unsigned char *)string + len && *p; p++) {
                 if (*p == '"' || *p < 0x21 || *p > 0x7f) {
                         audit_log_hex(ab, string, len);
                         return string + len + 1;
                 }
-                p++;
         }
         audit_log_n_string(ab, len, string);
         return p + 1;
 }
 
 /**
- * audit_log_unstrustedstring - log a string that may contain random characters
+ * audit_log_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
  * @string: string to be logged
  *
- * Same as audit_log_n_unstrustedstring(), except that strlen is used to
+ * Same as audit_log_n_untrustedstring(), except that strlen is used to
  * determine string length.
  */
 const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
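
[Note: the bounded loop above hex-encodes the whole string as soon as any byte is a double quote or falls outside printable ASCII. A small userspace sketch of the same predicate; the helper names are hypothetical, only the test mirrors the kernel code.]

#include <stdio.h>
#include <string.h>

/* Mirrors the kernel's audit quoting predicate: a byte forces hex
 * encoding if it is '"', below 0x21 (space/control), or above 0x7f. */
static int needs_hex(const unsigned char *s, size_t len)
{
        size_t i;

        for (i = 0; i < len && s[i]; i++)
                if (s[i] == '"' || s[i] < 0x21 || s[i] > 0x7f)
                        return 1;
        return 0;
}

int main(void)
{
        const char *clean = "sshd", *dirty = "bad name";

        printf("%s -> %s\n", clean,
               needs_hex((const unsigned char *)clean, strlen(clean)) ? "hex" : "quoted");
        printf("%s -> %s\n", dirty,
               needs_hex((const unsigned char *)dirty, strlen(dirty)) ? "hex" : "quoted");
        return 0;
}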
diff --git a/kernel/audit.h b/kernel/audit.h
index 815d6f5c04ee..95877435c347 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
 extern void             audit_send_reply(int pid, int seq, int type,
                                          int done, int multi,
                                          void *payload, int size);
-extern void             audit_log_lost(const char *message);
 extern void             audit_panic(const char *message);
 
 struct audit_netlink_list {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e36481ed61b4..b7640a5f382a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -71,9 +71,6 @@
 
 extern struct list_head audit_filter_list[];
 
-/* No syscall auditing will take place unless audit_enabled != 0. */
-extern int audit_enabled;
-
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname(). */
 #define AUDIT_NAMES    20
@@ -2040,7 +2037,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 
 /**
  * audit_core_dumps - record information about processes that end abnormally
- * @sig: signal value
+ * @signr: signal value
  *
  * If a process ends with a core dump, something fishy is going on and we
  * should record the event for investigation.
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4c49188cc49b..824b1c01f410 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -981,10 +981,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
                 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
                 if (!mmarray)
                         goto done;
-                write_lock_irq(&tasklist_lock);         /* block fork */
+                read_lock(&tasklist_lock);              /* block fork */
                 if (atomic_read(&cs->count) <= ntasks)
                         break;                          /* got enough */
-                write_unlock_irq(&tasklist_lock);       /* try again */
+                read_unlock(&tasklist_lock);            /* try again */
                 kfree(mmarray);
         }
 
@@ -1006,7 +1006,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
                         continue;
                 mmarray[n++] = mm;
         } while_each_thread(g, p);
-        write_unlock_irq(&tasklist_lock);
+        read_unlock(&tasklist_lock);
 
         /*
          * Now that we've dropped the tasklist spinlock, we can
diff --git a/kernel/exit.c b/kernel/exit.c
index ca6a11b73023..57626692cd90 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -858,6 +858,34 @@ static void exit_notify(struct task_struct *tsk)
         release_task(tsk);
 }
 
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static void check_stack_usage(void)
+{
+        static DEFINE_SPINLOCK(low_water_lock);
+        static int lowest_to_date = THREAD_SIZE;
+        unsigned long *n = end_of_stack(current);
+        unsigned long free;
+
+        while (*n == 0)
+                n++;
+        free = (unsigned long)n - (unsigned long)end_of_stack(current);
+
+        if (free >= lowest_to_date)
+                return;
+
+        spin_lock(&low_water_lock);
+        if (free < lowest_to_date) {
+                printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
+                                "left\n",
+                                current->comm, free);
+                lowest_to_date = free;
+        }
+        spin_unlock(&low_water_lock);
+}
+#else
+static inline void check_stack_usage(void) {}
+#endif
+
 fastcall NORET_TYPE void do_exit(long code)
 {
         struct task_struct *tsk = current;
@@ -937,6 +965,8 @@ fastcall NORET_TYPE void do_exit(long code)
         if (unlikely(tsk->compat_robust_list))
                 compat_exit_robust_list(tsk);
 #endif
+        if (group_dead)
+                tty_audit_exit();
         if (unlikely(tsk->audit_context))
                 audit_free(tsk);
 
@@ -949,6 +979,7 @@ fastcall NORET_TYPE void do_exit(long code)
         exit_sem(tsk);
         __exit_files(tsk);
         __exit_fs(tsk);
+        check_stack_usage();
         exit_thread();
         cpuset_exit(tsk);
         exit_keys(tsk);
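
[Note: check_stack_usage() relies on thread stacks starting out zero-filled, so scanning from the unused end for the first non-zero word yields a high-water mark. A userspace sketch of the same idea, with a zero-filled buffer standing in for the thread stack.]

#include <stdio.h>
#include <string.h>

#define STACK_WORDS 1024

int main(void)
{
        unsigned long stack[STACK_WORDS];
        unsigned long *n = stack;

        memset(stack, 0, sizeof(stack));        /* fresh, zero-filled "stack" */
        stack[STACK_WORDS - 1] = 0xdeadbeef;    /* pretend the top was used */

        /* Scan from the unused end for the first word ever written. */
        while (*n == 0)
                n++;
        printf("free watermark: %zu bytes never touched\n",
               (size_t)((char *)n - (char *)stack));
        return 0;
}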
diff --git a/kernel/fork.c b/kernel/fork.c
index da3a155bba0d..7c5c5888e00a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
+#include <linux/tty.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -897,6 +898,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
         }
         acct_init_pacct(&sig->pacct);
 
+        tty_audit_fork(sig);
+
         return 0;
 }
 
@@ -999,7 +1002,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         if (atomic_read(&p->user->processes) >=
                         p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-                    p->user != &root_user)
+                    p->user != current->nsproxy->user_ns->root_user)
                         goto bad_fork_free;
         }
 
@@ -1059,6 +1062,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
         p->lock_depth = -1;             /* -1 = no lock */
         do_posix_clock_monotonic_gettime(&p->start_time);
+        p->real_start_time = p->start_time;
+        monotonic_to_bootbased(&p->real_start_time);
         p->security = NULL;
         p->io_context = NULL;
         p->io_wait = NULL;
@@ -1601,7 +1606,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
         err = -EINVAL;
         if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
                                 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-                                CLONE_NEWUTS|CLONE_NEWIPC))
+                                CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
                 goto bad_unshare_out;
 
         if ((err = unshare_thread(unshare_flags)))
diff --git a/kernel/futex.c b/kernel/futex.c
index 45490bec5831..5c3f45d07c53 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -121,6 +121,24 @@ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 static struct vfsmount *futex_mnt;
 
 /*
+ * Take mm->mmap_sem, when futex is shared
+ */
+static inline void futex_lock_mm(struct rw_semaphore *fshared)
+{
+        if (fshared)
+                down_read(fshared);
+}
+
+/*
+ * Release mm->mmap_sem, when the futex is shared
+ */
+static inline void futex_unlock_mm(struct rw_semaphore *fshared)
+{
+        if (fshared)
+                up_read(fshared);
+}
+
+/*
  * We hash on the keys returned from get_futex_key (see below).
  */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -287,7 +305,18 @@ void drop_futex_key_refs(union futex_key *key)
 }
 EXPORT_SYMBOL_GPL(drop_futex_key_refs);
 
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+{
+        u32 curval;
+
+        pagefault_disable();
+        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+        pagefault_enable();
+
+        return curval;
+}
+
+static int get_futex_value_locked(u32 *dest, u32 __user *from)
 {
         int ret;
 
@@ -620,9 +649,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 
         newval = FUTEX_WAITERS | new_owner->pid;
 
-        pagefault_disable();
-        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-        pagefault_enable();
+        curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
         if (curval == -EFAULT)
                 ret = -EFAULT;
@@ -659,9 +686,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
          * There is no waiter, so we unlock the futex. The owner died
          * bit has not to be preserved here. We are the owner:
          */
-        pagefault_disable();
-        oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
-        pagefault_enable();
+        oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
 
         if (oldval == -EFAULT)
                 return oldval;
@@ -700,8 +725,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
         union futex_key key;
         int ret;
 
-        if (fshared)
-                down_read(fshared);
+        futex_lock_mm(fshared);
 
         ret = get_futex_key(uaddr, fshared, &key);
         if (unlikely(ret != 0))
@@ -725,8 +749,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 
         spin_unlock(&hb->lock);
 out:
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
         return ret;
 }
 
732 755
@@ -746,8 +769,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
         int ret, op_ret, attempt = 0;
 
 retryfull:
-        if (fshared)
-                down_read(fshared);
+        futex_lock_mm(fshared);
 
         ret = get_futex_key(uaddr1, fshared, &key1);
         if (unlikely(ret != 0))
@@ -793,7 +815,7 @@ retry:
          */
         if (attempt++) {
                 ret = futex_handle_fault((unsigned long)uaddr2,
-                                        fshared, attempt);
+                                         fshared, attempt);
                 if (ret)
                         goto out;
                 goto retry;
@@ -803,8 +825,7 @@ retry:
          * If we would have faulted, release mmap_sem,
          * fault it in and start all over again.
          */
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
 
         ret = get_user(dummy, uaddr2);
         if (ret)
@@ -841,8 +862,8 @@ retry:
         if (hb1 != hb2)
                 spin_unlock(&hb2->lock);
 out:
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
+
         return ret;
 }
 
@@ -861,8 +882,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
         int ret, drop_count = 0;
 
  retry:
-        if (fshared)
-                down_read(fshared);
+        futex_lock_mm(fshared);
 
         ret = get_futex_key(uaddr1, fshared, &key1);
         if (unlikely(ret != 0))
@@ -890,8 +910,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
                         * If we would have faulted, release mmap_sem, fault
                         * it in and start all over again.
                         */
-                        if (fshared)
-                                up_read(fshared);
+                        futex_unlock_mm(fshared);
 
                         ret = get_user(curval, uaddr1);
 
@@ -944,8 +963,7 @@ out_unlock:
         drop_futex_key_refs(&key1);
 
 out:
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
         return ret;
 }
951 969
@@ -1113,10 +1131,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
         while (!ret) {
                 newval = (uval & FUTEX_OWNER_DIED) | newtid;
 
-                pagefault_disable();
-                curval = futex_atomic_cmpxchg_inatomic(uaddr,
-                                                       uval, newval);
-                pagefault_enable();
+                curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
                 if (curval == -EFAULT)
                         ret = -EFAULT;
@@ -1134,6 +1149,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 #define ARG3_SHARED  1
 
 static long futex_wait_restart(struct restart_block *restart);
+
 static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
                       u32 val, ktime_t *abs_time)
 {
@@ -1148,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 
         q.pi_state = NULL;
  retry:
-        if (fshared)
-                down_read(fshared);
+        futex_lock_mm(fshared);
 
         ret = get_futex_key(uaddr, fshared, &q.key);
         if (unlikely(ret != 0))
@@ -1186,8 +1201,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
                  * If we would have faulted, release mmap_sem, fault it in and
                  * start all over again.
                  */
-                if (fshared)
-                        up_read(fshared);
+                futex_unlock_mm(fshared);
 
                 ret = get_user(uval, uaddr);
 
@@ -1206,8 +1220,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
          * Now the futex is queued and we have checked the data, we
          * don't want to hold mmap_sem while we sleep.
          */
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
 
         /*
          * There might have been scheduling since the queue_me(), as we
@@ -1285,8 +1298,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
         queue_unlock(&q, hb);
 
  out_release_sem:
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
         return ret;
 }
 
@@ -1333,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
         q.pi_state = NULL;
  retry:
-        if (fshared)
-                down_read(fshared);
+        futex_lock_mm(fshared);
 
         ret = get_futex_key(uaddr, fshared, &q.key);
         if (unlikely(ret != 0))
@@ -1353,9 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
          */
         newval = current->pid;
 
-        pagefault_disable();
-        curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
-        pagefault_enable();
+        curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
 
         if (unlikely(curval == -EFAULT))
                 goto uaddr_faulted;
@@ -1398,9 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                 lock_taken = 1;
         }
 
-        pagefault_disable();
-        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-        pagefault_enable();
+        curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
         if (unlikely(curval == -EFAULT))
                 goto uaddr_faulted;
@@ -1428,8 +1435,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                          * exit to complete.
                          */
                         queue_unlock(&q, hb);
-                        if (fshared)
-                                up_read(fshared);
+                        futex_unlock_mm(fshared);
                         cond_resched();
                         goto retry;
 
@@ -1465,8 +1471,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
          * Now the futex is queued and we have checked the data, we
          * don't want to hold mmap_sem while we sleep.
          */
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
 
         WARN_ON(!q.pi_state);
         /*
@@ -1480,8 +1485,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                 ret = ret ? 0 : -EWOULDBLOCK;
         }
 
-        if (fshared)
-                down_read(fshared);
+        futex_lock_mm(fshared);
         spin_lock(q.lock_ptr);
 
         if (!ret) {
@@ -1518,8 +1522,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
         /* Unqueue and drop the lock */
         unqueue_me_pi(&q);
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
 
         return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
@@ -1527,8 +1530,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         queue_unlock(&q, hb);
 
  out_release_sem:
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
         return ret;
 
  uaddr_faulted:
@@ -1550,8 +1552,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                 goto retry_unlocked;
         }
 
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
 
         ret = get_user(uval, uaddr);
         if (!ret && (uval != -EFAULT))
@@ -1585,8 +1586,7 @@ retry:
         /*
          * First take all the futex related locks:
          */
-        if (fshared)
-                down_read(fshared);
+        futex_lock_mm(fshared);
 
         ret = get_futex_key(uaddr, fshared, &key);
         if (unlikely(ret != 0))
@@ -1601,11 +1601,9 @@ retry_unlocked:
          * again. If it succeeds then we can return without waking
          * anyone else up:
          */
-        if (!(uval & FUTEX_OWNER_DIED)) {
-                pagefault_disable();
-                uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-                pagefault_enable();
-        }
+        if (!(uval & FUTEX_OWNER_DIED))
+                uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
+
 
         if (unlikely(uval == -EFAULT))
                 goto pi_faulted;
@@ -1647,8 +1645,7 @@ retry_unlocked:
 out_unlock:
         spin_unlock(&hb->lock);
 out:
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
 
         return ret;
 
@@ -1671,8 +1668,7 @@ pi_faulted:
                 goto retry_unlocked;
         }
 
-        if (fshared)
-                up_read(fshared);
+        futex_unlock_mm(fshared);
 
         ret = get_user(uval, uaddr);
         if (!ret && (uval != -EFAULT))
@@ -1729,8 +1725,8 @@ static int futex_fd(u32 __user *uaddr, int signal)
 
         if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
                 printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
-                        "will be removed from the kernel in June 2007\n",
-                        current->comm);
+                       "will be removed from the kernel in June 2007\n",
+                       current->comm);
         }
 
         ret = -EINVAL;
@@ -1908,10 +1904,8 @@ retry:
                  * Wake robust non-PI futexes here. The wakeup of
                  * PI futexes happens in exit_pi_state():
                  */
-                if (!pi) {
-                        if (uval & FUTEX_WAITERS)
-                                futex_wake(uaddr, &curr->mm->mmap_sem, 1);
-                }
+                if (!pi && (uval & FUTEX_WAITERS))
+                        futex_wake(uaddr, &curr->mm->mmap_sem, 1);
         }
         return 0;
 }
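
[Note: the conversion above only wraps the mmap_sem handling in futex_lock_mm()/futex_unlock_mm(); the userspace futex contract is untouched. As a reminder of that contract, a minimal sketch of FUTEX_WAIT/FUTEX_WAKE through the raw syscall; the value check that makes FUTEX_WAIT race-free is the one the kernel performs under the locks taken above.]

#include <errno.h>
#include <linux/futex.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static int futex_word = 1;

int main(void)
{
        /* FUTEX_WAIT only sleeps if *uaddr still equals val; passing a
         * stale value (0 != 1) makes the kernel return EAGAIN at once. */
        long ret = syscall(SYS_futex, &futex_word, FUTEX_WAIT, 0, NULL, NULL, 0);

        if (ret == -1 && errno == EAGAIN)
                printf("value changed before we slept, as expected\n");

        /* Wake up to one waiter (a no-op here, nobody is sleeping). */
        syscall(SYS_futex, &futex_word, FUTEX_WAKE, 1, NULL, NULL, 0);
        return 0;
}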
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index bd9e272d55e9..32b161972fad 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -172,7 +172,17 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
                   irqreturn_t action_ret)
 {
         if (unlikely(action_ret != IRQ_HANDLED)) {
-                desc->irqs_unhandled++;
+                /*
+                 * If we are seeing only the odd spurious IRQ caused by
+                 * bus asynchronicity then don't eventually trigger an error,
+                 * otherwise the counter becomes a doomsday timer for otherwise
+                 * working systems
+                 */
+                if (jiffies - desc->last_unhandled > HZ/10)
+                        desc->irqs_unhandled = 1;
+                else
+                        desc->irqs_unhandled++;
+                desc->last_unhandled = jiffies;
                 if (unlikely(action_ret != IRQ_NONE))
                         report_bad_irq(irq, desc, action_ret);
         }
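
[Note: the last_unhandled check turns the unhandled count into a crude rate filter: it only keeps climbing while spurious interrupts arrive less than HZ/10 jiffies (100 ms) apart, and an isolated stray IRQ resets it to 1. A standalone sketch of that decay rule with made-up tick values.]

#include <stdio.h>

#define HZ 1000

static unsigned int irqs_unhandled;
static unsigned long last_unhandled;

/* One spurious event at time "now" (in jiffies). */
static void note_spurious(unsigned long now)
{
        if (now - last_unhandled > HZ / 10)
                irqs_unhandled = 1;     /* isolated stray IRQ: restart count */
        else
                irqs_unhandled++;       /* burst in progress: keep counting */
        last_unhandled = now;
}

int main(void)
{
        unsigned long t;

        for (t = 0; t < 500; t += 10)   /* a tight burst, 10 ms apart */
                note_spurious(t);
        printf("after burst: %u\n", irqs_unhandled);
        note_spurious(t + 1000);        /* one stray event much later */
        printf("after stray: %u\n", irqs_unhandled);
        return 0;
}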
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index fed54418626c..0d662475dd9f 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -317,13 +317,12 @@ int sprint_symbol(char *buffer, unsigned long address)
         name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
         if (!name)
                 return sprintf(buffer, "0x%lx", address);
-        else {
-                if (modname)
-                        return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
-                                size, modname);
-                else
-                        return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
-        }
+
+        if (modname)
+                return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
+                               size, modname);
+        else
+                return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
 }
 
 /* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index cee419143fd4..bc41ad0f24f8 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/kfifo.h>
+#include <linux/log2.h>
 
 /**
  * kfifo_init - allocates a new FIFO using a preallocated buffer
@@ -41,7 +42,7 @@ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
         struct kfifo *fifo;
 
         /* size must be a power of 2 */
-        BUG_ON(size & (size - 1));
+        BUG_ON(!is_power_of_2(size));
 
         fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
         if (!fifo)
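
[Note: is_power_of_2() from <linux/log2.h> encodes the same bit trick as the open-coded test, plus a guard against zero, which `size & (size - 1)` silently accepted. A userspace equivalent for reference.]

#include <stdbool.h>
#include <stdio.h>

/* Same definition as the kernel's is_power_of_2(): nonzero and with
 * exactly one bit set, which n & (n - 1) clears. */
static bool is_power_of_2(unsigned long n)
{
        return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
        unsigned long v[] = { 0, 1, 2, 3, 4096, 4097 };
        size_t i;

        for (i = 0; i < sizeof(v) / sizeof(v[0]); i++)
                printf("%lu -> %d\n", v[i], is_power_of_2(v[i]));
        return 0;
}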
diff --git a/kernel/kthread.c b/kernel/kthread.c
index bbd51b81a3e8..a404f7ee7395 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -215,7 +215,7 @@ int kthread_stop(struct task_struct *k)
 EXPORT_SYMBOL(kthread_stop);
 
 
-static __init void kthreadd_setup(void)
+static noinline __init_refok void kthreadd_setup(void)
 {
         struct task_struct *tsk = current;
 
diff --git a/kernel/module.c b/kernel/module.c
index 015d60cfd90e..539fed9ac83c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -61,10 +61,8 @@ extern int module_sysfs_initialized;
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
-/* Protects module list */
-static DEFINE_SPINLOCK(modlist_lock);
-
-/* List of modules, protected by module_mutex AND modlist_lock */
+/* List of modules, protected by module_mutex or preempt_disable
+ * (add/delete uses stop_machine). */
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
@@ -760,14 +758,13 @@ static void print_unload_info(struct seq_file *m, struct module *mod)
 void __symbol_put(const char *symbol)
 {
         struct module *owner;
-        unsigned long flags;
         const unsigned long *crc;
 
-        spin_lock_irqsave(&modlist_lock, flags);
+        preempt_disable();
         if (!__find_symbol(symbol, &owner, &crc, 1))
                 BUG();
         module_put(owner);
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        preempt_enable();
 }
 EXPORT_SYMBOL(__symbol_put);
 
@@ -1228,14 +1225,14 @@ static void free_module(struct module *mod)
 void *__symbol_get(const char *symbol)
 {
         struct module *owner;
-        unsigned long value, flags;
+        unsigned long value;
         const unsigned long *crc;
 
-        spin_lock_irqsave(&modlist_lock, flags);
+        preempt_disable();
         value = __find_symbol(symbol, &owner, &crc, 1);
         if (value && !strong_try_module_get(owner))
                 value = 0;
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        preempt_enable();
 
         return (void *)value;
 }
@@ -2232,26 +2229,13 @@ unsigned long module_kallsyms_lookup_name(const char *name)
 /* Called by the /proc file system to return a list of modules. */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-        struct list_head *i;
-        loff_t n = 0;
-
         mutex_lock(&module_mutex);
-        list_for_each(i, &modules) {
-                if (n++ == *pos)
-                        break;
-        }
-        if (i == &modules)
-                return NULL;
-        return i;
+        return seq_list_start(&modules, *pos);
 }
 
 static void *m_next(struct seq_file *m, void *p, loff_t *pos)
 {
-        struct list_head *i = p;
-        (*pos)++;
-        if (i->next == &modules)
-                return NULL;
-        return i->next;
+        return seq_list_next(p, &modules, pos);
 }
 
 static void m_stop(struct seq_file *m, void *p)
@@ -2321,11 +2305,10 @@ const struct seq_operations modules_op = {
 /* Given an address, look for it in the module exception tables. */
 const struct exception_table_entry *search_module_extables(unsigned long addr)
 {
-        unsigned long flags;
         const struct exception_table_entry *e = NULL;
         struct module *mod;
 
-        spin_lock_irqsave(&modlist_lock, flags);
+        preempt_disable();
         list_for_each_entry(mod, &modules, list) {
                 if (mod->num_exentries == 0)
                         continue;
@@ -2336,7 +2319,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
                 if (e)
                         break;
         }
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        preempt_enable();
 
         /* Now, if we found one, we are running inside it now, hence
            we cannot unload the module, hence no refcnt needed. */
@@ -2348,25 +2331,24 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
  */
 int is_module_address(unsigned long addr)
 {
-        unsigned long flags;
         struct module *mod;
 
-        spin_lock_irqsave(&modlist_lock, flags);
+        preempt_disable();
 
         list_for_each_entry(mod, &modules, list) {
                 if (within(addr, mod->module_core, mod->core_size)) {
-                        spin_unlock_irqrestore(&modlist_lock, flags);
+                        preempt_enable();
                         return 1;
                 }
         }
 
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        preempt_enable();
 
         return 0;
 }
 
 
-/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
+/* Is this a valid kernel address? */
 struct module *__module_text_address(unsigned long addr)
 {
         struct module *mod;
@@ -2381,11 +2363,10 @@ struct module *__module_text_address(unsigned long addr)
 struct module *module_text_address(unsigned long addr)
 {
         struct module *mod;
-        unsigned long flags;
 
-        spin_lock_irqsave(&modlist_lock, flags);
+        preempt_disable();
         mod = __module_text_address(addr);
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        preempt_enable();
 
         return mod;
 }
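
[Note: seq_list_start()/seq_list_next() replace the open-coded cursor walk in m_start()/m_next(). Their behaviour is easy to restate over a toy singly linked list; the node type here is hypothetical, not the kernel's list_head.]

#include <stdio.h>

struct node { int val; struct node *next; };

/* Analogue of seq_list_start(): return the pos'th element, or NULL. */
static struct node *list_start(struct node *head, long pos)
{
        struct node *n;

        for (n = head; n && pos > 0; n = n->next)
                pos--;
        return n;
}

/* Analogue of seq_list_next(): advance the cursor and bump *pos. */
static struct node *list_next(struct node *cur, long *pos)
{
        ++*pos;
        return cur->next;
}

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        long pos = 1;
        struct node *n;

        for (n = list_start(&a, pos); n; n = list_next(n, &pos))
                printf("pos=%ld val=%d\n", pos, n->val);
        return 0;
}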
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 9e83b589f754..10f0bbba382b 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -21,6 +21,8 @@
 #include <linux/utsname.h>
 #include <linux/pid_namespace.h>
 
+static struct kmem_cache *nsproxy_cachep;
+
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 static inline void get_nsproxy(struct nsproxy *ns)
@@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
 {
         struct nsproxy *ns;
 
-        ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
-        if (ns)
+        ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
+        if (ns) {
+                memcpy(ns, orig, sizeof(struct nsproxy));
                 atomic_set(&ns->count, 1);
+        }
         return ns;
 }
 
51 55
@@ -54,33 +58,51 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
  * Return the newly created nsproxy. Do not attach this to the task,
  * leave it to the caller to do proper locking and attach it to task.
  */
-static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk,
-                        struct fs_struct *new_fs)
+static struct nsproxy *create_new_namespaces(unsigned long flags,
+                        struct task_struct *tsk, struct fs_struct *new_fs)
 {
         struct nsproxy *new_nsp;
+        int err;
 
         new_nsp = clone_nsproxy(tsk->nsproxy);
         if (!new_nsp)
                 return ERR_PTR(-ENOMEM);
 
         new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
-        if (IS_ERR(new_nsp->mnt_ns))
+        if (IS_ERR(new_nsp->mnt_ns)) {
+                err = PTR_ERR(new_nsp->mnt_ns);
                 goto out_ns;
+        }
 
         new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
-        if (IS_ERR(new_nsp->uts_ns))
+        if (IS_ERR(new_nsp->uts_ns)) {
+                err = PTR_ERR(new_nsp->uts_ns);
                 goto out_uts;
+        }
 
         new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
-        if (IS_ERR(new_nsp->ipc_ns))
+        if (IS_ERR(new_nsp->ipc_ns)) {
+                err = PTR_ERR(new_nsp->ipc_ns);
                 goto out_ipc;
+        }
 
         new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns);
-        if (IS_ERR(new_nsp->pid_ns))
+        if (IS_ERR(new_nsp->pid_ns)) {
+                err = PTR_ERR(new_nsp->pid_ns);
                 goto out_pid;
+        }
+
+        new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
+        if (IS_ERR(new_nsp->user_ns)) {
+                err = PTR_ERR(new_nsp->user_ns);
+                goto out_user;
+        }
 
         return new_nsp;
 
+out_user:
+        if (new_nsp->pid_ns)
+                put_pid_ns(new_nsp->pid_ns);
 out_pid:
         if (new_nsp->ipc_ns)
                 put_ipc_ns(new_nsp->ipc_ns);
@@ -91,15 +113,15 @@ out_uts:
         if (new_nsp->mnt_ns)
                 put_mnt_ns(new_nsp->mnt_ns);
 out_ns:
-        kfree(new_nsp);
-        return ERR_PTR(-ENOMEM);
+        kmem_cache_free(nsproxy_cachep, new_nsp);
+        return ERR_PTR(err);
 }
 
 /*
  * called from clone. This now handles copy for nsproxy and all
  * namespaces therein.
  */
-int copy_namespaces(int flags, struct task_struct *tsk)
+int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 {
         struct nsproxy *old_ns = tsk->nsproxy;
         struct nsproxy *new_ns;
@@ -110,7 +132,7 @@ int copy_namespaces(int flags, struct task_struct *tsk)
 
         get_nsproxy(old_ns);
 
-        if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+        if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
                 return 0;
 
         if (!capable(CAP_SYS_ADMIN)) {
@@ -140,7 +162,9 @@ void free_nsproxy(struct nsproxy *ns)
                 put_ipc_ns(ns->ipc_ns);
         if (ns->pid_ns)
                 put_pid_ns(ns->pid_ns);
-        kfree(ns);
+        if (ns->user_ns)
+                put_user_ns(ns->user_ns);
+        kmem_cache_free(nsproxy_cachep, ns);
 }
 
 /*
@@ -152,19 +176,10 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 {
         int err = 0;
 
-        if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+        if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+                               CLONE_NEWUSER)))
                 return 0;
 
-#ifndef CONFIG_IPC_NS
-        if (unshare_flags & CLONE_NEWIPC)
-                return -EINVAL;
-#endif
-
-#ifndef CONFIG_UTS_NS
-        if (unshare_flags & CLONE_NEWUTS)
-                return -EINVAL;
-#endif
-
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
 
@@ -174,3 +189,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
                 err = PTR_ERR(*new_nsp);
         return err;
 }
+
+static int __init nsproxy_cache_init(void)
+{
+        nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
+                                           0, SLAB_PANIC, NULL, NULL);
+        return 0;
+}
+
+module_init(nsproxy_cache_init);
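
[Note: with CLONE_NEWUSER now accepted, the namespace set can be unshared in place without a fork. A minimal userspace sketch using the UTS namespace path that unshare_nsproxy_namespaces() already handled; needs CAP_SYS_ADMIN, and the hostname change is visible only inside the new namespace.]

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char host[64];

        /* Detach from the current UTS namespace (needs CAP_SYS_ADMIN). */
        if (unshare(CLONE_NEWUTS) == -1) {
                perror("unshare");
                return 1;
        }
        if (sethostname("sandbox", 7) == -1)
                perror("sethostname");
        gethostname(host, sizeof(host));
        printf("hostname in new namespace: %s\n", host);
        return 0;
}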
diff --git a/kernel/pid.c b/kernel/pid.c
index eb66bd2953ab..c6e3f9ffff87 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -365,7 +365,7 @@ struct pid *find_ge_pid(int nr)
 }
 EXPORT_SYMBOL_GPL(find_get_pid);
 
-struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns)
+struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
 {
         BUG_ON(!old_ns);
         get_pid_ns(old_ns);
diff --git a/kernel/printk.c b/kernel/printk.c
index 0bbdeac2810c..051d27e36a6c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -449,13 +449,16 @@ static int printk_time = 1;
449#else 449#else
450static int printk_time = 0; 450static int printk_time = 0;
451#endif 451#endif
452module_param(printk_time, int, S_IRUGO | S_IWUSR); 452module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
453 453
454static int __init printk_time_setup(char *str) 454static int __init printk_time_setup(char *str)
455{ 455{
456 if (*str) 456 if (*str)
457 return 0; 457 return 0;
458 printk_time = 1; 458 printk_time = 1;
459 printk(KERN_NOTICE "The 'time' option is deprecated and "
460 "is scheduled for removal in early 2008\n");
461 printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n");
459 return 1; 462 return 1;
460} 463}
461 464
@@ -483,6 +486,9 @@ static int have_callable_console(void)
  * @fmt: format string
  *
  * This is printk(). It can be called from any context. We want it to work.
+ * Be aware of the fact that if oops_in_progress is not set, we might try to
+ * wake klogd up which could deadlock on runqueue lock if printk() is called
+ * from scheduler code.
  *
  * We try to grab the console_sem. If we succeed, it's easy - we log the output and
  * call the console drivers. If we fail to get the semaphore we place the output
@@ -654,7 +660,7 @@ static void call_console_drivers(unsigned long start, unsigned long end)
  */
 static int __init console_setup(char *str)
 {
-        char name[sizeof(console_cmdline[0].name)];
+        char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
         char *s, *options;
         int idx;
 
@@ -662,27 +668,27 @@ static int __init console_setup(char *str)
          * Decode str into name, index, options.
          */
         if (str[0] >= '0' && str[0] <= '9') {
-                strcpy(name, "ttyS");
-                strncpy(name + 4, str, sizeof(name) - 5);
+                strcpy(buf, "ttyS");
+                strncpy(buf + 4, str, sizeof(buf) - 5);
         } else {
-                strncpy(name, str, sizeof(name) - 1);
+                strncpy(buf, str, sizeof(buf) - 1);
         }
-        name[sizeof(name) - 1] = 0;
+        buf[sizeof(buf) - 1] = 0;
         if ((options = strchr(str, ',')) != NULL)
                 *(options++) = 0;
 #ifdef __sparc__
         if (!strcmp(str, "ttya"))
-                strcpy(name, "ttyS0");
+                strcpy(buf, "ttyS0");
         if (!strcmp(str, "ttyb"))
-                strcpy(name, "ttyS1");
+                strcpy(buf, "ttyS1");
 #endif
-        for (s = name; *s; s++)
+        for (s = buf; *s; s++)
                 if ((*s >= '0' && *s <= '9') || *s == ',')
                         break;
         idx = simple_strtoul(s, NULL, 10);
         *s = 0;
 
-        add_preferred_console(name, idx, options);
+        add_preferred_console(buf, idx, options);
         return 1;
 }
 __setup("console=", console_setup);
@@ -709,7 +715,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
          * See if this tty is not yet registered, and
          * if we have a slot free.
          */
-        for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
+        for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
                 if (strcmp(console_cmdline[i].name, name) == 0 &&
                     console_cmdline[i].index == idx) {
                         selected_console = i;
@@ -726,6 +732,25 @@ int __init add_preferred_console(char *name, int idx, char *options)
         return 0;
 }
 
+int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
+{
+        struct console_cmdline *c;
+        int i;
+
+        for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
+                if (strcmp(console_cmdline[i].name, name) == 0 &&
+                    console_cmdline[i].index == idx) {
+                        c = &console_cmdline[i];
+                        memcpy(c->name, name_new, sizeof(c->name));
+                        c->name[sizeof(c->name) - 1] = 0;
+                        c->options = options;
+                        c->index = idx_new;
+                        return i;
+                }
+        /* not found */
+        return -1;
+}
+
 #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
 /**
  * suspend_console - suspend the console subsystem
@@ -942,6 +967,9 @@ void register_console(struct console *console)
         if (preferred_console < 0 || bootconsole || !console_drivers)
                 preferred_console = selected_console;
 
+        if (console->early_setup)
+                console->early_setup();
+
         /*
          * See if we want to use this console driver. If we
          * didn't select a console we take the first one
@@ -985,12 +1013,15 @@ void register_console(struct console *console)
         if (!(console->flags & CON_ENABLED))
                 return;
 
-        if (bootconsole) {
+        if (bootconsole && (console->flags & CON_CONSDEV)) {
                 printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n",
                        bootconsole->name, bootconsole->index,
                        console->name, console->index);
                 unregister_console(bootconsole);
                 console->flags &= ~CON_PRINTBUFFER;
+        } else {
+                printk(KERN_INFO "console [%s%d] enabled\n",
+                       console->name, console->index);
         }
 
         /*
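
[Note: module_param_named() is what lets the variable keep its internal name while the user-visible parameter becomes printk.time. A skeleton module showing the same pattern with hypothetical names; as in the hunk above, the backing variable for a bool parameter could still be an int in this era.]

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static int demo_verbose;        /* internal name ... */
/* ... exposed to userspace as "demo.loud" (or loud=... at load time) */
module_param_named(loud, demo_verbose, bool, S_IRUGO | S_IWUSR);

static int __init demo_init(void)
{
        printk(KERN_INFO "demo: loud=%d\n", demo_verbose);
        return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");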
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ad7949a589dd..b1d11f1c7cf7 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -161,6 +161,7 @@ int ptrace_may_attach(struct task_struct *task)
 int ptrace_attach(struct task_struct *task)
 {
         int retval;
+        unsigned long flags;
 
         audit_ptrace(task);
 
@@ -181,9 +182,7 @@ repeat:
          * cpu's that may have task_lock).
          */
         task_lock(task);
-        local_irq_disable();
-        if (!write_trylock(&tasklist_lock)) {
-                local_irq_enable();
+        if (!write_trylock_irqsave(&tasklist_lock, flags)) {
                 task_unlock(task);
                 do {
                         cpu_relax();
@@ -211,7 +210,7 @@ repeat:
         force_sig_specific(SIGSTOP, task);
 
 bad:
-        write_unlock_irq(&tasklist_lock);
+        write_unlock_irqrestore(&tasklist_lock, flags);
         task_unlock(task);
 out:
         return retval;
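
[Note: write_trylock_irqsave() just folds the irq save into the existing trylock-and-back-off dance that avoids an ABBA deadlock between task_lock() and tasklist_lock. The shape of that dance in a userspace pthread sketch; the lock names are hypothetical. Build with -pthread.]

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_rwlock_t tasklist_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Take task_lock then tasklist_lock without deadlocking against a
 * thread that acquires them in the opposite order: if the trylock
 * fails, back off completely and start over. */
static void lock_both(void)
{
        for (;;) {
                pthread_mutex_lock(&task_lock);
                if (pthread_rwlock_trywrlock(&tasklist_lock) == 0)
                        return;         /* got both */
                pthread_mutex_unlock(&task_lock);
                sched_yield();          /* analogue of cpu_relax() */
        }
}

int main(void)
{
        lock_both();
        puts("both locks held");
        pthread_rwlock_unlock(&tasklist_lock);
        pthread_mutex_unlock(&task_lock);
        return 0;
}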
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index da8d6bf46457..5aedbee014df 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -29,12 +29,6 @@
29 29
30#include "rtmutex_common.h" 30#include "rtmutex_common.h"
31 31
32#ifdef CONFIG_DEBUG_RT_MUTEXES
33# include "rtmutex-debug.h"
34#else
35# include "rtmutex.h"
36#endif
37
38# define TRACE_WARN_ON(x) WARN_ON(x) 32# define TRACE_WARN_ON(x) WARN_ON(x)
39# define TRACE_BUG_ON(x) BUG_ON(x) 33# define TRACE_BUG_ON(x) BUG_ON(x)
40 34
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 17d28ce20300..8cd9bd2cdb34 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -17,12 +17,6 @@
 
 #include "rtmutex_common.h"
 
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# include "rtmutex-debug.h"
-#else
-# include "rtmutex.h"
-#endif
-
 /*
  * lock->owner state tracking:
  *
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 9c75856e791e..2d3b83593ca3 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -103,7 +103,7 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
103 103
104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) 104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
105{ 105{
106 return (struct task_struct *) 106 return (struct task_struct *)
107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
108} 108}
109 109
@@ -120,4 +120,11 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
120 struct task_struct *proxy_owner); 120 struct task_struct *proxy_owner);
121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, 121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
122 struct task_struct *proxy_owner); 122 struct task_struct *proxy_owner);
123
124#ifdef CONFIG_DEBUG_RT_MUTEXES
125# include "rtmutex-debug.h"
126#else
127# include "rtmutex.h"
128#endif
129
123#endif 130#endif
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index c3391b6020e8..ad64fcb731f2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -10,6 +10,7 @@
10#include <linux/sched.h> 10#include <linux/sched.h>
11 11
12/* #define SECCOMP_DEBUG 1 */ 12/* #define SECCOMP_DEBUG 1 */
13#define NR_SECCOMP_MODES 1
13 14
14/* 15/*
15 * Secure computing mode 1 allows only read/write/exit/sigreturn. 16 * Secure computing mode 1 allows only read/write/exit/sigreturn.
@@ -54,3 +55,31 @@ void __secure_computing(int this_syscall)
54#endif 55#endif
55 do_exit(SIGKILL); 56 do_exit(SIGKILL);
56} 57}
58
59long prctl_get_seccomp(void)
60{
61 return current->seccomp.mode;
62}
63
64long prctl_set_seccomp(unsigned long seccomp_mode)
65{
66 long ret;
67
68 /* can set it only once to be even more secure */
69 ret = -EPERM;
70 if (unlikely(current->seccomp.mode))
71 goto out;
72
73 ret = -EINVAL;
74 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
75 current->seccomp.mode = seccomp_mode;
76 set_thread_flag(TIF_SECCOMP);
77#ifdef TIF_NOTSC
78 disable_TSC();
79#endif
80 ret = 0;
81 }
82
83 out:
84 return ret;
85}
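
With the prctl hooks above, strict seccomp becomes reachable without the old /proc interface. A hedged userspace sketch; PR_GET_SECCOMP and PR_SET_SECCOMP take the values 21 and 22 in this series, and pre-series headers will not define them:

#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

#ifndef PR_GET_SECCOMP
# define PR_GET_SECCOMP	21
# define PR_SET_SECCOMP	22
#endif

int main(void)
{
	printf("seccomp mode: %d\n", (int)prctl(PR_GET_SECCOMP, 0, 0, 0, 0));

	/* One-shot: a second attempt fails with EPERM. */
	if (prctl(PR_SET_SECCOMP, 1, 0, 0, 0))
		perror("PR_SET_SECCOMP");

	/* Only read/write/exit/sigreturn are allowed from here on. */
	write(1, "strict mode entered\n", 20);
	syscall(SYS_exit, 0);	/* glibc exit() uses exit_group(), which would be fatal */
}
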
diff --git a/kernel/signal.c b/kernel/signal.c
index f9405609774e..39d122753bac 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -718,6 +718,37 @@ out_set:
718#define LEGACY_QUEUE(sigptr, sig) \ 718#define LEGACY_QUEUE(sigptr, sig) \
719 (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) 719 (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig)))
720 720
721int print_fatal_signals;
722
723static void print_fatal_signal(struct pt_regs *regs, int signr)
724{
725 printk("%s/%d: potentially unexpected fatal signal %d.\n",
726 current->comm, current->pid, signr);
727
728#ifdef __i386__
729 printk("code at %08lx: ", regs->eip);
730 {
731 int i;
732 for (i = 0; i < 16; i++) {
733 unsigned char insn;
734
735 __get_user(insn, (unsigned char *)(regs->eip + i));
736 printk("%02x ", insn);
737 }
738 }
739#endif
740 printk("\n");
741 show_regs(regs);
742}
743
744static int __init setup_print_fatal_signals(char *str)
745{
746 get_option (&str, &print_fatal_signals);
747
748 return 1;
749}
750
751__setup("print-fatal-signals=", setup_print_fatal_signals);
721 752
722static int 753static int
723specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) 754specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
@@ -1855,6 +1886,8 @@ relock:
1855 * Anything else is fatal, maybe with a core dump. 1886 * Anything else is fatal, maybe with a core dump.
1856 */ 1887 */
1857 current->flags |= PF_SIGNALED; 1888 current->flags |= PF_SIGNALED;
1889 if ((signr != SIGKILL) && print_fatal_signals)
1890 print_fatal_signal(regs, signr);
1858 if (sig_kernel_coredump(signr)) { 1891 if (sig_kernel_coredump(signr)) {
1859 /* 1892 /*
1860 * If it was able to dump core, this kills all 1893 * If it was able to dump core, this kills all
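
print_fatal_signal() fires for any fatal signal other than SIGKILL once print-fatal-signals is enabled, via the boot parameter registered above or the sysctl added further down. A trivial userspace program to exercise the path:

/*
 * With print-fatal-signals=1, this SIGSEGV makes the kernel log the
 * task's comm/pid, the signal number, the faulting code bytes (on i386)
 * and a register dump.
 */
int main(void)
{
	*(volatile int *)0 = 1;
	return 0;
}
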
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 73217a9e2875..8de267790166 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -614,12 +614,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
614 kthread_bind(per_cpu(ksoftirqd, hotcpu), 614 kthread_bind(per_cpu(ksoftirqd, hotcpu),
615 any_online_cpu(cpu_online_map)); 615 any_online_cpu(cpu_online_map));
616 case CPU_DEAD: 616 case CPU_DEAD:
617 case CPU_DEAD_FROZEN: 617 case CPU_DEAD_FROZEN: {
618 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
619
618 p = per_cpu(ksoftirqd, hotcpu); 620 p = per_cpu(ksoftirqd, hotcpu);
619 per_cpu(ksoftirqd, hotcpu) = NULL; 621 per_cpu(ksoftirqd, hotcpu) = NULL;
622 sched_setscheduler(p, SCHED_FIFO, &param);
620 kthread_stop(p); 623 kthread_stop(p);
621 takeover_tasklets(hotcpu); 624 takeover_tasklets(hotcpu);
622 break; 625 break;
626 }
623#endif /* CONFIG_HOTPLUG_CPU */ 627#endif /* CONFIG_HOTPLUG_CPU */
624 } 628 }
625 return NOTIFY_OK; 629 return NOTIFY_OK;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index fcee2a8e6da3..319821ef78af 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -93,10 +93,6 @@ static void stopmachine_set_state(enum stopmachine_state state)
93static int stop_machine(void) 93static int stop_machine(void)
94{ 94{
95 int i, ret = 0; 95 int i, ret = 0;
96 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
97
98 /* One high-prio thread per cpu. We'll do this one. */
99 sched_setscheduler(current, SCHED_FIFO, &param);
100 96
101 atomic_set(&stopmachine_thread_ack, 0); 97 atomic_set(&stopmachine_thread_ack, 0);
102 stopmachine_num_threads = 0; 98 stopmachine_num_threads = 0;
@@ -189,6 +185,10 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
189 185
190 p = kthread_create(do_stop, &smdata, "kstopmachine"); 186 p = kthread_create(do_stop, &smdata, "kstopmachine");
191 if (!IS_ERR(p)) { 187 if (!IS_ERR(p)) {
188 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
189
190 /* One high-prio thread per cpu. We'll do this one. */
191 sched_setscheduler(p, SCHED_FIFO, &param);
192 kthread_bind(p, cpu); 192 kthread_bind(p, cpu);
193 wake_up_process(p); 193 wake_up_process(p);
194 wait_for_completion(&smdata.done); 194 wait_for_completion(&smdata.done);
diff --git a/kernel/sys.c b/kernel/sys.c
index 872271ccc384..4d141ae3e802 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -31,10 +31,12 @@
31#include <linux/cn_proc.h> 31#include <linux/cn_proc.h>
32#include <linux/getcpu.h> 32#include <linux/getcpu.h>
33#include <linux/task_io_accounting_ops.h> 33#include <linux/task_io_accounting_ops.h>
34#include <linux/seccomp.h>
34 35
35#include <linux/compat.h> 36#include <linux/compat.h>
36#include <linux/syscalls.h> 37#include <linux/syscalls.h>
37#include <linux/kprobes.h> 38#include <linux/kprobes.h>
39#include <linux/user_namespace.h>
38 40
39#include <asm/uaccess.h> 41#include <asm/uaccess.h>
40#include <asm/io.h> 42#include <asm/io.h>
@@ -1078,13 +1080,13 @@ static int set_user(uid_t new_ruid, int dumpclear)
1078{ 1080{
1079 struct user_struct *new_user; 1081 struct user_struct *new_user;
1080 1082
1081 new_user = alloc_uid(new_ruid); 1083 new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
1082 if (!new_user) 1084 if (!new_user)
1083 return -EAGAIN; 1085 return -EAGAIN;
1084 1086
1085 if (atomic_read(&new_user->processes) >= 1087 if (atomic_read(&new_user->processes) >=
1086 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 1088 current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
1087 new_user != &root_user) { 1089 new_user != current->nsproxy->user_ns->root_user) {
1088 free_uid(new_user); 1090 free_uid(new_user);
1089 return -EAGAIN; 1091 return -EAGAIN;
1090 } 1092 }
@@ -2241,6 +2243,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
2241 error = SET_ENDIAN(current, arg2); 2243 error = SET_ENDIAN(current, arg2);
2242 break; 2244 break;
2243 2245
2246 case PR_GET_SECCOMP:
2247 error = prctl_get_seccomp();
2248 break;
2249 case PR_SET_SECCOMP:
2250 error = prctl_set_seccomp(arg2);
2251 break;
2252
2244 default: 2253 default:
2245 error = -EINVAL; 2254 error = -EINVAL;
2246 break; 2255 break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7e11e2c98bf9..b0ec498a18d9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -14,6 +14,7 @@ asmlinkage long sys_ni_syscall(void)
14 14
15cond_syscall(sys_nfsservctl); 15cond_syscall(sys_nfsservctl);
16cond_syscall(sys_quotactl); 16cond_syscall(sys_quotactl);
17cond_syscall(sys32_quotactl);
17cond_syscall(sys_acct); 18cond_syscall(sys_acct);
18cond_syscall(sys_lookup_dcookie); 19cond_syscall(sys_lookup_dcookie);
19cond_syscall(sys_swapon); 20cond_syscall(sys_swapon);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d93e13d93f24..7dca326648d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
61 61
62/* External variables not in a header file. */ 62/* External variables not in a header file. */
63extern int C_A_D; 63extern int C_A_D;
64extern int print_fatal_signals;
64extern int sysctl_overcommit_memory; 65extern int sysctl_overcommit_memory;
65extern int sysctl_overcommit_ratio; 66extern int sysctl_overcommit_ratio;
66extern int sysctl_panic_on_oom; 67extern int sysctl_panic_on_oom;
@@ -202,7 +203,10 @@ static ctl_table root_table[] = {
202 .mode = 0555, 203 .mode = 0555,
203 .child = dev_table, 204 .child = dev_table,
204 }, 205 },
205 206/*
207 * NOTE: do not add new entries to this table unless you have read
208 * Documentation/sysctl/ctl_unnumbered.txt
209 */
206 { .ctl_name = 0 } 210 { .ctl_name = 0 }
207}; 211};
208 212
@@ -340,6 +344,14 @@ static ctl_table kern_table[] = {
340 .proc_handler = &proc_dointvec, 344 .proc_handler = &proc_dointvec,
341 }, 345 },
342#endif 346#endif
347 {
348 .ctl_name = CTL_UNNUMBERED,
349 .procname = "print-fatal-signals",
350 .data = &print_fatal_signals,
351 .maxlen = sizeof(int),
352 .mode = 0644,
353 .proc_handler = &proc_dointvec,
354 },
343#ifdef __sparc__ 355#ifdef __sparc__
344 { 356 {
345 .ctl_name = KERN_SPARC_REBOOT, 357 .ctl_name = KERN_SPARC_REBOOT,
@@ -958,6 +970,17 @@ static ctl_table vm_table[] = {
958 .mode = 0644, 970 .mode = 0644,
959 .proc_handler = &proc_doulongvec_minmax, 971 .proc_handler = &proc_doulongvec_minmax,
960 }, 972 },
973#ifdef CONFIG_NUMA
974 {
975 .ctl_name = CTL_UNNUMBERED,
976 .procname = "numa_zonelist_order",
977 .data = &numa_zonelist_order,
978 .maxlen = NUMA_ZONELIST_ORDER_LEN,
979 .mode = 0644,
980 .proc_handler = &numa_zonelist_order_handler,
981 .strategy = &sysctl_string,
982 },
983#endif
961#endif 984#endif
962#if defined(CONFIG_X86_32) || \ 985#if defined(CONFIG_X86_32) || \
963 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 986 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
@@ -972,6 +995,14 @@ static ctl_table vm_table[] = {
972 .extra1 = &zero, 995 .extra1 = &zero,
973 }, 996 },
974#endif 997#endif
998/*
999 * NOTE: do not add new entries to this table unless you have read
1000 * Documentation/sysctl/ctl_unnumbered.txt
1001 */
975 { .ctl_name = 0 } 1006 { .ctl_name = 0 }
976}; 1007};
977 1008
@@ -1112,6 +1143,14 @@ static ctl_table fs_table[] = {
1112 .child = binfmt_misc_table, 1143 .child = binfmt_misc_table,
1113 }, 1144 },
1114#endif 1145#endif
1146/*
1147 * NOTE: do not add new entries to this table unless you have read
1148 * Documentation/sysctl/ctl_unnumbered.txt
1149 */
1115 { .ctl_name = 0 } 1154 { .ctl_name = 0 }
1116}; 1155};
1117 1156
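
The CTL_UNNUMBERED entries above are reachable only by name through /proc/sys, not via sys_sysctl(). A small sketch that flips the new kernel.print-fatal-signals knob from C, equivalent to echo 1 > /proc/sys/kernel/print-fatal-signals:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/kernel/print-fatal-signals", O_WRONLY);

	if (fd < 0 || write(fd, "1\n", 2) != 2)
		perror("print-fatal-signals");
	if (fd >= 0)
		close(fd);
	return 0;
}
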
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 906cae771585..059431ed67db 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
196 196
197 /* fill in basic acct fields */ 197 /* fill in basic acct fields */
198 stats->version = TASKSTATS_VERSION; 198 stats->version = TASKSTATS_VERSION;
199 stats->nvcsw = tsk->nvcsw;
200 stats->nivcsw = tsk->nivcsw;
199 bacct_add_tsk(stats, tsk); 201 bacct_add_tsk(stats, tsk);
200 202
201 /* fill in extended acct fields */ 203 /* fill in extended acct fields */
@@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
242 */ 244 */
243 delayacct_add_tsk(stats, tsk); 245 delayacct_add_tsk(stats, tsk);
244 246
247 stats->nvcsw += tsk->nvcsw;
248 stats->nivcsw += tsk->nivcsw;
245 } while_each_thread(first, tsk); 249 } while_each_thread(first, tsk);
246 250
247 unlock_task_sighand(first, &flags); 251 unlock_task_sighand(first, &flags);
diff --git a/kernel/time.c b/kernel/time.c
index f04791f69408..ffe19149d770 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz);
57 */ 57 */
58asmlinkage long sys_time(time_t __user * tloc) 58asmlinkage long sys_time(time_t __user * tloc)
59{ 59{
60 time_t i; 60 /*
61 struct timeval tv; 61 * We read xtime.tv_sec atomically - it's updated
62 * atomically by update_wall_time(), so no need to
63 * even read-lock the xtime seqlock:
64 */
65 time_t i = xtime.tv_sec;
62 66
63 do_gettimeofday(&tv); 67 smp_rmb(); /* sys_time() results are coherent */
64 i = tv.tv_sec;
65 68
66 if (tloc) { 69 if (tloc) {
67 if (put_user(i,tloc)) 70 if (put_user(i, tloc))
68 i = -EFAULT; 71 i = -EFAULT;
69 } 72 }
70 return i; 73 return i;
@@ -373,12 +376,25 @@ void do_gettimeofday (struct timeval *tv)
373 376
374 tv->tv_sec = sec; 377 tv->tv_sec = sec;
375 tv->tv_usec = usec; 378 tv->tv_usec = usec;
376}
377 379
380 /*
381 * Make sure xtime.tv_sec [returned by sys_time()] always
382 * follows the gettimeofday() result precisely. This
383 * condition is extremely unlikely, it can hit at most
384 * once per second:
385 */
386 if (unlikely(xtime.tv_sec != tv->tv_sec)) {
387 unsigned long flags;
388
389 write_seqlock_irqsave(&xtime_lock, flags);
390 update_wall_time();
391 write_sequnlock_irqrestore(&xtime_lock, flags);
392 }
393}
378EXPORT_SYMBOL(do_gettimeofday); 394EXPORT_SYMBOL(do_gettimeofday);
379 395
396#else /* CONFIG_TIME_INTERPOLATION */
380 397
381#else
382#ifndef CONFIG_GENERIC_TIME 398#ifndef CONFIG_GENERIC_TIME
383/* 399/*
384 * Simulate gettimeofday using do_gettimeofday which only allows a timeval 400 * Simulate gettimeofday using do_gettimeofday which only allows a timeval
@@ -394,7 +410,7 @@ void getnstimeofday(struct timespec *tv)
394} 410}
395EXPORT_SYMBOL_GPL(getnstimeofday); 411EXPORT_SYMBOL_GPL(getnstimeofday);
396#endif 412#endif
397#endif 413#endif /* CONFIG_TIME_INTERPOLATION */
398 414
399/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. 415/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
400 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 416 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
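
For contrast with the lock-free sys_time() read above, a sketch of the seqlock retry loop that full-resolution readers of xtime still use; the loop repeats until no writer raced with the snapshot:

static void xtime_snapshot(struct timespec *ts)
{
	unsigned long seq;

	do {
		seq = read_seqbegin(&xtime_lock);
		ts->tv_sec  = xtime.tv_sec;
		ts->tv_nsec = xtime.tv_nsec;
	} while (read_seqretry(&xtime_lock, seq));
}
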
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 76212b2a99de..2ad1c37b8dfe 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -205,47 +205,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
205} 205}
206 206
207/** 207/**
208 * clockevents_request_device
209 */
210struct clock_event_device *clockevents_request_device(unsigned int features,
211 cpumask_t cpumask)
212{
213 struct clock_event_device *cur, *dev = NULL;
214 struct list_head *tmp;
215
216 spin_lock(&clockevents_lock);
217
218 list_for_each(tmp, &clockevent_devices) {
219 cur = list_entry(tmp, struct clock_event_device, list);
220
221 if ((cur->features & features) == features &&
222 cpus_equal(cpumask, cur->cpumask)) {
223 if (!dev || dev->rating < cur->rating)
224 dev = cur;
225 }
226 }
227
228 clockevents_exchange_device(NULL, dev);
229
230 spin_unlock(&clockevents_lock);
231
232 return dev;
233}
234
235/**
236 * clockevents_release_device
237 */
238void clockevents_release_device(struct clock_event_device *dev)
239{
240 spin_lock(&clockevents_lock);
241
242 clockevents_exchange_device(dev, NULL);
243 clockevents_notify_released();
244
245 spin_unlock(&clockevents_lock);
246}
247
248/**
249 * clockevents_notify - notification about relevant events 208 * clockevents_notify - notification about relevant events
250 */ 209 */
251void clockevents_notify(unsigned long reason, void *arg) 210void clockevents_notify(unsigned long reason, void *arg)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cf53bb5814cb..438c6b723ee2 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -13,7 +13,7 @@
13#include <linux/timex.h> 13#include <linux/timex.h>
14#include <linux/jiffies.h> 14#include <linux/jiffies.h>
15#include <linux/hrtimer.h> 15#include <linux/hrtimer.h>
16 16#include <linux/capability.h>
17#include <asm/div64.h> 17#include <asm/div64.h>
18#include <asm/timex.h> 18#include <asm/timex.h>
19 19
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3d1042f82a68..728cedfd3cbd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -36,9 +36,17 @@ EXPORT_SYMBOL(xtime_lock);
36 * at zero at system boot time, so wall_to_monotonic will be negative, 36 * at zero at system boot time, so wall_to_monotonic will be negative,
37 * however, we will ALWAYS keep the tv_nsec part positive so we can use 37 * however, we will ALWAYS keep the tv_nsec part positive so we can use
38 * the usual normalization. 38 * the usual normalization.
39 *
40 * wall_to_monotonic is moved after resume from suspend for the monotonic
41 * time not to jump. We need to add total_sleep_time to wall_to_monotonic
42 * to get the real boot based time offset.
43 *
44 * - wall_to_monotonic is no longer the boot time, getboottime must be
45 * used instead.
39 */ 46 */
40struct timespec xtime __attribute__ ((aligned (16))); 47struct timespec xtime __attribute__ ((aligned (16)));
41struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 48struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
49static unsigned long total_sleep_time; /* seconds */
42 50
43EXPORT_SYMBOL(xtime); 51EXPORT_SYMBOL(xtime);
44 52
@@ -251,6 +259,7 @@ void __init timekeeping_init(void)
251 xtime.tv_nsec = 0; 259 xtime.tv_nsec = 0;
252 set_normalized_timespec(&wall_to_monotonic, 260 set_normalized_timespec(&wall_to_monotonic,
253 -xtime.tv_sec, -xtime.tv_nsec); 261 -xtime.tv_sec, -xtime.tv_nsec);
262 total_sleep_time = 0;
254 263
255 write_sequnlock_irqrestore(&xtime_lock, flags); 264 write_sequnlock_irqrestore(&xtime_lock, flags);
256} 265}
@@ -282,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev)
282 291
283 xtime.tv_sec += sleep_length; 292 xtime.tv_sec += sleep_length;
284 wall_to_monotonic.tv_sec -= sleep_length; 293 wall_to_monotonic.tv_sec -= sleep_length;
294 total_sleep_time += sleep_length;
285 } 295 }
286 /* re-base the last cycle value */ 296 /* re-base the last cycle value */
287 clock->cycle_last = clocksource_read(clock); 297 clock->cycle_last = clocksource_read(clock);
@@ -476,3 +486,30 @@ void update_wall_time(void)
476 change_clocksource(); 486 change_clocksource();
477 update_vsyscall(&xtime, clock); 487 update_vsyscall(&xtime, clock);
478} 488}
489
490/**
491 * getboottime - Return the real time of system boot.
492 * @ts: pointer to the timespec to be set
493 *
494 * Returns the time of day in a timespec.
495 *
496 * This is based on the wall_to_monotonic offset and the total suspend
497 * time. Calls to settimeofday will affect the value returned (which
498 * basically means that however wrong your real time clock is at boot time,
499 * you get the right time here).
500 */
501void getboottime(struct timespec *ts)
502{
503 set_normalized_timespec(ts,
504 - (wall_to_monotonic.tv_sec + total_sleep_time),
505 - wall_to_monotonic.tv_nsec);
506}
507
508/**
509 * monotonic_to_bootbased - Convert the monotonic time to boot based.
510 * @ts: pointer to the timespec to be converted
511 */
512void monotonic_to_bootbased(struct timespec *ts)
513{
514 ts->tv_sec += total_sleep_time;
515}
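
The three pieces fit together as follows: monotonic time excludes suspend, total_sleep_time accounts for it, and getboottime() anchors the boot epoch in wall-clock terms. A kernel-side sketch, assuming ktime_get_ts() as the monotonic accessor:

static void wall_clock_from_boot(struct timespec *boot,
				 struct timespec *since_boot)
{
	getboottime(boot);			/* wall-clock time of boot */
	ktime_get_ts(since_boot);		/* monotonic, suspend excluded */
	monotonic_to_bootbased(since_boot);	/* fold total_sleep_time back in */
	/* *boot + *since_boot tracks the wall clock, modulo settimeofday() */
}
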
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 321693724ad7..9b8a826236dd 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -68,6 +68,7 @@ struct entry {
68 * Number of timeout events: 68 * Number of timeout events:
69 */ 69 */
70 unsigned long count; 70 unsigned long count;
71 unsigned int timer_flag;
71 72
72 /* 73 /*
73 * We save the command-line string to preserve 74 * We save the command-line string to preserve
@@ -231,7 +232,8 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
231 * incremented. Otherwise the timer is registered in a free slot. 232 * incremented. Otherwise the timer is registered in a free slot.
232 */ 233 */
233void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 234void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
234 void *timerf, char * comm) 235 void *timerf, char *comm,
236 unsigned int timer_flag)
235{ 237{
236 /* 238 /*
237 * It doesn't matter which lock we take: 239
@@ -249,6 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
249 input.start_func = startf; 251 input.start_func = startf;
250 input.expire_func = timerf; 252 input.expire_func = timerf;
251 input.pid = pid; 253 input.pid = pid;
254 input.timer_flag = timer_flag;
252 255
253 spin_lock_irqsave(lock, flags); 256 spin_lock_irqsave(lock, flags);
254 if (!active) 257 if (!active)
@@ -295,7 +298,7 @@ static int tstats_show(struct seq_file *m, void *v)
295 period = ktime_to_timespec(time); 298 period = ktime_to_timespec(time);
296 ms = period.tv_nsec / 1000000; 299 ms = period.tv_nsec / 1000000;
297 300
298 seq_puts(m, "Timer Stats Version: v0.1\n"); 301 seq_puts(m, "Timer Stats Version: v0.2\n");
299 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); 302 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
300 if (atomic_read(&overflow_count)) 303 if (atomic_read(&overflow_count))
301 seq_printf(m, "Overflow: %d entries\n", 304 seq_printf(m, "Overflow: %d entries\n",
@@ -303,8 +306,13 @@ static int tstats_show(struct seq_file *m, void *v)
303 306
304 for (i = 0; i < nr_entries; i++) { 307 for (i = 0; i < nr_entries; i++) {
305 entry = entries + i; 308 entry = entries + i;
306 seq_printf(m, "%4lu, %5d %-16s ", 309 if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
310 seq_printf(m, "%4luD, %5d %-16s ",
307 entry->count, entry->pid, entry->comm); 311 entry->count, entry->pid, entry->comm);
312 } else {
313 seq_printf(m, " %4lu, %5d %-16s ",
314 entry->count, entry->pid, entry->comm);
315 }
308 316
309 print_name_offset(m, (unsigned long)entry->start_func); 317 print_name_offset(m, (unsigned long)entry->start_func);
310 seq_puts(m, " ("); 318 seq_puts(m, " (");
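
The new "D" marker in /proc/timer_stats flags deferrable timers. A sketch of arming one so it shows up flagged; my_poll() and poll_timer are placeholders:

static void my_poll(unsigned long data)
{
	/* periodic, non-urgent work */
}

static struct timer_list poll_timer;

static void start_poll(void)
{
	init_timer_deferrable(&poll_timer);	/* may be deferred while the CPU idles */
	poll_timer.function = my_poll;
	poll_timer.expires  = jiffies + HZ;
	add_timer(&poll_timer);			/* listed with a "D" suffix */
}
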
diff --git a/kernel/timer.c b/kernel/timer.c
index 1a69705c2fb9..1258371e0d2b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -305,6 +305,20 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
305 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); 305 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
306 timer->start_pid = current->pid; 306 timer->start_pid = current->pid;
307} 307}
308
309static void timer_stats_account_timer(struct timer_list *timer)
310{
311 unsigned int flag = 0;
312
313 if (unlikely(tbase_get_deferrable(timer->base)))
314 flag |= TIMER_STATS_FLAG_DEFERRABLE;
315
316 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
317 timer->function, timer->start_comm, flag);
318}
319
320#else
321static void timer_stats_account_timer(struct timer_list *timer) {}
308#endif 322#endif
309 323
310/** 324/**
@@ -1114,6 +1128,7 @@ int do_sysinfo(struct sysinfo *info)
1114 getnstimeofday(&tp); 1128 getnstimeofday(&tp);
1115 tp.tv_sec += wall_to_monotonic.tv_sec; 1129 tp.tv_sec += wall_to_monotonic.tv_sec;
1116 tp.tv_nsec += wall_to_monotonic.tv_nsec; 1130 tp.tv_nsec += wall_to_monotonic.tv_nsec;
1131 monotonic_to_bootbased(&tp);
1117 if (tp.tv_nsec - NSEC_PER_SEC >= 0) { 1132 if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
1118 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; 1133 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1119 tp.tv_sec++; 1134 tp.tv_sec++;
diff --git a/kernel/user.c b/kernel/user.c
index 4869563080e9..98b82507797a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -14,20 +14,19 @@
14#include <linux/bitops.h> 14#include <linux/bitops.h>
15#include <linux/key.h> 15#include <linux/key.h>
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/module.h>
18#include <linux/user_namespace.h>
17 19
18/* 20/*
19 * UID task count cache, to get fast user lookup in "alloc_uid" 21 * UID task count cache, to get fast user lookup in "alloc_uid"
20 * when changing user ID's (ie setuid() and friends). 22 * when changing user ID's (ie setuid() and friends).
21 */ 23 */
22 24
23#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
24#define UIDHASH_SZ (1 << UIDHASH_BITS)
25#define UIDHASH_MASK (UIDHASH_SZ - 1) 25#define UIDHASH_MASK (UIDHASH_SZ - 1)
26#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) 26#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
27#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) 27#define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid)))
28 28
29static struct kmem_cache *uid_cachep; 29static struct kmem_cache *uid_cachep;
30static struct list_head uidhash_table[UIDHASH_SZ];
31 30
32/* 31/*
33 * The uidhash_lock is mostly taken from process context, but it is 32 * The uidhash_lock is mostly taken from process context, but it is
@@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid)
94{ 93{
95 struct user_struct *ret; 94 struct user_struct *ret;
96 unsigned long flags; 95 unsigned long flags;
96 struct user_namespace *ns = current->nsproxy->user_ns;
97 97
98 spin_lock_irqsave(&uidhash_lock, flags); 98 spin_lock_irqsave(&uidhash_lock, flags);
99 ret = uid_hash_find(uid, uidhashentry(uid)); 99 ret = uid_hash_find(uid, uidhashentry(ns, uid));
100 spin_unlock_irqrestore(&uidhash_lock, flags); 100 spin_unlock_irqrestore(&uidhash_lock, flags);
101 return ret; 101 return ret;
102} 102}
@@ -120,9 +120,9 @@ void free_uid(struct user_struct *up)
120 } 120 }
121} 121}
122 122
123struct user_struct * alloc_uid(uid_t uid) 123struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
124{ 124{
125 struct list_head *hashent = uidhashentry(uid); 125 struct list_head *hashent = uidhashentry(ns, uid);
126 struct user_struct *up; 126 struct user_struct *up;
127 127
128 spin_lock_irq(&uidhash_lock); 128 spin_lock_irq(&uidhash_lock);
@@ -211,11 +211,11 @@ static int __init uid_cache_init(void)
211 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 211 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
212 212
213 for(n = 0; n < UIDHASH_SZ; ++n) 213 for(n = 0; n < UIDHASH_SZ; ++n)
214 INIT_LIST_HEAD(uidhash_table + n); 214 INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
215 215
216 /* Insert the root user immediately (init already runs as root) */ 216 /* Insert the root user immediately (init already runs as root) */
217 spin_lock_irq(&uidhash_lock); 217 spin_lock_irq(&uidhash_lock);
218 uid_hash_insert(&root_user, uidhashentry(0)); 218 uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
219 spin_unlock_irq(&uidhash_lock); 219 spin_unlock_irq(&uidhash_lock);
220 220
221 return 0; 221 return 0;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
new file mode 100644
index 000000000000..d055d987850c
--- /dev/null
+++ b/kernel/user_namespace.c
@@ -0,0 +1,87 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License as
4 * published by the Free Software Foundation, version 2 of the
5 * License.
6 */
7
8#include <linux/module.h>
9#include <linux/version.h>
10#include <linux/nsproxy.h>
11#include <linux/user_namespace.h>
12
13struct user_namespace init_user_ns = {
14 .kref = {
15 .refcount = ATOMIC_INIT(2),
16 },
17 .root_user = &root_user,
18};
19
20EXPORT_SYMBOL_GPL(init_user_ns);
21
22#ifdef CONFIG_USER_NS
23
24/*
25 * Clone a new ns copying an original user ns, setting refcount to 1
26 * @old_ns: namespace to clone
27 * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise
28 */
29static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
30{
31 struct user_namespace *ns;
32 struct user_struct *new_user;
33 int n;
34
35 ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
36 if (!ns)
37 return ERR_PTR(-ENOMEM);
38
39 kref_init(&ns->kref);
40
41 for (n = 0; n < UIDHASH_SZ; ++n)
42 INIT_LIST_HEAD(ns->uidhash_table + n);
43
44 /* Insert new root user. */
45 ns->root_user = alloc_uid(ns, 0);
46 if (!ns->root_user) {
47 kfree(ns);
48 return ERR_PTR(-ENOMEM);
49 }
50
51 /* Reset current->user with a new one */
52 new_user = alloc_uid(ns, current->uid);
53 if (!new_user) {
54 free_uid(ns->root_user);
55 kfree(ns);
56 return ERR_PTR(-ENOMEM);
57 }
58
59 switch_uid(new_user);
60 return ns;
61}
62
63struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
64{
65 struct user_namespace *new_ns;
66
67 BUG_ON(!old_ns);
68 get_user_ns(old_ns);
69
70 if (!(flags & CLONE_NEWUSER))
71 return old_ns;
72
73 new_ns = clone_user_ns(old_ns);
74
75 put_user_ns(old_ns);
76 return new_ns;
77}
78
79void free_user_ns(struct kref *kref)
80{
81 struct user_namespace *ns;
82
83 ns = container_of(kref, struct user_namespace, kref);
84 kfree(ns);
85}
86
87#endif /* CONFIG_USER_NS */
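
A userspace view of the new namespace: cloning with CLONE_NEWUSER makes clone_user_ns() above allocate a fresh uidhash and switch the child to a newly allocated user_struct. A hedged sketch; CLONE_NEWUSER is 0x10000000 in this series and absent from older headers:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif

static char child_stack[16 * 1024];

static int child(void *arg)
{
	/* Runs on the user_struct freshly allocated by clone_user_ns(). */
	printf("child uid: %d\n", (int)getuid());
	return 0;
}

int main(void)
{
	pid_t pid = clone(child, child_stack + sizeof(child_stack),
			  CLONE_NEWUSER | SIGCHLD, NULL);

	if (pid < 0) {
		perror("clone");
		exit(1);
	}
	waitpid(pid, NULL, 0);
	return 0;
}
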
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 160c8c5136bd..9d8180a0f0d8 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -13,6 +13,7 @@
13#include <linux/uts.h> 13#include <linux/uts.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/version.h> 15#include <linux/version.h>
16#include <linux/err.h>
16 17
17/* 18/*
18 * Clone a new ns copying an original utsname, setting refcount to 1 19 * Clone a new ns copying an original utsname, setting refcount to 1
@@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
24 struct uts_namespace *ns; 25 struct uts_namespace *ns;
25 26
26 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); 27 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
27 if (ns) { 28 if (!ns)
28 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 29 return ERR_PTR(-ENOMEM);
29 kref_init(&ns->kref); 30
30 } 31 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
32 kref_init(&ns->kref);
31 return ns; 33 return ns;
32} 34}
33 35
@@ -37,7 +39,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
37 * utsname of this process won't be seen by parent, and vice 39 * utsname of this process won't be seen by parent, and vice
38 * versa. 40 * versa.
39 */ 41 */
40struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) 42struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns)
41{ 43{
42 struct uts_namespace *new_ns; 44 struct uts_namespace *new_ns;
43 45
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index f22b9dbd2a9c..c76c06466bfd 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -18,10 +18,7 @@
18static void *get_uts(ctl_table *table, int write) 18static void *get_uts(ctl_table *table, int write)
19{ 19{
20 char *which = table->data; 20 char *which = table->data;
21#ifdef CONFIG_UTS_NS 21
22 struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
23 which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
24#endif
25 if (!write) 22 if (!write)
26 down_read(&uts_sem); 23 down_read(&uts_sem);
27 else 24 else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3bebf73be976..d7d3fa3072e5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
382EXPORT_SYMBOL_GPL(flush_workqueue); 382EXPORT_SYMBOL_GPL(flush_workqueue);
383 383
384/* 384/*
385 * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, 385 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
386 * so this work can't be re-armed in any way. 386 * so this work can't be re-armed in any way.
387 */ 387 */
388static int try_to_grab_pending(struct work_struct *work) 388static int try_to_grab_pending(struct work_struct *work)
389{ 389{
390 struct cpu_workqueue_struct *cwq; 390 struct cpu_workqueue_struct *cwq;
391 int ret = 0; 391 int ret = -1;
392 392
393 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) 393 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
394 return 1; 394 return 0;
395 395
396 /* 396 /*
397 * The queueing is in progress, or it is already queued. Try to 397 * The queueing is in progress, or it is already queued. Try to
@@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work)
457 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 457 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
458} 458}
459 459
460static int __cancel_work_timer(struct work_struct *work,
461 struct timer_list* timer)
462{
463 int ret;
464
465 do {
466 ret = (timer && likely(del_timer(timer)));
467 if (!ret)
468 ret = try_to_grab_pending(work);
469 wait_on_work(work);
470 } while (unlikely(ret < 0));
471
472 work_clear_pending(work);
473 return ret;
474}
475
460/** 476/**
461 * cancel_work_sync - block until a work_struct's callback has terminated 477 * cancel_work_sync - block until a work_struct's callback has terminated
462 * @work: the work which is to be flushed 478 * @work: the work which is to be flushed
463 * 479 *
480 * Returns true if @work was pending.
481 *
464 * cancel_work_sync() will cancel the work if it is queued. If the work's 482 * cancel_work_sync() will cancel the work if it is queued. If the work's
465 * callback appears to be running, cancel_work_sync() will block until it 483 * callback appears to be running, cancel_work_sync() will block until it
466 * has completed. 484 * has completed.
@@ -476,31 +494,26 @@ static void wait_on_work(struct work_struct *work)
476 * The caller must ensure that workqueue_struct on which this work was last 494 * The caller must ensure that workqueue_struct on which this work was last
477 * queued can't be destroyed before this function returns. 495 * queued can't be destroyed before this function returns.
478 */ 496 */
479void cancel_work_sync(struct work_struct *work) 497int cancel_work_sync(struct work_struct *work)
480{ 498{
481 while (!try_to_grab_pending(work)) 499 return __cancel_work_timer(work, NULL);
482 cpu_relax();
483 wait_on_work(work);
484 work_clear_pending(work);
485} 500}
486EXPORT_SYMBOL_GPL(cancel_work_sync); 501EXPORT_SYMBOL_GPL(cancel_work_sync);
487 502
488/** 503/**
489 * cancel_rearming_delayed_work - reliably kill off a delayed work. 504 * cancel_delayed_work_sync - reliably kill off a delayed work.
490 * @dwork: the delayed work struct 505 * @dwork: the delayed work struct
491 * 506 *
507 * Returns true if @dwork was pending.
508 *
492 * It is possible to use this function if @dwork rearms itself via queue_work() 509 * It is possible to use this function if @dwork rearms itself via queue_work()
493 * or queue_delayed_work(). See also the comment for cancel_work_sync(). 510 * or queue_delayed_work(). See also the comment for cancel_work_sync().
494 */ 511 */
495void cancel_rearming_delayed_work(struct delayed_work *dwork) 512int cancel_delayed_work_sync(struct delayed_work *dwork)
496{ 513{
497 while (!del_timer(&dwork->timer) && 514 return __cancel_work_timer(&dwork->work, &dwork->timer);
498 !try_to_grab_pending(&dwork->work))
499 cpu_relax();
500 wait_on_work(&dwork->work);
501 work_clear_pending(&dwork->work);
502} 515}
503EXPORT_SYMBOL(cancel_rearming_delayed_work); 516EXPORT_SYMBOL(cancel_delayed_work_sync);
504 517
505static struct workqueue_struct *keventd_wq __read_mostly; 518static struct workqueue_struct *keventd_wq __read_mostly;
506 519
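
The cancel helpers now report whether anything was actually cancelled. A sketch of driver teardown against the renamed API; poll_work stands in for a hypothetical self-rearming delayed work item:

static struct delayed_work poll_work;	/* hypothetical, rearms itself */

static void driver_teardown(void)
{
	/*
	 * Non-zero means poll_work was still pending when it was killed;
	 * either way its callback is no longer running on return.
	 */
	if (cancel_delayed_work_sync(&poll_work))
		pr_debug("poll_work was pending at teardown\n");
}
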