about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile6
-rw-r--r--kernel/audit.c96
-rw-r--r--kernel/audit.h1
-rw-r--r--kernel/auditsc.c5
-rw-r--r--kernel/cpuset.c6
-rw-r--r--kernel/exit.c31
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/futex.c138
-rw-r--r--kernel/irq/spurious.c12
-rw-r--r--kernel/kallsyms.c11
-rw-r--r--kernel/kfifo.c3
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/module.c62
-rw-r--r--kernel/nsproxy.c72
-rw-r--r--kernel/params.c1
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/printk.c55
-rw-r--r--kernel/ptrace.c7
-rw-r--r--kernel/relay.c6
-rw-r--r--kernel/rtmutex-debug.c6
-rw-r--r--kernel/rtmutex.c6
-rw-r--r--kernel/rtmutex_common.h9
-rw-r--r--kernel/sched.c32
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/seccomp.c29
-rw-r--r--kernel/signal.c33
-rw-r--r--kernel/softirq.c6
-rw-r--r--kernel/stop_machine.c8
-rw-r--r--kernel/sys.c13
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c51
-rw-r--r--kernel/taskstats.c4
-rw-r--r--kernel/time.c32
-rw-r--r--kernel/time/clockevents.c41
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/time/timekeeping.c37
-rw-r--r--kernel/time/timer_stats.c14
-rw-r--r--kernel/timer.c15
-rw-r--r--kernel/user.c18
-rw-r--r--kernel/user_namespace.c87
-rw-r--r--kernel/utsname.c12
-rw-r--r--kernel/utsname_sysctl.c5
-rw-r--r--kernel/workqueue.c45
43 files changed, 717 insertions, 316 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 642d4277c2ea..2a999836ca18 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,11 +4,12 @@
4 4
5obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ 5obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
6 exit.o itimer.o time.o softirq.o resource.o \ 6 exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o capability.o ptrace.o timer.o user.o \ 7 sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o 11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
12 utsname.o
12 13
13obj-$(CONFIG_STACKTRACE) += stacktrace.o 14obj-$(CONFIG_STACKTRACE) += stacktrace.o
14obj-y += time/ 15obj-y += time/
@@ -48,7 +49,6 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
48obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
49obj-$(CONFIG_RELAY) += relay.o 50obj-$(CONFIG_RELAY) += relay.o
50obj-$(CONFIG_SYSCTL) += utsname_sysctl.o 51obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
51obj-$(CONFIG_UTS_NS) += utsname.o
52obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 52obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
53obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 53obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
54 54
diff --git a/kernel/audit.c b/kernel/audit.c
index d13276d41410..5ce8851facf7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -58,6 +58,7 @@
58#include <linux/selinux.h> 58#include <linux/selinux.h>
59#include <linux/inotify.h> 59#include <linux/inotify.h>
60#include <linux/freezer.h> 60#include <linux/freezer.h>
61#include <linux/tty.h>
61 62
62#include "audit.h" 63#include "audit.h"
63 64
@@ -423,6 +424,31 @@ static int kauditd_thread(void *dummy)
423 return 0; 424 return 0;
424} 425}
425 426
427static int audit_prepare_user_tty(pid_t pid, uid_t loginuid)
428{
429 struct task_struct *tsk;
430 int err;
431
432 read_lock(&tasklist_lock);
433 tsk = find_task_by_pid(pid);
434 err = -ESRCH;
435 if (!tsk)
436 goto out;
437 err = 0;
438
439 spin_lock_irq(&tsk->sighand->siglock);
440 if (!tsk->signal->audit_tty)
441 err = -EPERM;
442 spin_unlock_irq(&tsk->sighand->siglock);
443 if (err)
444 goto out;
445
446 tty_audit_push_task(tsk, loginuid);
447out:
448 read_unlock(&tasklist_lock);
449 return err;
450}
451
426int audit_send_list(void *_dest) 452int audit_send_list(void *_dest)
427{ 453{
428 struct audit_netlink_list *dest = _dest; 454 struct audit_netlink_list *dest = _dest;
@@ -511,6 +537,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
511 case AUDIT_DEL: 537 case AUDIT_DEL:
512 case AUDIT_DEL_RULE: 538 case AUDIT_DEL_RULE:
513 case AUDIT_SIGNAL_INFO: 539 case AUDIT_SIGNAL_INFO:
540 case AUDIT_TTY_GET:
541 case AUDIT_TTY_SET:
514 if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) 542 if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
515 err = -EPERM; 543 err = -EPERM;
516 break; 544 break;
@@ -622,6 +650,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
622 err = audit_filter_user(&NETLINK_CB(skb), msg_type); 650 err = audit_filter_user(&NETLINK_CB(skb), msg_type);
623 if (err == 1) { 651 if (err == 1) {
624 err = 0; 652 err = 0;
653 if (msg_type == AUDIT_USER_TTY) {
654 err = audit_prepare_user_tty(pid, loginuid);
655 if (err)
656 break;
657 }
625 ab = audit_log_start(NULL, GFP_KERNEL, msg_type); 658 ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
626 if (ab) { 659 if (ab) {
627 audit_log_format(ab, 660 audit_log_format(ab,
@@ -638,8 +671,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
638 " subj=%s", ctx); 671 " subj=%s", ctx);
639 kfree(ctx); 672 kfree(ctx);
640 } 673 }
641 audit_log_format(ab, " msg='%.1024s'", 674 if (msg_type != AUDIT_USER_TTY)
642 (char *)data); 675 audit_log_format(ab, " msg='%.1024s'",
676 (char *)data);
677 else {
678 int size;
679
680 audit_log_format(ab, " msg=");
681 size = nlmsg_len(nlh);
682 audit_log_n_untrustedstring(ab, size,
683 data);
684 }
643 audit_set_pid(ab, pid); 685 audit_set_pid(ab, pid);
644 audit_log_end(ab); 686 audit_log_end(ab);
645 } 687 }
@@ -730,6 +772,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
730 0, 0, sig_data, sizeof(*sig_data) + len); 772 0, 0, sig_data, sizeof(*sig_data) + len);
731 kfree(sig_data); 773 kfree(sig_data);
732 break; 774 break;
775 case AUDIT_TTY_GET: {
776 struct audit_tty_status s;
777 struct task_struct *tsk;
778
779 read_lock(&tasklist_lock);
780 tsk = find_task_by_pid(pid);
781 if (!tsk)
782 err = -ESRCH;
783 else {
784 spin_lock_irq(&tsk->sighand->siglock);
785 s.enabled = tsk->signal->audit_tty != 0;
786 spin_unlock_irq(&tsk->sighand->siglock);
787 }
788 read_unlock(&tasklist_lock);
789 audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0,
790 &s, sizeof(s));
791 break;
792 }
793 case AUDIT_TTY_SET: {
794 struct audit_tty_status *s;
795 struct task_struct *tsk;
796
797 if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
798 return -EINVAL;
799 s = data;
800 if (s->enabled != 0 && s->enabled != 1)
801 return -EINVAL;
802 read_lock(&tasklist_lock);
803 tsk = find_task_by_pid(pid);
804 if (!tsk)
805 err = -ESRCH;
806 else {
807 spin_lock_irq(&tsk->sighand->siglock);
808 tsk->signal->audit_tty = s->enabled != 0;
809 spin_unlock_irq(&tsk->sighand->siglock);
810 }
811 read_unlock(&tasklist_lock);
812 break;
813 }
733 default: 814 default:
734 err = -EINVAL; 815 err = -EINVAL;
735 break; 816 break;
@@ -1185,7 +1266,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
1185} 1266}
1186 1267
1187/** 1268/**
1188 * audit_log_n_unstrustedstring - log a string that may contain random characters 1269 * audit_log_n_untrustedstring - log a string that may contain random characters
1189 * @ab: audit_buffer 1270 * @ab: audit_buffer
1190 * @len: length of string (not including trailing null) 1271 * @len: length of string (not including trailing null)
1191 * @string: string to be logged 1272 * @string: string to be logged
@@ -1201,25 +1282,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
1201const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, 1282const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
1202 const char *string) 1283 const char *string)
1203{ 1284{
1204 const unsigned char *p = string; 1285 const unsigned char *p;
1205 1286
1206 while (*p) { 1287 for (p = string; p < (const unsigned char *)string + len && *p; p++) {
1207 if (*p == '"' || *p < 0x21 || *p > 0x7f) { 1288 if (*p == '"' || *p < 0x21 || *p > 0x7f) {
1208 audit_log_hex(ab, string, len); 1289 audit_log_hex(ab, string, len);
1209 return string + len + 1; 1290 return string + len + 1;
1210 } 1291 }
1211 p++;
1212 } 1292 }
1213 audit_log_n_string(ab, len, string); 1293 audit_log_n_string(ab, len, string);
1214 return p + 1; 1294 return p + 1;
1215} 1295}
1216 1296
1217/** 1297/**
1218 * audit_log_unstrustedstring - log a string that may contain random characters 1298 * audit_log_untrustedstring - log a string that may contain random characters
1219 * @ab: audit_buffer 1299 * @ab: audit_buffer
1220 * @string: string to be logged 1300 * @string: string to be logged
1221 * 1301 *
1222 * Same as audit_log_n_unstrustedstring(), except that strlen is used to 1302 * Same as audit_log_n_untrustedstring(), except that strlen is used to
1223 * determine string length. 1303 * determine string length.
1224 */ 1304 */
1225const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) 1305const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
diff --git a/kernel/audit.h b/kernel/audit.h
index 815d6f5c04ee..95877435c347 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
115extern void audit_send_reply(int pid, int seq, int type, 115extern void audit_send_reply(int pid, int seq, int type,
116 int done, int multi, 116 int done, int multi,
117 void *payload, int size); 117 void *payload, int size);
118extern void audit_log_lost(const char *message);
119extern void audit_panic(const char *message); 118extern void audit_panic(const char *message);
120 119
121struct audit_netlink_list { 120struct audit_netlink_list {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e36481ed61b4..b7640a5f382a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -71,9 +71,6 @@
71 71
72extern struct list_head audit_filter_list[]; 72extern struct list_head audit_filter_list[];
73 73
74/* No syscall auditing will take place unless audit_enabled != 0. */
75extern int audit_enabled;
76
77/* AUDIT_NAMES is the number of slots we reserve in the audit_context 74/* AUDIT_NAMES is the number of slots we reserve in the audit_context
78 * for saving names from getname(). */ 75 * for saving names from getname(). */
79#define AUDIT_NAMES 20 76#define AUDIT_NAMES 20
@@ -2040,7 +2037,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
2040 2037
2041/** 2038/**
2042 * audit_core_dumps - record information about processes that end abnormally 2039 * audit_core_dumps - record information about processes that end abnormally
2043 * @sig: signal value 2040 * @signr: signal value
2044 * 2041 *
2045 * If a process ends with a core dump, something fishy is going on and we 2042 * If a process ends with a core dump, something fishy is going on and we
2046 * should record the event for investigation. 2043 * should record the event for investigation.
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4c49188cc49b..824b1c01f410 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -981,10 +981,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
981 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); 981 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
982 if (!mmarray) 982 if (!mmarray)
983 goto done; 983 goto done;
984 write_lock_irq(&tasklist_lock); /* block fork */ 984 read_lock(&tasklist_lock); /* block fork */
985 if (atomic_read(&cs->count) <= ntasks) 985 if (atomic_read(&cs->count) <= ntasks)
986 break; /* got enough */ 986 break; /* got enough */
987 write_unlock_irq(&tasklist_lock); /* try again */ 987 read_unlock(&tasklist_lock); /* try again */
988 kfree(mmarray); 988 kfree(mmarray);
989 } 989 }
990 990
@@ -1006,7 +1006,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
1006 continue; 1006 continue;
1007 mmarray[n++] = mm; 1007 mmarray[n++] = mm;
1008 } while_each_thread(g, p); 1008 } while_each_thread(g, p);
1009 write_unlock_irq(&tasklist_lock); 1009 read_unlock(&tasklist_lock);
1010 1010
1011 /* 1011 /*
1012 * Now that we've dropped the tasklist spinlock, we can 1012 * Now that we've dropped the tasklist spinlock, we can
diff --git a/kernel/exit.c b/kernel/exit.c
index ca6a11b73023..57626692cd90 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -858,6 +858,34 @@ static void exit_notify(struct task_struct *tsk)
858 release_task(tsk); 858 release_task(tsk);
859} 859}
860 860
861#ifdef CONFIG_DEBUG_STACK_USAGE
862static void check_stack_usage(void)
863{
864 static DEFINE_SPINLOCK(low_water_lock);
865 static int lowest_to_date = THREAD_SIZE;
866 unsigned long *n = end_of_stack(current);
867 unsigned long free;
868
869 while (*n == 0)
870 n++;
871 free = (unsigned long)n - (unsigned long)end_of_stack(current);
872
873 if (free >= lowest_to_date)
874 return;
875
876 spin_lock(&low_water_lock);
877 if (free < lowest_to_date) {
878 printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
879 "left\n",
880 current->comm, free);
881 lowest_to_date = free;
882 }
883 spin_unlock(&low_water_lock);
884}
885#else
886static inline void check_stack_usage(void) {}
887#endif
888
861fastcall NORET_TYPE void do_exit(long code) 889fastcall NORET_TYPE void do_exit(long code)
862{ 890{
863 struct task_struct *tsk = current; 891 struct task_struct *tsk = current;
@@ -937,6 +965,8 @@ fastcall NORET_TYPE void do_exit(long code)
937 if (unlikely(tsk->compat_robust_list)) 965 if (unlikely(tsk->compat_robust_list))
938 compat_exit_robust_list(tsk); 966 compat_exit_robust_list(tsk);
939#endif 967#endif
968 if (group_dead)
969 tty_audit_exit();
940 if (unlikely(tsk->audit_context)) 970 if (unlikely(tsk->audit_context))
941 audit_free(tsk); 971 audit_free(tsk);
942 972
@@ -949,6 +979,7 @@ fastcall NORET_TYPE void do_exit(long code)
949 exit_sem(tsk); 979 exit_sem(tsk);
950 __exit_files(tsk); 980 __exit_files(tsk);
951 __exit_fs(tsk); 981 __exit_fs(tsk);
982 check_stack_usage();
952 exit_thread(); 983 exit_thread();
953 cpuset_exit(tsk); 984 cpuset_exit(tsk);
954 exit_keys(tsk); 985 exit_keys(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index da3a155bba0d..7c5c5888e00a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
49#include <linux/delayacct.h> 49#include <linux/delayacct.h>
50#include <linux/taskstats_kern.h> 50#include <linux/taskstats_kern.h>
51#include <linux/random.h> 51#include <linux/random.h>
52#include <linux/tty.h>
52 53
53#include <asm/pgtable.h> 54#include <asm/pgtable.h>
54#include <asm/pgalloc.h> 55#include <asm/pgalloc.h>
@@ -897,6 +898,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
897 } 898 }
898 acct_init_pacct(&sig->pacct); 899 acct_init_pacct(&sig->pacct);
899 900
901 tty_audit_fork(sig);
902
900 return 0; 903 return 0;
901} 904}
902 905
@@ -999,7 +1002,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
999 if (atomic_read(&p->user->processes) >= 1002 if (atomic_read(&p->user->processes) >=
1000 p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 1003 p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
1001 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && 1004 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
1002 p->user != &root_user) 1005 p->user != current->nsproxy->user_ns->root_user)
1003 goto bad_fork_free; 1006 goto bad_fork_free;
1004 } 1007 }
1005 1008
@@ -1059,6 +1062,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1059 1062
1060 p->lock_depth = -1; /* -1 = no lock */ 1063 p->lock_depth = -1; /* -1 = no lock */
1061 do_posix_clock_monotonic_gettime(&p->start_time); 1064 do_posix_clock_monotonic_gettime(&p->start_time);
1065 p->real_start_time = p->start_time;
1066 monotonic_to_bootbased(&p->real_start_time);
1062 p->security = NULL; 1067 p->security = NULL;
1063 p->io_context = NULL; 1068 p->io_context = NULL;
1064 p->io_wait = NULL; 1069 p->io_wait = NULL;
@@ -1601,7 +1606,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1601 err = -EINVAL; 1606 err = -EINVAL;
1602 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 1607 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1603 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 1608 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1604 CLONE_NEWUTS|CLONE_NEWIPC)) 1609 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
1605 goto bad_unshare_out; 1610 goto bad_unshare_out;
1606 1611
1607 if ((err = unshare_thread(unshare_flags))) 1612 if ((err = unshare_thread(unshare_flags)))
diff --git a/kernel/futex.c b/kernel/futex.c
index 45490bec5831..5c3f45d07c53 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -121,6 +121,24 @@ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
121static struct vfsmount *futex_mnt; 121static struct vfsmount *futex_mnt;
122 122
123/* 123/*
124 * Take mm->mmap_sem, when futex is shared
125 */
126static inline void futex_lock_mm(struct rw_semaphore *fshared)
127{
128 if (fshared)
129 down_read(fshared);
130}
131
132/*
133 * Release mm->mmap_sem, when the futex is shared
134 */
135static inline void futex_unlock_mm(struct rw_semaphore *fshared)
136{
137 if (fshared)
138 up_read(fshared);
139}
140
141/*
124 * We hash on the keys returned from get_futex_key (see below). 142 * We hash on the keys returned from get_futex_key (see below).
125 */ 143 */
126static struct futex_hash_bucket *hash_futex(union futex_key *key) 144static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -287,7 +305,18 @@ void drop_futex_key_refs(union futex_key *key)
287} 305}
288EXPORT_SYMBOL_GPL(drop_futex_key_refs); 306EXPORT_SYMBOL_GPL(drop_futex_key_refs);
289 307
290static inline int get_futex_value_locked(u32 *dest, u32 __user *from) 308static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
309{
310 u32 curval;
311
312 pagefault_disable();
313 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
314 pagefault_enable();
315
316 return curval;
317}
318
319static int get_futex_value_locked(u32 *dest, u32 __user *from)
291{ 320{
292 int ret; 321 int ret;
293 322
@@ -620,9 +649,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
620 649
621 newval = FUTEX_WAITERS | new_owner->pid; 650 newval = FUTEX_WAITERS | new_owner->pid;
622 651
623 pagefault_disable(); 652 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
624 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
625 pagefault_enable();
626 653
627 if (curval == -EFAULT) 654 if (curval == -EFAULT)
628 ret = -EFAULT; 655 ret = -EFAULT;
@@ -659,9 +686,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
659 * There is no waiter, so we unlock the futex. The owner died 686 * There is no waiter, so we unlock the futex. The owner died
660 * bit has not to be preserved here. We are the owner: 687 * bit has not to be preserved here. We are the owner:
661 */ 688 */
662 pagefault_disable(); 689 oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
663 oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
664 pagefault_enable();
665 690
666 if (oldval == -EFAULT) 691 if (oldval == -EFAULT)
667 return oldval; 692 return oldval;
@@ -700,8 +725,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
700 union futex_key key; 725 union futex_key key;
701 int ret; 726 int ret;
702 727
703 if (fshared) 728 futex_lock_mm(fshared);
704 down_read(fshared);
705 729
706 ret = get_futex_key(uaddr, fshared, &key); 730 ret = get_futex_key(uaddr, fshared, &key);
707 if (unlikely(ret != 0)) 731 if (unlikely(ret != 0))
@@ -725,8 +749,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
725 749
726 spin_unlock(&hb->lock); 750 spin_unlock(&hb->lock);
727out: 751out:
728 if (fshared) 752 futex_unlock_mm(fshared);
729 up_read(fshared);
730 return ret; 753 return ret;
731} 754}
732 755
@@ -746,8 +769,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
746 int ret, op_ret, attempt = 0; 769 int ret, op_ret, attempt = 0;
747 770
748retryfull: 771retryfull:
749 if (fshared) 772 futex_lock_mm(fshared);
750 down_read(fshared);
751 773
752 ret = get_futex_key(uaddr1, fshared, &key1); 774 ret = get_futex_key(uaddr1, fshared, &key1);
753 if (unlikely(ret != 0)) 775 if (unlikely(ret != 0))
@@ -793,7 +815,7 @@ retry:
793 */ 815 */
794 if (attempt++) { 816 if (attempt++) {
795 ret = futex_handle_fault((unsigned long)uaddr2, 817 ret = futex_handle_fault((unsigned long)uaddr2,
796 fshared, attempt); 818 fshared, attempt);
797 if (ret) 819 if (ret)
798 goto out; 820 goto out;
799 goto retry; 821 goto retry;
@@ -803,8 +825,7 @@ retry:
803 * If we would have faulted, release mmap_sem, 825 * If we would have faulted, release mmap_sem,
804 * fault it in and start all over again. 826 * fault it in and start all over again.
805 */ 827 */
806 if (fshared) 828 futex_unlock_mm(fshared);
807 up_read(fshared);
808 829
809 ret = get_user(dummy, uaddr2); 830 ret = get_user(dummy, uaddr2);
810 if (ret) 831 if (ret)
@@ -841,8 +862,8 @@ retry:
841 if (hb1 != hb2) 862 if (hb1 != hb2)
842 spin_unlock(&hb2->lock); 863 spin_unlock(&hb2->lock);
843out: 864out:
844 if (fshared) 865 futex_unlock_mm(fshared);
845 up_read(fshared); 866
846 return ret; 867 return ret;
847} 868}
848 869
@@ -861,8 +882,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
861 int ret, drop_count = 0; 882 int ret, drop_count = 0;
862 883
863 retry: 884 retry:
864 if (fshared) 885 futex_lock_mm(fshared);
865 down_read(fshared);
866 886
867 ret = get_futex_key(uaddr1, fshared, &key1); 887 ret = get_futex_key(uaddr1, fshared, &key1);
868 if (unlikely(ret != 0)) 888 if (unlikely(ret != 0))
@@ -890,8 +910,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
890 * If we would have faulted, release mmap_sem, fault 910 * If we would have faulted, release mmap_sem, fault
891 * it in and start all over again. 911 * it in and start all over again.
892 */ 912 */
893 if (fshared) 913 futex_unlock_mm(fshared);
894 up_read(fshared);
895 914
896 ret = get_user(curval, uaddr1); 915 ret = get_user(curval, uaddr1);
897 916
@@ -944,8 +963,7 @@ out_unlock:
944 drop_futex_key_refs(&key1); 963 drop_futex_key_refs(&key1);
945 964
946out: 965out:
947 if (fshared) 966 futex_unlock_mm(fshared);
948 up_read(fshared);
949 return ret; 967 return ret;
950} 968}
951 969
@@ -1113,10 +1131,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1113 while (!ret) { 1131 while (!ret) {
1114 newval = (uval & FUTEX_OWNER_DIED) | newtid; 1132 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1115 1133
1116 pagefault_disable(); 1134 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1117 curval = futex_atomic_cmpxchg_inatomic(uaddr,
1118 uval, newval);
1119 pagefault_enable();
1120 1135
1121 if (curval == -EFAULT) 1136 if (curval == -EFAULT)
1122 ret = -EFAULT; 1137 ret = -EFAULT;
@@ -1134,6 +1149,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1134#define ARG3_SHARED 1 1149#define ARG3_SHARED 1
1135 1150
1136static long futex_wait_restart(struct restart_block *restart); 1151static long futex_wait_restart(struct restart_block *restart);
1152
1137static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, 1153static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1138 u32 val, ktime_t *abs_time) 1154 u32 val, ktime_t *abs_time)
1139{ 1155{
@@ -1148,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1148 1164
1149 q.pi_state = NULL; 1165 q.pi_state = NULL;
1150 retry: 1166 retry:
1151 if (fshared) 1167 futex_lock_mm(fshared);
1152 down_read(fshared);
1153 1168
1154 ret = get_futex_key(uaddr, fshared, &q.key); 1169 ret = get_futex_key(uaddr, fshared, &q.key);
1155 if (unlikely(ret != 0)) 1170 if (unlikely(ret != 0))
@@ -1186,8 +1201,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1186 * If we would have faulted, release mmap_sem, fault it in and 1201 * If we would have faulted, release mmap_sem, fault it in and
1187 * start all over again. 1202 * start all over again.
1188 */ 1203 */
1189 if (fshared) 1204 futex_unlock_mm(fshared);
1190 up_read(fshared);
1191 1205
1192 ret = get_user(uval, uaddr); 1206 ret = get_user(uval, uaddr);
1193 1207
@@ -1206,8 +1220,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1206 * Now the futex is queued and we have checked the data, we 1220 * Now the futex is queued and we have checked the data, we
1207 * don't want to hold mmap_sem while we sleep. 1221 * don't want to hold mmap_sem while we sleep.
1208 */ 1222 */
1209 if (fshared) 1223 futex_unlock_mm(fshared);
1210 up_read(fshared);
1211 1224
1212 /* 1225 /*
1213 * There might have been scheduling since the queue_me(), as we 1226 * There might have been scheduling since the queue_me(), as we
@@ -1285,8 +1298,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1285 queue_unlock(&q, hb); 1298 queue_unlock(&q, hb);
1286 1299
1287 out_release_sem: 1300 out_release_sem:
1288 if (fshared) 1301 futex_unlock_mm(fshared);
1289 up_read(fshared);
1290 return ret; 1302 return ret;
1291} 1303}
1292 1304
@@ -1333,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1333 1345
1334 q.pi_state = NULL; 1346 q.pi_state = NULL;
1335 retry: 1347 retry:
1336 if (fshared) 1348 futex_lock_mm(fshared);
1337 down_read(fshared);
1338 1349
1339 ret = get_futex_key(uaddr, fshared, &q.key); 1350 ret = get_futex_key(uaddr, fshared, &q.key);
1340 if (unlikely(ret != 0)) 1351 if (unlikely(ret != 0))
@@ -1353,9 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1353 */ 1364 */
1354 newval = current->pid; 1365 newval = current->pid;
1355 1366
1356 pagefault_disable(); 1367 curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
1357 curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
1358 pagefault_enable();
1359 1368
1360 if (unlikely(curval == -EFAULT)) 1369 if (unlikely(curval == -EFAULT))
1361 goto uaddr_faulted; 1370 goto uaddr_faulted;
@@ -1398,9 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1398 lock_taken = 1; 1407 lock_taken = 1;
1399 } 1408 }
1400 1409
1401 pagefault_disable(); 1410 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1402 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
1403 pagefault_enable();
1404 1411
1405 if (unlikely(curval == -EFAULT)) 1412 if (unlikely(curval == -EFAULT))
1406 goto uaddr_faulted; 1413 goto uaddr_faulted;
@@ -1428,8 +1435,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1428 * exit to complete. 1435 * exit to complete.
1429 */ 1436 */
1430 queue_unlock(&q, hb); 1437 queue_unlock(&q, hb);
1431 if (fshared) 1438 futex_unlock_mm(fshared);
1432 up_read(fshared);
1433 cond_resched(); 1439 cond_resched();
1434 goto retry; 1440 goto retry;
1435 1441
@@ -1465,8 +1471,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1465 * Now the futex is queued and we have checked the data, we 1471 * Now the futex is queued and we have checked the data, we
1466 * don't want to hold mmap_sem while we sleep. 1472 * don't want to hold mmap_sem while we sleep.
1467 */ 1473 */
1468 if (fshared) 1474 futex_unlock_mm(fshared);
1469 up_read(fshared);
1470 1475
1471 WARN_ON(!q.pi_state); 1476 WARN_ON(!q.pi_state);
1472 /* 1477 /*
@@ -1480,8 +1485,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1480 ret = ret ? 0 : -EWOULDBLOCK; 1485 ret = ret ? 0 : -EWOULDBLOCK;
1481 } 1486 }
1482 1487
1483 if (fshared) 1488 futex_lock_mm(fshared);
1484 down_read(fshared);
1485 spin_lock(q.lock_ptr); 1489 spin_lock(q.lock_ptr);
1486 1490
1487 if (!ret) { 1491 if (!ret) {
@@ -1518,8 +1522,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1518 1522
1519 /* Unqueue and drop the lock */ 1523 /* Unqueue and drop the lock */
1520 unqueue_me_pi(&q); 1524 unqueue_me_pi(&q);
1521 if (fshared) 1525 futex_unlock_mm(fshared);
1522 up_read(fshared);
1523 1526
1524 return ret != -EINTR ? ret : -ERESTARTNOINTR; 1527 return ret != -EINTR ? ret : -ERESTARTNOINTR;
1525 1528
@@ -1527,8 +1530,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1527 queue_unlock(&q, hb); 1530 queue_unlock(&q, hb);
1528 1531
1529 out_release_sem: 1532 out_release_sem:
1530 if (fshared) 1533 futex_unlock_mm(fshared);
1531 up_read(fshared);
1532 return ret; 1534 return ret;
1533 1535
1534 uaddr_faulted: 1536 uaddr_faulted:
@@ -1550,8 +1552,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1550 goto retry_unlocked; 1552 goto retry_unlocked;
1551 } 1553 }
1552 1554
1553 if (fshared) 1555 futex_unlock_mm(fshared);
1554 up_read(fshared);
1555 1556
1556 ret = get_user(uval, uaddr); 1557 ret = get_user(uval, uaddr);
1557 if (!ret && (uval != -EFAULT)) 1558 if (!ret && (uval != -EFAULT))
@@ -1585,8 +1586,7 @@ retry:
1585 /* 1586 /*
1586 * First take all the futex related locks: 1587 * First take all the futex related locks:
1587 */ 1588 */
1588 if (fshared) 1589 futex_lock_mm(fshared);
1589 down_read(fshared);
1590 1590
1591 ret = get_futex_key(uaddr, fshared, &key); 1591 ret = get_futex_key(uaddr, fshared, &key);
1592 if (unlikely(ret != 0)) 1592 if (unlikely(ret != 0))
@@ -1601,11 +1601,9 @@ retry_unlocked:
1601 * again. If it succeeds then we can return without waking 1601 * again. If it succeeds then we can return without waking
1602 * anyone else up: 1602 * anyone else up:
1603 */ 1603 */
1604 if (!(uval & FUTEX_OWNER_DIED)) { 1604 if (!(uval & FUTEX_OWNER_DIED))
1605 pagefault_disable(); 1605 uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
1606 uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); 1606
1607 pagefault_enable();
1608 }
1609 1607
1610 if (unlikely(uval == -EFAULT)) 1608 if (unlikely(uval == -EFAULT))
1611 goto pi_faulted; 1609 goto pi_faulted;
@@ -1647,8 +1645,7 @@ retry_unlocked:
1647out_unlock: 1645out_unlock:
1648 spin_unlock(&hb->lock); 1646 spin_unlock(&hb->lock);
1649out: 1647out:
1650 if (fshared) 1648 futex_unlock_mm(fshared);
1651 up_read(fshared);
1652 1649
1653 return ret; 1650 return ret;
1654 1651
@@ -1671,8 +1668,7 @@ pi_faulted:
1671 goto retry_unlocked; 1668 goto retry_unlocked;
1672 } 1669 }
1673 1670
1674 if (fshared) 1671 futex_unlock_mm(fshared);
1675 up_read(fshared);
1676 1672
1677 ret = get_user(uval, uaddr); 1673 ret = get_user(uval, uaddr);
1678 if (!ret && (uval != -EFAULT)) 1674 if (!ret && (uval != -EFAULT))
@@ -1729,8 +1725,8 @@ static int futex_fd(u32 __user *uaddr, int signal)
1729 1725
1730 if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { 1726 if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
1731 printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " 1727 printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
1732 "will be removed from the kernel in June 2007\n", 1728 "will be removed from the kernel in June 2007\n",
1733 current->comm); 1729 current->comm);
1734 } 1730 }
1735 1731
1736 ret = -EINVAL; 1732 ret = -EINVAL;
@@ -1908,10 +1904,8 @@ retry:
1908 * Wake robust non-PI futexes here. The wakeup of 1904 * Wake robust non-PI futexes here. The wakeup of
1909 * PI futexes happens in exit_pi_state(): 1905 * PI futexes happens in exit_pi_state():
1910 */ 1906 */
1911 if (!pi) { 1907 if (!pi && (uval & FUTEX_WAITERS))
1912 if (uval & FUTEX_WAITERS)
1913 futex_wake(uaddr, &curr->mm->mmap_sem, 1); 1908 futex_wake(uaddr, &curr->mm->mmap_sem, 1);
1914 }
1915 } 1909 }
1916 return 0; 1910 return 0;
1917} 1911}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index bd9e272d55e9..32b161972fad 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -172,7 +172,17 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
172 irqreturn_t action_ret) 172 irqreturn_t action_ret)
173{ 173{
174 if (unlikely(action_ret != IRQ_HANDLED)) { 174 if (unlikely(action_ret != IRQ_HANDLED)) {
175 desc->irqs_unhandled++; 175 /*
176 * If we are seeing only the odd spurious IRQ caused by
177 * bus asynchronicity then don't eventually trigger an error,
178 * otherwise the couter becomes a doomsday timer for otherwise
179 * working systems
180 */
181 if (jiffies - desc->last_unhandled > HZ/10)
182 desc->irqs_unhandled = 1;
183 else
184 desc->irqs_unhandled++;
185 desc->last_unhandled = jiffies;
176 if (unlikely(action_ret != IRQ_NONE)) 186 if (unlikely(action_ret != IRQ_NONE))
177 report_bad_irq(irq, desc, action_ret); 187 report_bad_irq(irq, desc, action_ret);
178 } 188 }
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index fed54418626c..0d662475dd9f 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -317,13 +317,12 @@ int sprint_symbol(char *buffer, unsigned long address)
317 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); 317 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
318 if (!name) 318 if (!name)
319 return sprintf(buffer, "0x%lx", address); 319 return sprintf(buffer, "0x%lx", address);
320 else { 320
321 if (modname) 321 if (modname)
322 return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, 322 return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
323 size, modname); 323 size, modname);
324 else 324 else
325 return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); 325 return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
326 }
327} 326}
328 327
329/* Look up a kernel symbol and print it to the kernel messages. */ 328/* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index cee419143fd4..bc41ad0f24f8 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -24,6 +24,7 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/err.h> 25#include <linux/err.h>
26#include <linux/kfifo.h> 26#include <linux/kfifo.h>
27#include <linux/log2.h>
27 28
28/** 29/**
29 * kfifo_init - allocates a new FIFO using a preallocated buffer 30 * kfifo_init - allocates a new FIFO using a preallocated buffer
@@ -41,7 +42,7 @@ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
41 struct kfifo *fifo; 42 struct kfifo *fifo;
42 43
43 /* size must be a power of 2 */ 44 /* size must be a power of 2 */
44 BUG_ON(size & (size - 1)); 45 BUG_ON(!is_power_of_2(size));
45 46
46 fifo = kmalloc(sizeof(struct kfifo), gfp_mask); 47 fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
47 if (!fifo) 48 if (!fifo)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index bbd51b81a3e8..a404f7ee7395 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -215,7 +215,7 @@ int kthread_stop(struct task_struct *k)
215EXPORT_SYMBOL(kthread_stop); 215EXPORT_SYMBOL(kthread_stop);
216 216
217 217
218static __init void kthreadd_setup(void) 218static noinline __init_refok void kthreadd_setup(void)
219{ 219{
220 struct task_struct *tsk = current; 220 struct task_struct *tsk = current;
221 221
diff --git a/kernel/module.c b/kernel/module.c
index 9bd93de01f4a..539fed9ac83c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -61,10 +61,8 @@ extern int module_sysfs_initialized;
61/* If this is set, the section belongs in the init part of the module */ 61/* If this is set, the section belongs in the init part of the module */
62#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) 62#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
63 63
64/* Protects module list */ 64/* List of modules, protected by module_mutex or preempt_disable
65static DEFINE_SPINLOCK(modlist_lock); 65 * (add/delete uses stop_machine). */
66
67/* List of modules, protected by module_mutex AND modlist_lock */
68static DEFINE_MUTEX(module_mutex); 66static DEFINE_MUTEX(module_mutex);
69static LIST_HEAD(modules); 67static LIST_HEAD(modules);
70 68
@@ -488,8 +486,7 @@ static void free_modinfo_##field(struct module *mod) \
488 mod->field = NULL; \ 486 mod->field = NULL; \
489} \ 487} \
490static struct module_attribute modinfo_##field = { \ 488static struct module_attribute modinfo_##field = { \
491 .attr = { .name = __stringify(field), .mode = 0444, \ 489 .attr = { .name = __stringify(field), .mode = 0444 }, \
492 .owner = THIS_MODULE }, \
493 .show = show_modinfo_##field, \ 490 .show = show_modinfo_##field, \
494 .setup = setup_modinfo_##field, \ 491 .setup = setup_modinfo_##field, \
495 .test = modinfo_##field##_exists, \ 492 .test = modinfo_##field##_exists, \
@@ -761,14 +758,13 @@ static void print_unload_info(struct seq_file *m, struct module *mod)
761void __symbol_put(const char *symbol) 758void __symbol_put(const char *symbol)
762{ 759{
763 struct module *owner; 760 struct module *owner;
764 unsigned long flags;
765 const unsigned long *crc; 761 const unsigned long *crc;
766 762
767 spin_lock_irqsave(&modlist_lock, flags); 763 preempt_disable();
768 if (!__find_symbol(symbol, &owner, &crc, 1)) 764 if (!__find_symbol(symbol, &owner, &crc, 1))
769 BUG(); 765 BUG();
770 module_put(owner); 766 module_put(owner);
771 spin_unlock_irqrestore(&modlist_lock, flags); 767 preempt_enable();
772} 768}
773EXPORT_SYMBOL(__symbol_put); 769EXPORT_SYMBOL(__symbol_put);
774 770
@@ -793,7 +789,7 @@ static ssize_t show_refcnt(struct module_attribute *mattr,
793} 789}
794 790
795static struct module_attribute refcnt = { 791static struct module_attribute refcnt = {
796 .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, 792 .attr = { .name = "refcnt", .mode = 0444 },
797 .show = show_refcnt, 793 .show = show_refcnt,
798}; 794};
799 795
@@ -851,7 +847,7 @@ static ssize_t show_initstate(struct module_attribute *mattr,
851} 847}
852 848
853static struct module_attribute initstate = { 849static struct module_attribute initstate = {
854 .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, 850 .attr = { .name = "initstate", .mode = 0444 },
855 .show = show_initstate, 851 .show = show_initstate,
856}; 852};
857 853
@@ -1032,7 +1028,6 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
1032 sattr->mattr.show = module_sect_show; 1028 sattr->mattr.show = module_sect_show;
1033 sattr->mattr.store = NULL; 1029 sattr->mattr.store = NULL;
1034 sattr->mattr.attr.name = sattr->name; 1030 sattr->mattr.attr.name = sattr->name;
1035 sattr->mattr.attr.owner = mod;
1036 sattr->mattr.attr.mode = S_IRUGO; 1031 sattr->mattr.attr.mode = S_IRUGO;
1037 *(gattr++) = &(sattr++)->mattr.attr; 1032 *(gattr++) = &(sattr++)->mattr.attr;
1038 } 1033 }
@@ -1090,7 +1085,6 @@ int module_add_modinfo_attrs(struct module *mod)
1090 if (!attr->test || 1085 if (!attr->test ||
1091 (attr->test && attr->test(mod))) { 1086 (attr->test && attr->test(mod))) {
1092 memcpy(temp_attr, attr, sizeof(*temp_attr)); 1087 memcpy(temp_attr, attr, sizeof(*temp_attr));
1093 temp_attr->attr.owner = mod;
1094 error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); 1088 error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
1095 ++temp_attr; 1089 ++temp_attr;
1096 } 1090 }
@@ -1231,14 +1225,14 @@ static void free_module(struct module *mod)
1231void *__symbol_get(const char *symbol) 1225void *__symbol_get(const char *symbol)
1232{ 1226{
1233 struct module *owner; 1227 struct module *owner;
1234 unsigned long value, flags; 1228 unsigned long value;
1235 const unsigned long *crc; 1229 const unsigned long *crc;
1236 1230
1237 spin_lock_irqsave(&modlist_lock, flags); 1231 preempt_disable();
1238 value = __find_symbol(symbol, &owner, &crc, 1); 1232 value = __find_symbol(symbol, &owner, &crc, 1);
1239 if (value && !strong_try_module_get(owner)) 1233 if (value && !strong_try_module_get(owner))
1240 value = 0; 1234 value = 0;
1241 spin_unlock_irqrestore(&modlist_lock, flags); 1235 preempt_enable();
1242 1236
1243 return (void *)value; 1237 return (void *)value;
1244} 1238}
@@ -2235,26 +2229,13 @@ unsigned long module_kallsyms_lookup_name(const char *name)
2235/* Called by the /proc file system to return a list of modules. */ 2229/* Called by the /proc file system to return a list of modules. */
2236static void *m_start(struct seq_file *m, loff_t *pos) 2230static void *m_start(struct seq_file *m, loff_t *pos)
2237{ 2231{
2238 struct list_head *i;
2239 loff_t n = 0;
2240
2241 mutex_lock(&module_mutex); 2232 mutex_lock(&module_mutex);
2242 list_for_each(i, &modules) { 2233 return seq_list_start(&modules, *pos);
2243 if (n++ == *pos)
2244 break;
2245 }
2246 if (i == &modules)
2247 return NULL;
2248 return i;
2249} 2234}
2250 2235
2251static void *m_next(struct seq_file *m, void *p, loff_t *pos) 2236static void *m_next(struct seq_file *m, void *p, loff_t *pos)
2252{ 2237{
2253 struct list_head *i = p; 2238 return seq_list_next(p, &modules, pos);
2254 (*pos)++;
2255 if (i->next == &modules)
2256 return NULL;
2257 return i->next;
2258} 2239}
2259 2240
2260static void m_stop(struct seq_file *m, void *p) 2241static void m_stop(struct seq_file *m, void *p)
@@ -2324,11 +2305,10 @@ const struct seq_operations modules_op = {
2324/* Given an address, look for it in the module exception tables. */ 2305/* Given an address, look for it in the module exception tables. */
2325const struct exception_table_entry *search_module_extables(unsigned long addr) 2306const struct exception_table_entry *search_module_extables(unsigned long addr)
2326{ 2307{
2327 unsigned long flags;
2328 const struct exception_table_entry *e = NULL; 2308 const struct exception_table_entry *e = NULL;
2329 struct module *mod; 2309 struct module *mod;
2330 2310
2331 spin_lock_irqsave(&modlist_lock, flags); 2311 preempt_disable();
2332 list_for_each_entry(mod, &modules, list) { 2312 list_for_each_entry(mod, &modules, list) {
2333 if (mod->num_exentries == 0) 2313 if (mod->num_exentries == 0)
2334 continue; 2314 continue;
@@ -2339,7 +2319,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
2339 if (e) 2319 if (e)
2340 break; 2320 break;
2341 } 2321 }
2342 spin_unlock_irqrestore(&modlist_lock, flags); 2322 preempt_enable();
2343 2323
2344 /* Now, if we found one, we are running inside it now, hence 2324 /* Now, if we found one, we are running inside it now, hence
2345 we cannot unload the module, hence no refcnt needed. */ 2325 we cannot unload the module, hence no refcnt needed. */
@@ -2351,25 +2331,24 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
2351 */ 2331 */
2352int is_module_address(unsigned long addr) 2332int is_module_address(unsigned long addr)
2353{ 2333{
2354 unsigned long flags;
2355 struct module *mod; 2334 struct module *mod;
2356 2335
2357 spin_lock_irqsave(&modlist_lock, flags); 2336 preempt_disable();
2358 2337
2359 list_for_each_entry(mod, &modules, list) { 2338 list_for_each_entry(mod, &modules, list) {
2360 if (within(addr, mod->module_core, mod->core_size)) { 2339 if (within(addr, mod->module_core, mod->core_size)) {
2361 spin_unlock_irqrestore(&modlist_lock, flags); 2340 preempt_enable();
2362 return 1; 2341 return 1;
2363 } 2342 }
2364 } 2343 }
2365 2344
2366 spin_unlock_irqrestore(&modlist_lock, flags); 2345 preempt_enable();
2367 2346
2368 return 0; 2347 return 0;
2369} 2348}
2370 2349
2371 2350
2372/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ 2351/* Is this a valid kernel address? */
2373struct module *__module_text_address(unsigned long addr) 2352struct module *__module_text_address(unsigned long addr)
2374{ 2353{
2375 struct module *mod; 2354 struct module *mod;
@@ -2384,11 +2363,10 @@ struct module *__module_text_address(unsigned long addr)
2384struct module *module_text_address(unsigned long addr) 2363struct module *module_text_address(unsigned long addr)
2385{ 2364{
2386 struct module *mod; 2365 struct module *mod;
2387 unsigned long flags;
2388 2366
2389 spin_lock_irqsave(&modlist_lock, flags); 2367 preempt_disable();
2390 mod = __module_text_address(addr); 2368 mod = __module_text_address(addr);
2391 spin_unlock_irqrestore(&modlist_lock, flags); 2369 preempt_enable();
2392 2370
2393 return mod; 2371 return mod;
2394} 2372}
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 9e83b589f754..10f0bbba382b 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -21,6 +21,8 @@
21#include <linux/utsname.h> 21#include <linux/utsname.h>
22#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
23 23
24static struct kmem_cache *nsproxy_cachep;
25
24struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); 26struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
25 27
26static inline void get_nsproxy(struct nsproxy *ns) 28static inline void get_nsproxy(struct nsproxy *ns)
@@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
43{ 45{
44 struct nsproxy *ns; 46 struct nsproxy *ns;
45 47
46 ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); 48 ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
47 if (ns) 49 if (ns) {
50 memcpy(ns, orig, sizeof(struct nsproxy));
48 atomic_set(&ns->count, 1); 51 atomic_set(&ns->count, 1);
52 }
49 return ns; 53 return ns;
50} 54}
51 55
@@ -54,33 +58,51 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
54 * Return the newly created nsproxy. Do not attach this to the task, 58 * Return the newly created nsproxy. Do not attach this to the task,
55 * leave it to the caller to do proper locking and attach it to task. 59 * leave it to the caller to do proper locking and attach it to task.
56 */ 60 */
57static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk, 61static struct nsproxy *create_new_namespaces(unsigned long flags,
58 struct fs_struct *new_fs) 62 struct task_struct *tsk, struct fs_struct *new_fs)
59{ 63{
60 struct nsproxy *new_nsp; 64 struct nsproxy *new_nsp;
65 int err;
61 66
62 new_nsp = clone_nsproxy(tsk->nsproxy); 67 new_nsp = clone_nsproxy(tsk->nsproxy);
63 if (!new_nsp) 68 if (!new_nsp)
64 return ERR_PTR(-ENOMEM); 69 return ERR_PTR(-ENOMEM);
65 70
66 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); 71 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
67 if (IS_ERR(new_nsp->mnt_ns)) 72 if (IS_ERR(new_nsp->mnt_ns)) {
73 err = PTR_ERR(new_nsp->mnt_ns);
68 goto out_ns; 74 goto out_ns;
75 }
69 76
70 new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); 77 new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
71 if (IS_ERR(new_nsp->uts_ns)) 78 if (IS_ERR(new_nsp->uts_ns)) {
79 err = PTR_ERR(new_nsp->uts_ns);
72 goto out_uts; 80 goto out_uts;
81 }
73 82
74 new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); 83 new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
75 if (IS_ERR(new_nsp->ipc_ns)) 84 if (IS_ERR(new_nsp->ipc_ns)) {
85 err = PTR_ERR(new_nsp->ipc_ns);
76 goto out_ipc; 86 goto out_ipc;
87 }
77 88
78 new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); 89 new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns);
79 if (IS_ERR(new_nsp->pid_ns)) 90 if (IS_ERR(new_nsp->pid_ns)) {
91 err = PTR_ERR(new_nsp->pid_ns);
80 goto out_pid; 92 goto out_pid;
93 }
94
95 new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
96 if (IS_ERR(new_nsp->user_ns)) {
97 err = PTR_ERR(new_nsp->user_ns);
98 goto out_user;
99 }
81 100
82 return new_nsp; 101 return new_nsp;
83 102
103out_user:
104 if (new_nsp->pid_ns)
105 put_pid_ns(new_nsp->pid_ns);
84out_pid: 106out_pid:
85 if (new_nsp->ipc_ns) 107 if (new_nsp->ipc_ns)
86 put_ipc_ns(new_nsp->ipc_ns); 108 put_ipc_ns(new_nsp->ipc_ns);
@@ -91,15 +113,15 @@ out_uts:
91 if (new_nsp->mnt_ns) 113 if (new_nsp->mnt_ns)
92 put_mnt_ns(new_nsp->mnt_ns); 114 put_mnt_ns(new_nsp->mnt_ns);
93out_ns: 115out_ns:
94 kfree(new_nsp); 116 kmem_cache_free(nsproxy_cachep, new_nsp);
95 return ERR_PTR(-ENOMEM); 117 return ERR_PTR(err);
96} 118}
97 119
98/* 120/*
99 * called from clone. This now handles copy for nsproxy and all 121 * called from clone. This now handles copy for nsproxy and all
100 * namespaces therein. 122 * namespaces therein.
101 */ 123 */
102int copy_namespaces(int flags, struct task_struct *tsk) 124int copy_namespaces(unsigned long flags, struct task_struct *tsk)
103{ 125{
104 struct nsproxy *old_ns = tsk->nsproxy; 126 struct nsproxy *old_ns = tsk->nsproxy;
105 struct nsproxy *new_ns; 127 struct nsproxy *new_ns;
@@ -110,7 +132,7 @@ int copy_namespaces(int flags, struct task_struct *tsk)
110 132
111 get_nsproxy(old_ns); 133 get_nsproxy(old_ns);
112 134
113 if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) 135 if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
114 return 0; 136 return 0;
115 137
116 if (!capable(CAP_SYS_ADMIN)) { 138 if (!capable(CAP_SYS_ADMIN)) {
@@ -140,7 +162,9 @@ void free_nsproxy(struct nsproxy *ns)
140 put_ipc_ns(ns->ipc_ns); 162 put_ipc_ns(ns->ipc_ns);
141 if (ns->pid_ns) 163 if (ns->pid_ns)
142 put_pid_ns(ns->pid_ns); 164 put_pid_ns(ns->pid_ns);
143 kfree(ns); 165 if (ns->user_ns)
166 put_user_ns(ns->user_ns);
167 kmem_cache_free(nsproxy_cachep, ns);
144} 168}
145 169
146/* 170/*
@@ -152,19 +176,10 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
152{ 176{
153 int err = 0; 177 int err = 0;
154 178
155 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) 179 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
180 CLONE_NEWUSER)))
156 return 0; 181 return 0;
157 182
158#ifndef CONFIG_IPC_NS
159 if (unshare_flags & CLONE_NEWIPC)
160 return -EINVAL;
161#endif
162
163#ifndef CONFIG_UTS_NS
164 if (unshare_flags & CLONE_NEWUTS)
165 return -EINVAL;
166#endif
167
168 if (!capable(CAP_SYS_ADMIN)) 183 if (!capable(CAP_SYS_ADMIN))
169 return -EPERM; 184 return -EPERM;
170 185
@@ -174,3 +189,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
174 err = PTR_ERR(*new_nsp); 189 err = PTR_ERR(*new_nsp);
175 return err; 190 return err;
176} 191}
192
193static int __init nsproxy_cache_init(void)
194{
195 nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
196 0, SLAB_PANIC, NULL, NULL);
197 return 0;
198}
199
200module_init(nsproxy_cache_init);
diff --git a/kernel/params.c b/kernel/params.c
index e61c46c97ce7..effbaaedd7f3 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -491,7 +491,6 @@ param_sysfs_setup(struct module_kobject *mk,
491 pattr->mattr.show = param_attr_show; 491 pattr->mattr.show = param_attr_show;
492 pattr->mattr.store = param_attr_store; 492 pattr->mattr.store = param_attr_store;
493 pattr->mattr.attr.name = (char *)&kp->name[name_skip]; 493 pattr->mattr.attr.name = (char *)&kp->name[name_skip];
494 pattr->mattr.attr.owner = mk->mod;
495 pattr->mattr.attr.mode = kp->perm; 494 pattr->mattr.attr.mode = kp->perm;
496 *(gattr++) = &(pattr++)->mattr.attr; 495 *(gattr++) = &(pattr++)->mattr.attr;
497 } 496 }
diff --git a/kernel/pid.c b/kernel/pid.c
index eb66bd2953ab..c6e3f9ffff87 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -365,7 +365,7 @@ struct pid *find_ge_pid(int nr)
365} 365}
366EXPORT_SYMBOL_GPL(find_get_pid); 366EXPORT_SYMBOL_GPL(find_get_pid);
367 367
368struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns) 368struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
369{ 369{
370 BUG_ON(!old_ns); 370 BUG_ON(!old_ns);
371 get_pid_ns(old_ns); 371 get_pid_ns(old_ns);
diff --git a/kernel/printk.c b/kernel/printk.c
index 0bbdeac2810c..051d27e36a6c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -449,13 +449,16 @@ static int printk_time = 1;
449#else 449#else
450static int printk_time = 0; 450static int printk_time = 0;
451#endif 451#endif
452module_param(printk_time, int, S_IRUGO | S_IWUSR); 452module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
453 453
454static int __init printk_time_setup(char *str) 454static int __init printk_time_setup(char *str)
455{ 455{
456 if (*str) 456 if (*str)
457 return 0; 457 return 0;
458 printk_time = 1; 458 printk_time = 1;
459 printk(KERN_NOTICE "The 'time' option is deprecated and "
460 "is scheduled for removal in early 2008\n");
461 printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n");
459 return 1; 462 return 1;
460} 463}
461 464
@@ -483,6 +486,9 @@ static int have_callable_console(void)
483 * @fmt: format string 486 * @fmt: format string
484 * 487 *
485 * This is printk(). It can be called from any context. We want it to work. 488 * This is printk(). It can be called from any context. We want it to work.
489 * Be aware of the fact that if oops_in_progress is not set, we might try to
490 * wake klogd up which could deadlock on runqueue lock if printk() is called
491 * from scheduler code.
486 * 492 *
487 * We try to grab the console_sem. If we succeed, it's easy - we log the output and 493 * We try to grab the console_sem. If we succeed, it's easy - we log the output and
488 * call the console drivers. If we fail to get the semaphore we place the output 494 * call the console drivers. If we fail to get the semaphore we place the output
@@ -654,7 +660,7 @@ static void call_console_drivers(unsigned long start, unsigned long end)
654 */ 660 */
655static int __init console_setup(char *str) 661static int __init console_setup(char *str)
656{ 662{
657 char name[sizeof(console_cmdline[0].name)]; 663 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
658 char *s, *options; 664 char *s, *options;
659 int idx; 665 int idx;
660 666
@@ -662,27 +668,27 @@ static int __init console_setup(char *str)
662 * Decode str into name, index, options. 668 * Decode str into name, index, options.
663 */ 669 */
664 if (str[0] >= '0' && str[0] <= '9') { 670 if (str[0] >= '0' && str[0] <= '9') {
665 strcpy(name, "ttyS"); 671 strcpy(buf, "ttyS");
666 strncpy(name + 4, str, sizeof(name) - 5); 672 strncpy(buf + 4, str, sizeof(buf) - 5);
667 } else { 673 } else {
668 strncpy(name, str, sizeof(name) - 1); 674 strncpy(buf, str, sizeof(buf) - 1);
669 } 675 }
670 name[sizeof(name) - 1] = 0; 676 buf[sizeof(buf) - 1] = 0;
671 if ((options = strchr(str, ',')) != NULL) 677 if ((options = strchr(str, ',')) != NULL)
672 *(options++) = 0; 678 *(options++) = 0;
673#ifdef __sparc__ 679#ifdef __sparc__
674 if (!strcmp(str, "ttya")) 680 if (!strcmp(str, "ttya"))
675 strcpy(name, "ttyS0"); 681 strcpy(buf, "ttyS0");
676 if (!strcmp(str, "ttyb")) 682 if (!strcmp(str, "ttyb"))
677 strcpy(name, "ttyS1"); 683 strcpy(buf, "ttyS1");
678#endif 684#endif
679 for (s = name; *s; s++) 685 for (s = buf; *s; s++)
680 if ((*s >= '0' && *s <= '9') || *s == ',') 686 if ((*s >= '0' && *s <= '9') || *s == ',')
681 break; 687 break;
682 idx = simple_strtoul(s, NULL, 10); 688 idx = simple_strtoul(s, NULL, 10);
683 *s = 0; 689 *s = 0;
684 690
685 add_preferred_console(name, idx, options); 691 add_preferred_console(buf, idx, options);
686 return 1; 692 return 1;
687} 693}
688__setup("console=", console_setup); 694__setup("console=", console_setup);
@@ -709,7 +715,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
709 * See if this tty is not yet registered, and 715 * See if this tty is not yet registered, and
710 * if we have a slot free. 716 * if we have a slot free.
711 */ 717 */
712 for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) 718 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
713 if (strcmp(console_cmdline[i].name, name) == 0 && 719 if (strcmp(console_cmdline[i].name, name) == 0 &&
714 console_cmdline[i].index == idx) { 720 console_cmdline[i].index == idx) {
715 selected_console = i; 721 selected_console = i;
@@ -726,6 +732,25 @@ int __init add_preferred_console(char *name, int idx, char *options)
726 return 0; 732 return 0;
727} 733}
728 734
735int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
736{
737 struct console_cmdline *c;
738 int i;
739
740 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
741 if (strcmp(console_cmdline[i].name, name) == 0 &&
742 console_cmdline[i].index == idx) {
743 c = &console_cmdline[i];
744 memcpy(c->name, name_new, sizeof(c->name));
745 c->name[sizeof(c->name) - 1] = 0;
746 c->options = options;
747 c->index = idx_new;
748 return i;
749 }
750 /* not found */
751 return -1;
752}
753
729#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND 754#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
730/** 755/**
731 * suspend_console - suspend the console subsystem 756 * suspend_console - suspend the console subsystem
@@ -942,6 +967,9 @@ void register_console(struct console *console)
942 if (preferred_console < 0 || bootconsole || !console_drivers) 967 if (preferred_console < 0 || bootconsole || !console_drivers)
943 preferred_console = selected_console; 968 preferred_console = selected_console;
944 969
970 if (console->early_setup)
971 console->early_setup();
972
945 /* 973 /*
946 * See if we want to use this console driver. If we 974 * See if we want to use this console driver. If we
947 * didn't select a console we take the first one 975 * didn't select a console we take the first one
@@ -985,12 +1013,15 @@ void register_console(struct console *console)
985 if (!(console->flags & CON_ENABLED)) 1013 if (!(console->flags & CON_ENABLED))
986 return; 1014 return;
987 1015
988 if (bootconsole) { 1016 if (bootconsole && (console->flags & CON_CONSDEV)) {
989 printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", 1017 printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n",
990 bootconsole->name, bootconsole->index, 1018 bootconsole->name, bootconsole->index,
991 console->name, console->index); 1019 console->name, console->index);
992 unregister_console(bootconsole); 1020 unregister_console(bootconsole);
993 console->flags &= ~CON_PRINTBUFFER; 1021 console->flags &= ~CON_PRINTBUFFER;
1022 } else {
1023 printk(KERN_INFO "console [%s%d] enabled\n",
1024 console->name, console->index);
994 } 1025 }
995 1026
996 /* 1027 /*
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ad7949a589dd..b1d11f1c7cf7 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -161,6 +161,7 @@ int ptrace_may_attach(struct task_struct *task)
161int ptrace_attach(struct task_struct *task) 161int ptrace_attach(struct task_struct *task)
162{ 162{
163 int retval; 163 int retval;
164 unsigned long flags;
164 165
165 audit_ptrace(task); 166 audit_ptrace(task);
166 167
@@ -181,9 +182,7 @@ repeat:
181 * cpu's that may have task_lock). 182 * cpu's that may have task_lock).
182 */ 183 */
183 task_lock(task); 184 task_lock(task);
184 local_irq_disable(); 185 if (!write_trylock_irqsave(&tasklist_lock, flags)) {
185 if (!write_trylock(&tasklist_lock)) {
186 local_irq_enable();
187 task_unlock(task); 186 task_unlock(task);
188 do { 187 do {
189 cpu_relax(); 188 cpu_relax();
@@ -211,7 +210,7 @@ repeat:
211 force_sig_specific(SIGSTOP, task); 210 force_sig_specific(SIGSTOP, task);
212 211
213bad: 212bad:
214 write_unlock_irq(&tasklist_lock); 213 write_unlock_irqrestore(&tasklist_lock, flags);
215 task_unlock(task); 214 task_unlock(task);
216out: 215out:
217 return retval; 216 return retval;
diff --git a/kernel/relay.c b/kernel/relay.c
index 3b299fb3855c..a615a8f513fc 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1061,7 +1061,7 @@ static struct pipe_buf_operations relay_pipe_buf_ops = {
1061 .get = generic_pipe_buf_get, 1061 .get = generic_pipe_buf_get,
1062}; 1062};
1063 1063
1064/** 1064/*
1065 * subbuf_splice_actor - splice up to one subbuf's worth of data 1065 * subbuf_splice_actor - splice up to one subbuf's worth of data
1066 */ 1066 */
1067static int subbuf_splice_actor(struct file *in, 1067static int subbuf_splice_actor(struct file *in,
@@ -1074,7 +1074,9 @@ static int subbuf_splice_actor(struct file *in,
1074 unsigned int pidx, poff, total_len, subbuf_pages, ret; 1074 unsigned int pidx, poff, total_len, subbuf_pages, ret;
1075 struct rchan_buf *rbuf = in->private_data; 1075 struct rchan_buf *rbuf = in->private_data;
1076 unsigned int subbuf_size = rbuf->chan->subbuf_size; 1076 unsigned int subbuf_size = rbuf->chan->subbuf_size;
1077 size_t read_start = ((size_t)*ppos) % rbuf->chan->alloc_size; 1077 uint64_t pos = (uint64_t) *ppos;
1078 uint32_t alloc_size = (uint32_t) rbuf->chan->alloc_size;
1079 size_t read_start = (size_t) do_div(pos, alloc_size);
1078 size_t read_subbuf = read_start / subbuf_size; 1080 size_t read_subbuf = read_start / subbuf_size;
1079 size_t padding = rbuf->padding[read_subbuf]; 1081 size_t padding = rbuf->padding[read_subbuf];
1080 size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding; 1082 size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index da8d6bf46457..5aedbee014df 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -29,12 +29,6 @@
29 29
30#include "rtmutex_common.h" 30#include "rtmutex_common.h"
31 31
32#ifdef CONFIG_DEBUG_RT_MUTEXES
33# include "rtmutex-debug.h"
34#else
35# include "rtmutex.h"
36#endif
37
38# define TRACE_WARN_ON(x) WARN_ON(x) 32# define TRACE_WARN_ON(x) WARN_ON(x)
39# define TRACE_BUG_ON(x) BUG_ON(x) 33# define TRACE_BUG_ON(x) BUG_ON(x)
40 34
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 17d28ce20300..8cd9bd2cdb34 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -17,12 +17,6 @@
17 17
18#include "rtmutex_common.h" 18#include "rtmutex_common.h"
19 19
20#ifdef CONFIG_DEBUG_RT_MUTEXES
21# include "rtmutex-debug.h"
22#else
23# include "rtmutex.h"
24#endif
25
26/* 20/*
27 * lock->owner state tracking: 21 * lock->owner state tracking:
28 * 22 *
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 9c75856e791e..2d3b83593ca3 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -103,7 +103,7 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
103 103
104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) 104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
105{ 105{
106 return (struct task_struct *) 106 return (struct task_struct *)
107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
108} 108}
109 109
@@ -120,4 +120,11 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
120 struct task_struct *proxy_owner); 120 struct task_struct *proxy_owner);
121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, 121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
122 struct task_struct *proxy_owner); 122 struct task_struct *proxy_owner);
123
124#ifdef CONFIG_DEBUG_RT_MUTEXES
125# include "rtmutex-debug.h"
126#else
127# include "rtmutex.h"
128#endif
129
123#endif 130#endif
diff --git a/kernel/sched.c b/kernel/sched.c
index 9fbced64bfee..3332bbb5d5cf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -750,8 +750,8 @@ static const u32 prio_to_wmult[40] = {
750 48356, 60446, 75558, 94446, 118058, 147573, 750 48356, 60446, 75558, 94446, 118058, 147573,
751 184467, 230589, 288233, 360285, 450347, 751 184467, 230589, 288233, 360285, 450347,
752 562979, 703746, 879575, 1099582, 1374389, 752 562979, 703746, 879575, 1099582, 1374389,
753 717986, 2147483, 2684354, 3355443, 4194304, 753 1717986, 2147483, 2684354, 3355443, 4194304,
754 244160, 6557201, 8196502, 10250518, 12782640, 754 5244160, 6557201, 8196502, 10250518, 12782640,
755 16025997, 19976592, 24970740, 31350126, 39045157, 755 16025997, 19976592, 24970740, 31350126, 39045157,
756 49367440, 61356675, 76695844, 95443717, 119304647, 756 49367440, 61356675, 76695844, 95443717, 119304647,
757 148102320, 186737708, 238609294, 286331153, 757 148102320, 186737708, 238609294, 286331153,
@@ -4647,14 +4647,14 @@ static void show_task(struct task_struct *p)
4647 state = p->state ? __ffs(p->state) + 1 : 0; 4647 state = p->state ? __ffs(p->state) + 1 : 0;
4648 printk("%-13.13s %c", p->comm, 4648 printk("%-13.13s %c", p->comm,
4649 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); 4649 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
4650#if (BITS_PER_LONG == 32) 4650#if BITS_PER_LONG == 32
4651 if (state == TASK_RUNNING) 4651 if (state == TASK_RUNNING)
4652 printk(" running "); 4652 printk(" running ");
4653 else 4653 else
4654 printk(" %08lX ", thread_saved_pc(p)); 4654 printk(" %08lx ", thread_saved_pc(p));
4655#else 4655#else
4656 if (state == TASK_RUNNING) 4656 if (state == TASK_RUNNING)
4657 printk(" running task "); 4657 printk(" running task ");
4658 else 4658 else
4659 printk(" %016lx ", thread_saved_pc(p)); 4659 printk(" %016lx ", thread_saved_pc(p));
4660#endif 4660#endif
@@ -4666,11 +4666,7 @@ static void show_task(struct task_struct *p)
4666 free = (unsigned long)n - (unsigned long)end_of_stack(p); 4666 free = (unsigned long)n - (unsigned long)end_of_stack(p);
4667 } 4667 }
4668#endif 4668#endif
4669 printk("%5lu %5d %6d", free, p->pid, p->parent->pid); 4669 printk("%5lu %5d %6d\n", free, p->pid, p->parent->pid);
4670 if (!p->mm)
4671 printk(" (L-TLB)\n");
4672 else
4673 printk(" (NOTLB)\n");
4674 4670
4675 if (state != TASK_RUNNING) 4671 if (state != TASK_RUNNING)
4676 show_stack(p, NULL); 4672 show_stack(p, NULL);
@@ -4680,14 +4676,12 @@ void show_state_filter(unsigned long state_filter)
4680{ 4676{
4681 struct task_struct *g, *p; 4677 struct task_struct *g, *p;
4682 4678
4683#if (BITS_PER_LONG == 32) 4679#if BITS_PER_LONG == 32
4684 printk("\n" 4680 printk(KERN_INFO
4685 " free sibling\n"); 4681 " task PC stack pid father\n");
4686 printk(" task PC stack pid father child younger older\n");
4687#else 4682#else
4688 printk("\n" 4683 printk(KERN_INFO
4689 " free sibling\n"); 4684 " task PC stack pid father\n");
4690 printk(" task PC stack pid father child younger older\n");
4691#endif 4685#endif
4692 read_lock(&tasklist_lock); 4686 read_lock(&tasklist_lock);
4693 do_each_thread(g, p) { 4687 do_each_thread(g, p) {
@@ -4778,7 +4772,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4778static inline void sched_init_granularity(void) 4772static inline void sched_init_granularity(void)
4779{ 4773{
4780 unsigned int factor = 1 + ilog2(num_online_cpus()); 4774 unsigned int factor = 1 + ilog2(num_online_cpus());
4781 const unsigned long gran_limit = 10000000; 4775 const unsigned long gran_limit = 100000000;
4782 4776
4783 sysctl_sched_granularity *= factor; 4777 sysctl_sched_granularity *= factor;
4784 if (sysctl_sched_granularity > gran_limit) 4778 if (sysctl_sched_granularity > gran_limit)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1baf87cceb7c..29f2c21e7da2 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -171,7 +171,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
171 u64 now = ktime_to_ns(ktime_get()); 171 u64 now = ktime_to_ns(ktime_get());
172 int cpu; 172 int cpu;
173 173
174 SEQ_printf(m, "Sched Debug Version: v0.04, cfs-v20, %s %.*s\n", 174 SEQ_printf(m, "Sched Debug Version: v0.05, %s %.*s\n",
175 init_utsname()->release, 175 init_utsname()->release,
176 (int)strcspn(init_utsname()->version, " "), 176 (int)strcspn(init_utsname()->version, " "),
177 init_utsname()->version); 177 init_utsname()->version);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index c3391b6020e8..ad64fcb731f2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -10,6 +10,7 @@
10#include <linux/sched.h> 10#include <linux/sched.h>
11 11
12/* #define SECCOMP_DEBUG 1 */ 12/* #define SECCOMP_DEBUG 1 */
13#define NR_SECCOMP_MODES 1
13 14
14/* 15/*
15 * Secure computing mode 1 allows only read/write/exit/sigreturn. 16 * Secure computing mode 1 allows only read/write/exit/sigreturn.
@@ -54,3 +55,31 @@ void __secure_computing(int this_syscall)
54#endif 55#endif
55 do_exit(SIGKILL); 56 do_exit(SIGKILL);
56} 57}
58
59long prctl_get_seccomp(void)
60{
61 return current->seccomp.mode;
62}
63
64long prctl_set_seccomp(unsigned long seccomp_mode)
65{
66 long ret;
67
68 /* can set it only once to be even more secure */
69 ret = -EPERM;
70 if (unlikely(current->seccomp.mode))
71 goto out;
72
73 ret = -EINVAL;
74 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
75 current->seccomp.mode = seccomp_mode;
76 set_thread_flag(TIF_SECCOMP);
77#ifdef TIF_NOTSC
78 disable_TSC();
79#endif
80 ret = 0;
81 }
82
83 out:
84 return ret;
85}
diff --git a/kernel/signal.c b/kernel/signal.c
index f9405609774e..39d122753bac 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -718,6 +718,37 @@ out_set:
718#define LEGACY_QUEUE(sigptr, sig) \ 718#define LEGACY_QUEUE(sigptr, sig) \
719 (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) 719 (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig)))
720 720
721int print_fatal_signals;
722
723static void print_fatal_signal(struct pt_regs *regs, int signr)
724{
725 printk("%s/%d: potentially unexpected fatal signal %d.\n",
726 current->comm, current->pid, signr);
727
728#ifdef __i386__
729 printk("code at %08lx: ", regs->eip);
730 {
731 int i;
732 for (i = 0; i < 16; i++) {
733 unsigned char insn;
734
735 __get_user(insn, (unsigned char *)(regs->eip + i));
736 printk("%02x ", insn);
737 }
738 }
739#endif
740 printk("\n");
741 show_regs(regs);
742}
743
744static int __init setup_print_fatal_signals(char *str)
745{
746 get_option (&str, &print_fatal_signals);
747
748 return 1;
749}
750
751__setup("print-fatal-signals=", setup_print_fatal_signals);
721 752
722static int 753static int
723specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) 754specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
@@ -1855,6 +1886,8 @@ relock:
1855 * Anything else is fatal, maybe with a core dump. 1886 * Anything else is fatal, maybe with a core dump.
1856 */ 1887 */
1857 current->flags |= PF_SIGNALED; 1888 current->flags |= PF_SIGNALED;
1889 if ((signr != SIGKILL) && print_fatal_signals)
1890 print_fatal_signal(regs, signr);
1858 if (sig_kernel_coredump(signr)) { 1891 if (sig_kernel_coredump(signr)) {
1859 /* 1892 /*
1860 * If it was able to dump core, this kills all 1893 * If it was able to dump core, this kills all
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 73217a9e2875..8de267790166 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -614,12 +614,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
614 kthread_bind(per_cpu(ksoftirqd, hotcpu), 614 kthread_bind(per_cpu(ksoftirqd, hotcpu),
615 any_online_cpu(cpu_online_map)); 615 any_online_cpu(cpu_online_map));
616 case CPU_DEAD: 616 case CPU_DEAD:
617 case CPU_DEAD_FROZEN: 617 case CPU_DEAD_FROZEN: {
618 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
619
618 p = per_cpu(ksoftirqd, hotcpu); 620 p = per_cpu(ksoftirqd, hotcpu);
619 per_cpu(ksoftirqd, hotcpu) = NULL; 621 per_cpu(ksoftirqd, hotcpu) = NULL;
622 sched_setscheduler(p, SCHED_FIFO, &param);
620 kthread_stop(p); 623 kthread_stop(p);
621 takeover_tasklets(hotcpu); 624 takeover_tasklets(hotcpu);
622 break; 625 break;
626 }
623#endif /* CONFIG_HOTPLUG_CPU */ 627#endif /* CONFIG_HOTPLUG_CPU */
624 } 628 }
625 return NOTIFY_OK; 629 return NOTIFY_OK;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index fcee2a8e6da3..319821ef78af 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -93,10 +93,6 @@ static void stopmachine_set_state(enum stopmachine_state state)
93static int stop_machine(void) 93static int stop_machine(void)
94{ 94{
95 int i, ret = 0; 95 int i, ret = 0;
96 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
97
98 /* One high-prio thread per cpu. We'll do this one. */
99 sched_setscheduler(current, SCHED_FIFO, &param);
100 96
101 atomic_set(&stopmachine_thread_ack, 0); 97 atomic_set(&stopmachine_thread_ack, 0);
102 stopmachine_num_threads = 0; 98 stopmachine_num_threads = 0;
@@ -189,6 +185,10 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
189 185
190 p = kthread_create(do_stop, &smdata, "kstopmachine"); 186 p = kthread_create(do_stop, &smdata, "kstopmachine");
191 if (!IS_ERR(p)) { 187 if (!IS_ERR(p)) {
188 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
189
190 /* One high-prio thread per cpu. We'll do this one. */
191 sched_setscheduler(p, SCHED_FIFO, &param);
192 kthread_bind(p, cpu); 192 kthread_bind(p, cpu);
193 wake_up_process(p); 193 wake_up_process(p);
194 wait_for_completion(&smdata.done); 194 wait_for_completion(&smdata.done);
diff --git a/kernel/sys.c b/kernel/sys.c
index 872271ccc384..4d141ae3e802 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -31,10 +31,12 @@
31#include <linux/cn_proc.h> 31#include <linux/cn_proc.h>
32#include <linux/getcpu.h> 32#include <linux/getcpu.h>
33#include <linux/task_io_accounting_ops.h> 33#include <linux/task_io_accounting_ops.h>
34#include <linux/seccomp.h>
34 35
35#include <linux/compat.h> 36#include <linux/compat.h>
36#include <linux/syscalls.h> 37#include <linux/syscalls.h>
37#include <linux/kprobes.h> 38#include <linux/kprobes.h>
39#include <linux/user_namespace.h>
38 40
39#include <asm/uaccess.h> 41#include <asm/uaccess.h>
40#include <asm/io.h> 42#include <asm/io.h>
@@ -1078,13 +1080,13 @@ static int set_user(uid_t new_ruid, int dumpclear)
1078{ 1080{
1079 struct user_struct *new_user; 1081 struct user_struct *new_user;
1080 1082
1081 new_user = alloc_uid(new_ruid); 1083 new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
1082 if (!new_user) 1084 if (!new_user)
1083 return -EAGAIN; 1085 return -EAGAIN;
1084 1086
1085 if (atomic_read(&new_user->processes) >= 1087 if (atomic_read(&new_user->processes) >=
1086 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 1088 current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
1087 new_user != &root_user) { 1089 new_user != current->nsproxy->user_ns->root_user) {
1088 free_uid(new_user); 1090 free_uid(new_user);
1089 return -EAGAIN; 1091 return -EAGAIN;
1090 } 1092 }
@@ -2241,6 +2243,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
2241 error = SET_ENDIAN(current, arg2); 2243 error = SET_ENDIAN(current, arg2);
2242 break; 2244 break;
2243 2245
2246 case PR_GET_SECCOMP:
2247 error = prctl_get_seccomp();
2248 break;
2249 case PR_SET_SECCOMP:
2250 error = prctl_set_seccomp(arg2);
2251 break;
2252
2244 default: 2253 default:
2245 error = -EINVAL; 2254 error = -EINVAL;
2246 break; 2255 break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7e11e2c98bf9..b0ec498a18d9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -14,6 +14,7 @@ asmlinkage long sys_ni_syscall(void)
14 14
15cond_syscall(sys_nfsservctl); 15cond_syscall(sys_nfsservctl);
16cond_syscall(sys_quotactl); 16cond_syscall(sys_quotactl);
17cond_syscall(sys32_quotactl);
17cond_syscall(sys_acct); 18cond_syscall(sys_acct);
18cond_syscall(sys_lookup_dcookie); 19cond_syscall(sys_lookup_dcookie);
19cond_syscall(sys_swapon); 20cond_syscall(sys_swapon);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 51f5dac42a00..7dca326648d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
61 61
62/* External variables not in a header file. */ 62/* External variables not in a header file. */
63extern int C_A_D; 63extern int C_A_D;
64extern int print_fatal_signals;
64extern int sysctl_overcommit_memory; 65extern int sysctl_overcommit_memory;
65extern int sysctl_overcommit_ratio; 66extern int sysctl_overcommit_ratio;
66extern int sysctl_panic_on_oom; 67extern int sysctl_panic_on_oom;
@@ -202,7 +203,10 @@ static ctl_table root_table[] = {
202 .mode = 0555, 203 .mode = 0555,
203 .child = dev_table, 204 .child = dev_table,
204 }, 205 },
205 206/*
207 * NOTE: do not add new entries to this table unless you have read
208 * Documentation/sysctl/ctl_unnumbered.txt
209 */
206 { .ctl_name = 0 } 210 { .ctl_name = 0 }
207}; 211};
208 212
@@ -340,6 +344,14 @@ static ctl_table kern_table[] = {
340 .proc_handler = &proc_dointvec, 344 .proc_handler = &proc_dointvec,
341 }, 345 },
342#endif 346#endif
347 {
348 .ctl_name = CTL_UNNUMBERED,
349 .procname = "print-fatal-signals",
350 .data = &print_fatal_signals,
351 .maxlen = sizeof(int),
352 .mode = 0644,
353 .proc_handler = &proc_dointvec,
354 },
343#ifdef __sparc__ 355#ifdef __sparc__
344 { 356 {
345 .ctl_name = KERN_SPARC_REBOOT, 357 .ctl_name = KERN_SPARC_REBOOT,
@@ -949,6 +961,27 @@ static ctl_table vm_table[] = {
949 .strategy = &sysctl_jiffies, 961 .strategy = &sysctl_jiffies,
950 }, 962 },
951#endif 963#endif
964#ifdef CONFIG_SECURITY
965 {
966 .ctl_name = CTL_UNNUMBERED,
967 .procname = "mmap_min_addr",
968 .data = &mmap_min_addr,
969 .maxlen = sizeof(unsigned long),
970 .mode = 0644,
971 .proc_handler = &proc_doulongvec_minmax,
972 },
973#ifdef CONFIG_NUMA
974 {
975 .ctl_name = CTL_UNNUMBERED,
976 .procname = "numa_zonelist_order",
977 .data = &numa_zonelist_order,
978 .maxlen = NUMA_ZONELIST_ORDER_LEN,
979 .mode = 0644,
980 .proc_handler = &numa_zonelist_order_handler,
981 .strategy = &sysctl_string,
982 },
983#endif
984#endif
952#if defined(CONFIG_X86_32) || \ 985#if defined(CONFIG_X86_32) || \
953 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 986 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
954 { 987 {
@@ -962,6 +995,14 @@ static ctl_table vm_table[] = {
962 .extra1 = &zero, 995 .extra1 = &zero,
963 }, 996 },
964#endif 997#endif
998/*
999 * NOTE: do not add new entries to this table unless you have read
1000 * Documentation/sysctl/ctl_unnumbered.txt
1001 */
1002/*
1003 * NOTE: do not add new entries to this table unless you have read
1004 * Documentation/sysctl/ctl_unnumbered.txt
1005 */
965 { .ctl_name = 0 } 1006 { .ctl_name = 0 }
966}; 1007};
967 1008
@@ -1102,6 +1143,14 @@ static ctl_table fs_table[] = {
1102 .child = binfmt_misc_table, 1143 .child = binfmt_misc_table,
1103 }, 1144 },
1104#endif 1145#endif
1146/*
1147 * NOTE: do not add new entries to this table unless you have read
1148 * Documentation/sysctl/ctl_unnumbered.txt
1149 */
1150/*
1151 * NOTE: do not add new entries to this table unless you have read
1152 * Documentation/sysctl/ctl_unnumbered.txt
1153 */
1105 { .ctl_name = 0 } 1154 { .ctl_name = 0 }
1106}; 1155};
1107 1156
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 906cae771585..059431ed67db 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
196 196
197 /* fill in basic acct fields */ 197 /* fill in basic acct fields */
198 stats->version = TASKSTATS_VERSION; 198 stats->version = TASKSTATS_VERSION;
199 stats->nvcsw = tsk->nvcsw;
200 stats->nivcsw = tsk->nivcsw;
199 bacct_add_tsk(stats, tsk); 201 bacct_add_tsk(stats, tsk);
200 202
201 /* fill in extended acct fields */ 203 /* fill in extended acct fields */
@@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
242 */ 244 */
243 delayacct_add_tsk(stats, tsk); 245 delayacct_add_tsk(stats, tsk);
244 246
247 stats->nvcsw += tsk->nvcsw;
248 stats->nivcsw += tsk->nivcsw;
245 } while_each_thread(first, tsk); 249 } while_each_thread(first, tsk);
246 250
247 unlock_task_sighand(first, &flags); 251 unlock_task_sighand(first, &flags);
diff --git a/kernel/time.c b/kernel/time.c
index f04791f69408..ffe19149d770 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz);
57 */ 57 */
58asmlinkage long sys_time(time_t __user * tloc) 58asmlinkage long sys_time(time_t __user * tloc)
59{ 59{
60 time_t i; 60 /*
61 struct timeval tv; 61 * We read xtime.tv_sec atomically - it's updated
62 * atomically by update_wall_time(), so no need to
63 * even read-lock the xtime seqlock:
64 */
65 time_t i = xtime.tv_sec;
62 66
63 do_gettimeofday(&tv); 67 smp_rmb(); /* sys_time() results are coherent */
64 i = tv.tv_sec;
65 68
66 if (tloc) { 69 if (tloc) {
67 if (put_user(i,tloc)) 70 if (put_user(i, tloc))
68 i = -EFAULT; 71 i = -EFAULT;
69 } 72 }
70 return i; 73 return i;
@@ -373,12 +376,25 @@ void do_gettimeofday (struct timeval *tv)
373 376
374 tv->tv_sec = sec; 377 tv->tv_sec = sec;
375 tv->tv_usec = usec; 378 tv->tv_usec = usec;
376}
377 379
380 /*
381 * Make sure xtime.tv_sec [returned by sys_time()] always
382 * follows the gettimeofday() result precisely. This
383 * condition is extremely unlikely, it can hit at most
384 * once per second:
385 */
386 if (unlikely(xtime.tv_sec != tv->tv_sec)) {
387 unsigned long flags;
388
389 write_seqlock_irqsave(&xtime_lock, flags);
390 update_wall_time();
391 write_sequnlock_irqrestore(&xtime_lock, flags);
392 }
393}
378EXPORT_SYMBOL(do_gettimeofday); 394EXPORT_SYMBOL(do_gettimeofday);
379 395
396#else /* CONFIG_TIME_INTERPOLATION */
380 397
381#else
382#ifndef CONFIG_GENERIC_TIME 398#ifndef CONFIG_GENERIC_TIME
383/* 399/*
384 * Simulate gettimeofday using do_gettimeofday which only allows a timeval 400 * Simulate gettimeofday using do_gettimeofday which only allows a timeval
@@ -394,7 +410,7 @@ void getnstimeofday(struct timespec *tv)
394} 410}
395EXPORT_SYMBOL_GPL(getnstimeofday); 411EXPORT_SYMBOL_GPL(getnstimeofday);
396#endif 412#endif
397#endif 413#endif /* CONFIG_TIME_INTERPOLATION */
398 414
399/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. 415/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
400 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 416 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 76212b2a99de..2ad1c37b8dfe 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -205,47 +205,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
205} 205}
206 206
207/** 207/**
208 * clockevents_request_device
209 */
210struct clock_event_device *clockevents_request_device(unsigned int features,
211 cpumask_t cpumask)
212{
213 struct clock_event_device *cur, *dev = NULL;
214 struct list_head *tmp;
215
216 spin_lock(&clockevents_lock);
217
218 list_for_each(tmp, &clockevent_devices) {
219 cur = list_entry(tmp, struct clock_event_device, list);
220
221 if ((cur->features & features) == features &&
222 cpus_equal(cpumask, cur->cpumask)) {
223 if (!dev || dev->rating < cur->rating)
224 dev = cur;
225 }
226 }
227
228 clockevents_exchange_device(NULL, dev);
229
230 spin_unlock(&clockevents_lock);
231
232 return dev;
233}
234
235/**
236 * clockevents_release_device
237 */
238void clockevents_release_device(struct clock_event_device *dev)
239{
240 spin_lock(&clockevents_lock);
241
242 clockevents_exchange_device(dev, NULL);
243 clockevents_notify_released();
244
245 spin_unlock(&clockevents_lock);
246}
247
248/**
249 * clockevents_notify - notification about relevant events 208 * clockevents_notify - notification about relevant events
250 */ 209 */
251void clockevents_notify(unsigned long reason, void *arg) 210void clockevents_notify(unsigned long reason, void *arg)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cf53bb5814cb..438c6b723ee2 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -13,7 +13,7 @@
13#include <linux/timex.h> 13#include <linux/timex.h>
14#include <linux/jiffies.h> 14#include <linux/jiffies.h>
15#include <linux/hrtimer.h> 15#include <linux/hrtimer.h>
16 16#include <linux/capability.h>
17#include <asm/div64.h> 17#include <asm/div64.h>
18#include <asm/timex.h> 18#include <asm/timex.h>
19 19
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3d1042f82a68..728cedfd3cbd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -36,9 +36,17 @@ EXPORT_SYMBOL(xtime_lock);
36 * at zero at system boot time, so wall_to_monotonic will be negative, 36 * at zero at system boot time, so wall_to_monotonic will be negative,
37 * however, we will ALWAYS keep the tv_nsec part positive so we can use 37 * however, we will ALWAYS keep the tv_nsec part positive so we can use
38 * the usual normalization. 38 * the usual normalization.
39 *
40 * wall_to_monotonic is moved after resume from suspend for the monotonic
41 * time not to jump. We need to add total_sleep_time to wall_to_monotonic
42 * to get the real boot based time offset.
43 *
44 * - wall_to_monotonic is no longer the boot time, getboottime must be
45 * used instead.
39 */ 46 */
40struct timespec xtime __attribute__ ((aligned (16))); 47struct timespec xtime __attribute__ ((aligned (16)));
41struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 48struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
49static unsigned long total_sleep_time; /* seconds */
42 50
43EXPORT_SYMBOL(xtime); 51EXPORT_SYMBOL(xtime);
44 52
@@ -251,6 +259,7 @@ void __init timekeeping_init(void)
251 xtime.tv_nsec = 0; 259 xtime.tv_nsec = 0;
252 set_normalized_timespec(&wall_to_monotonic, 260 set_normalized_timespec(&wall_to_monotonic,
253 -xtime.tv_sec, -xtime.tv_nsec); 261 -xtime.tv_sec, -xtime.tv_nsec);
262 total_sleep_time = 0;
254 263
255 write_sequnlock_irqrestore(&xtime_lock, flags); 264 write_sequnlock_irqrestore(&xtime_lock, flags);
256} 265}
@@ -282,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev)
282 291
283 xtime.tv_sec += sleep_length; 292 xtime.tv_sec += sleep_length;
284 wall_to_monotonic.tv_sec -= sleep_length; 293 wall_to_monotonic.tv_sec -= sleep_length;
294 total_sleep_time += sleep_length;
285 } 295 }
286 /* re-base the last cycle value */ 296 /* re-base the last cycle value */
287 clock->cycle_last = clocksource_read(clock); 297 clock->cycle_last = clocksource_read(clock);
@@ -476,3 +486,30 @@ void update_wall_time(void)
476 change_clocksource(); 486 change_clocksource();
477 update_vsyscall(&xtime, clock); 487 update_vsyscall(&xtime, clock);
478} 488}
489
490/**
491 * getboottime - Return the real time of system boot.
492 * @ts: pointer to the timespec to be set
493 *
494 * Returns the time of day in a timespec.
495 *
496 * This is based on the wall_to_monotonic offset and the total suspend
497 * time. Calls to settimeofday will affect the value returned (which
498 * basically means that however wrong your real time clock is at boot time,
499 * you get the right time here).
500 */
501void getboottime(struct timespec *ts)
502{
503 set_normalized_timespec(ts,
504 - (wall_to_monotonic.tv_sec + total_sleep_time),
505 - wall_to_monotonic.tv_nsec);
506}
507
508/**
509 * monotonic_to_bootbased - Convert the monotonic time to boot based.
510 * @ts: pointer to the timespec to be converted
511 */
512void monotonic_to_bootbased(struct timespec *ts)
513{
514 ts->tv_sec += total_sleep_time;
515}
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 321693724ad7..9b8a826236dd 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -68,6 +68,7 @@ struct entry {
68 * Number of timeout events: 68 * Number of timeout events:
69 */ 69 */
70 unsigned long count; 70 unsigned long count;
71 unsigned int timer_flag;
71 72
72 /* 73 /*
73 * We save the command-line string to preserve 74 * We save the command-line string to preserve
@@ -231,7 +232,8 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
231 * incremented. Otherwise the timer is registered in a free slot. 232 * incremented. Otherwise the timer is registered in a free slot.
232 */ 233 */
233void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 234void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
234 void *timerf, char * comm) 235 void *timerf, char *comm,
236 unsigned int timer_flag)
235{ 237{
236 /* 238 /*
237 * It doesnt matter which lock we take: 239 * It doesnt matter which lock we take:
@@ -249,6 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
249 input.start_func = startf; 251 input.start_func = startf;
250 input.expire_func = timerf; 252 input.expire_func = timerf;
251 input.pid = pid; 253 input.pid = pid;
254 input.timer_flag = timer_flag;
252 255
253 spin_lock_irqsave(lock, flags); 256 spin_lock_irqsave(lock, flags);
254 if (!active) 257 if (!active)
@@ -295,7 +298,7 @@ static int tstats_show(struct seq_file *m, void *v)
295 period = ktime_to_timespec(time); 298 period = ktime_to_timespec(time);
296 ms = period.tv_nsec / 1000000; 299 ms = period.tv_nsec / 1000000;
297 300
298 seq_puts(m, "Timer Stats Version: v0.1\n"); 301 seq_puts(m, "Timer Stats Version: v0.2\n");
299 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); 302 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
300 if (atomic_read(&overflow_count)) 303 if (atomic_read(&overflow_count))
301 seq_printf(m, "Overflow: %d entries\n", 304 seq_printf(m, "Overflow: %d entries\n",
@@ -303,8 +306,13 @@ static int tstats_show(struct seq_file *m, void *v)
303 306
304 for (i = 0; i < nr_entries; i++) { 307 for (i = 0; i < nr_entries; i++) {
305 entry = entries + i; 308 entry = entries + i;
306 seq_printf(m, "%4lu, %5d %-16s ", 309 if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
310 seq_printf(m, "%4luD, %5d %-16s ",
307 entry->count, entry->pid, entry->comm); 311 entry->count, entry->pid, entry->comm);
312 } else {
313 seq_printf(m, " %4lu, %5d %-16s ",
314 entry->count, entry->pid, entry->comm);
315 }
308 316
309 print_name_offset(m, (unsigned long)entry->start_func); 317 print_name_offset(m, (unsigned long)entry->start_func);
310 seq_puts(m, " ("); 318 seq_puts(m, " (");
diff --git a/kernel/timer.c b/kernel/timer.c
index 1a69705c2fb9..1258371e0d2b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -305,6 +305,20 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
305 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); 305 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
306 timer->start_pid = current->pid; 306 timer->start_pid = current->pid;
307} 307}
308
309static void timer_stats_account_timer(struct timer_list *timer)
310{
311 unsigned int flag = 0;
312
313 if (unlikely(tbase_get_deferrable(timer->base)))
314 flag |= TIMER_STATS_FLAG_DEFERRABLE;
315
316 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
317 timer->function, timer->start_comm, flag);
318}
319
320#else
321static void timer_stats_account_timer(struct timer_list *timer) {}
308#endif 322#endif
309 323
310/** 324/**
@@ -1114,6 +1128,7 @@ int do_sysinfo(struct sysinfo *info)
1114 getnstimeofday(&tp); 1128 getnstimeofday(&tp);
1115 tp.tv_sec += wall_to_monotonic.tv_sec; 1129 tp.tv_sec += wall_to_monotonic.tv_sec;
1116 tp.tv_nsec += wall_to_monotonic.tv_nsec; 1130 tp.tv_nsec += wall_to_monotonic.tv_nsec;
1131 monotonic_to_bootbased(&tp);
1117 if (tp.tv_nsec - NSEC_PER_SEC >= 0) { 1132 if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
1118 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; 1133 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1119 tp.tv_sec++; 1134 tp.tv_sec++;
diff --git a/kernel/user.c b/kernel/user.c
index 4869563080e9..98b82507797a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -14,20 +14,19 @@
14#include <linux/bitops.h> 14#include <linux/bitops.h>
15#include <linux/key.h> 15#include <linux/key.h>
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/module.h>
18#include <linux/user_namespace.h>
17 19
18/* 20/*
19 * UID task count cache, to get fast user lookup in "alloc_uid" 21 * UID task count cache, to get fast user lookup in "alloc_uid"
20 * when changing user ID's (ie setuid() and friends). 22 * when changing user ID's (ie setuid() and friends).
21 */ 23 */
22 24
23#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
24#define UIDHASH_SZ (1 << UIDHASH_BITS)
25#define UIDHASH_MASK (UIDHASH_SZ - 1) 25#define UIDHASH_MASK (UIDHASH_SZ - 1)
26#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) 26#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
27#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) 27#define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid)))
28 28
29static struct kmem_cache *uid_cachep; 29static struct kmem_cache *uid_cachep;
30static struct list_head uidhash_table[UIDHASH_SZ];
31 30
32/* 31/*
33 * The uidhash_lock is mostly taken from process context, but it is 32 * The uidhash_lock is mostly taken from process context, but it is
@@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid)
94{ 93{
95 struct user_struct *ret; 94 struct user_struct *ret;
96 unsigned long flags; 95 unsigned long flags;
96 struct user_namespace *ns = current->nsproxy->user_ns;
97 97
98 spin_lock_irqsave(&uidhash_lock, flags); 98 spin_lock_irqsave(&uidhash_lock, flags);
99 ret = uid_hash_find(uid, uidhashentry(uid)); 99 ret = uid_hash_find(uid, uidhashentry(ns, uid));
100 spin_unlock_irqrestore(&uidhash_lock, flags); 100 spin_unlock_irqrestore(&uidhash_lock, flags);
101 return ret; 101 return ret;
102} 102}
@@ -120,9 +120,9 @@ void free_uid(struct user_struct *up)
120 } 120 }
121} 121}
122 122
123struct user_struct * alloc_uid(uid_t uid) 123struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
124{ 124{
125 struct list_head *hashent = uidhashentry(uid); 125 struct list_head *hashent = uidhashentry(ns, uid);
126 struct user_struct *up; 126 struct user_struct *up;
127 127
128 spin_lock_irq(&uidhash_lock); 128 spin_lock_irq(&uidhash_lock);
@@ -211,11 +211,11 @@ static int __init uid_cache_init(void)
211 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 211 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
212 212
213 for(n = 0; n < UIDHASH_SZ; ++n) 213 for(n = 0; n < UIDHASH_SZ; ++n)
214 INIT_LIST_HEAD(uidhash_table + n); 214 INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
215 215
216 /* Insert the root user immediately (init already runs as root) */ 216 /* Insert the root user immediately (init already runs as root) */
217 spin_lock_irq(&uidhash_lock); 217 spin_lock_irq(&uidhash_lock);
218 uid_hash_insert(&root_user, uidhashentry(0)); 218 uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
219 spin_unlock_irq(&uidhash_lock); 219 spin_unlock_irq(&uidhash_lock);
220 220
221 return 0; 221 return 0;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
new file mode 100644
index 000000000000..d055d987850c
--- /dev/null
+++ b/kernel/user_namespace.c
@@ -0,0 +1,87 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License as
4 * published by the Free Software Foundation, version 2 of the
5 * License.
6 */
7
8#include <linux/module.h>
9#include <linux/version.h>
10#include <linux/nsproxy.h>
11#include <linux/user_namespace.h>
12
13struct user_namespace init_user_ns = {
14 .kref = {
15 .refcount = ATOMIC_INIT(2),
16 },
17 .root_user = &root_user,
18};
19
20EXPORT_SYMBOL_GPL(init_user_ns);
21
22#ifdef CONFIG_USER_NS
23
24/*
25 * Clone a new ns copying an original user ns, setting refcount to 1
26 * @old_ns: namespace to clone
27 * Return NULL on error (failure to kmalloc), new ns otherwise
28 */
29static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
30{
31 struct user_namespace *ns;
32 struct user_struct *new_user;
33 int n;
34
35 ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
36 if (!ns)
37 return ERR_PTR(-ENOMEM);
38
39 kref_init(&ns->kref);
40
41 for (n = 0; n < UIDHASH_SZ; ++n)
42 INIT_LIST_HEAD(ns->uidhash_table + n);
43
44 /* Insert new root user. */
45 ns->root_user = alloc_uid(ns, 0);
46 if (!ns->root_user) {
47 kfree(ns);
48 return ERR_PTR(-ENOMEM);
49 }
50
51 /* Reset current->user with a new one */
52 new_user = alloc_uid(ns, current->uid);
53 if (!new_user) {
54 free_uid(ns->root_user);
55 kfree(ns);
56 return ERR_PTR(-ENOMEM);
57 }
58
59 switch_uid(new_user);
60 return ns;
61}
62
63struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
64{
65 struct user_namespace *new_ns;
66
67 BUG_ON(!old_ns);
68 get_user_ns(old_ns);
69
70 if (!(flags & CLONE_NEWUSER))
71 return old_ns;
72
73 new_ns = clone_user_ns(old_ns);
74
75 put_user_ns(old_ns);
76 return new_ns;
77}
78
79void free_user_ns(struct kref *kref)
80{
81 struct user_namespace *ns;
82
83 ns = container_of(kref, struct user_namespace, kref);
84 kfree(ns);
85}
86
87#endif /* CONFIG_USER_NS */
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 160c8c5136bd..9d8180a0f0d8 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -13,6 +13,7 @@
13#include <linux/uts.h> 13#include <linux/uts.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/version.h> 15#include <linux/version.h>
16#include <linux/err.h>
16 17
17/* 18/*
18 * Clone a new ns copying an original utsname, setting refcount to 1 19 * Clone a new ns copying an original utsname, setting refcount to 1
@@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
24 struct uts_namespace *ns; 25 struct uts_namespace *ns;
25 26
26 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); 27 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
27 if (ns) { 28 if (!ns)
28 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 29 return ERR_PTR(-ENOMEM);
29 kref_init(&ns->kref); 30
30 } 31 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
32 kref_init(&ns->kref);
31 return ns; 33 return ns;
32} 34}
33 35
@@ -37,7 +39,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
37 * utsname of this process won't be seen by parent, and vice 39 * utsname of this process won't be seen by parent, and vice
38 * versa. 40 * versa.
39 */ 41 */
40struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) 42struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns)
41{ 43{
42 struct uts_namespace *new_ns; 44 struct uts_namespace *new_ns;
43 45
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index f22b9dbd2a9c..c76c06466bfd 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -18,10 +18,7 @@
18static void *get_uts(ctl_table *table, int write) 18static void *get_uts(ctl_table *table, int write)
19{ 19{
20 char *which = table->data; 20 char *which = table->data;
21#ifdef CONFIG_UTS_NS 21
22 struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
23 which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
24#endif
25 if (!write) 22 if (!write)
26 down_read(&uts_sem); 23 down_read(&uts_sem);
27 else 24 else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3bebf73be976..d7d3fa3072e5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
382EXPORT_SYMBOL_GPL(flush_workqueue); 382EXPORT_SYMBOL_GPL(flush_workqueue);
383 383
384/* 384/*
385 * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, 385 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
386 * so this work can't be re-armed in any way. 386 * so this work can't be re-armed in any way.
387 */ 387 */
388static int try_to_grab_pending(struct work_struct *work) 388static int try_to_grab_pending(struct work_struct *work)
389{ 389{
390 struct cpu_workqueue_struct *cwq; 390 struct cpu_workqueue_struct *cwq;
391 int ret = 0; 391 int ret = -1;
392 392
393 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) 393 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
394 return 1; 394 return 0;
395 395
396 /* 396 /*
397 * The queueing is in progress, or it is already queued. Try to 397 * The queueing is in progress, or it is already queued. Try to
@@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work)
457 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 457 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
458} 458}
459 459
460static int __cancel_work_timer(struct work_struct *work,
461 struct timer_list* timer)
462{
463 int ret;
464
465 do {
466 ret = (timer && likely(del_timer(timer)));
467 if (!ret)
468 ret = try_to_grab_pending(work);
469 wait_on_work(work);
470 } while (unlikely(ret < 0));
471
472 work_clear_pending(work);
473 return ret;
474}
475
460/** 476/**
461 * cancel_work_sync - block until a work_struct's callback has terminated 477 * cancel_work_sync - block until a work_struct's callback has terminated
462 * @work: the work which is to be flushed 478 * @work: the work which is to be flushed
463 * 479 *
480 * Returns true if @work was pending.
481 *
464 * cancel_work_sync() will cancel the work if it is queued. If the work's 482 * cancel_work_sync() will cancel the work if it is queued. If the work's
465 * callback appears to be running, cancel_work_sync() will block until it 483 * callback appears to be running, cancel_work_sync() will block until it
466 * has completed. 484 * has completed.
@@ -476,31 +494,26 @@ static void wait_on_work(struct work_struct *work)
476 * The caller must ensure that workqueue_struct on which this work was last 494 * The caller must ensure that workqueue_struct on which this work was last
477 * queued can't be destroyed before this function returns. 495 * queued can't be destroyed before this function returns.
478 */ 496 */
479void cancel_work_sync(struct work_struct *work) 497int cancel_work_sync(struct work_struct *work)
480{ 498{
481 while (!try_to_grab_pending(work)) 499 return __cancel_work_timer(work, NULL);
482 cpu_relax();
483 wait_on_work(work);
484 work_clear_pending(work);
485} 500}
486EXPORT_SYMBOL_GPL(cancel_work_sync); 501EXPORT_SYMBOL_GPL(cancel_work_sync);
487 502
488/** 503/**
489 * cancel_rearming_delayed_work - reliably kill off a delayed work. 504 * cancel_delayed_work_sync - reliably kill off a delayed work.
490 * @dwork: the delayed work struct 505 * @dwork: the delayed work struct
491 * 506 *
507 * Returns true if @dwork was pending.
508 *
492 * It is possible to use this function if @dwork rearms itself via queue_work() 509 * It is possible to use this function if @dwork rearms itself via queue_work()
493 * or queue_delayed_work(). See also the comment for cancel_work_sync(). 510 * or queue_delayed_work(). See also the comment for cancel_work_sync().
494 */ 511 */
495void cancel_rearming_delayed_work(struct delayed_work *dwork) 512int cancel_delayed_work_sync(struct delayed_work *dwork)
496{ 513{
497 while (!del_timer(&dwork->timer) && 514 return __cancel_work_timer(&dwork->work, &dwork->timer);
498 !try_to_grab_pending(&dwork->work))
499 cpu_relax();
500 wait_on_work(&dwork->work);
501 work_clear_pending(&dwork->work);
502} 515}
503EXPORT_SYMBOL(cancel_rearming_delayed_work); 516EXPORT_SYMBOL(cancel_delayed_work_sync);
504 517
505static struct workqueue_struct *keventd_wq __read_mostly; 518static struct workqueue_struct *keventd_wq __read_mostly;
506 519