Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile              |   2
-rw-r--r--  kernel/acct.c                |   7
-rw-r--r--  kernel/audit.c               |  10
-rw-r--r--  kernel/cgroup.c              |  15
-rw-r--r--  kernel/compat.c              |  84
-rw-r--r--  kernel/configs.c             |   2
-rw-r--r--  kernel/cpu/idle.c            |  11
-rw-r--r--  kernel/cpuset.c              |  15
-rw-r--r--  kernel/debug/debug_core.c    |   2
-rw-r--r--  kernel/events/ring_buffer.c  |  14
-rw-r--r--  kernel/exit.c                |   7
-rw-r--r--  kernel/extable.c             |   6
-rw-r--r--  kernel/fork.c                |   5
-rw-r--r--  kernel/hrtimer.c             |  26
-rw-r--r--  kernel/irq/proc.c            |  20
-rw-r--r--  kernel/kallsyms.c            |  26
-rw-r--r--  kernel/kexec.c               |  30
-rw-r--r--  kernel/kmod.c                |  98
-rw-r--r--  kernel/kthread.c             |  19
-rw-r--r--  kernel/modsign_certificate.S |  13
-rw-r--r--  kernel/module.c              |  18
-rw-r--r--  kernel/nsproxy.c             |   6
-rw-r--r--  kernel/panic.c               |   6
-rw-r--r--  kernel/pid.c                 |  12
-rw-r--r--  kernel/pid_namespace.c       |   4
-rw-r--r--  kernel/posix-timers.c        | 121
-rw-r--r--  kernel/power/console.c       | 116
-rw-r--r--  kernel/power/poweroff.c      |   2
-rw-r--r--  kernel/power/suspend.c       |  22
-rw-r--r--  kernel/printk.c              |  62
-rw-r--r--  kernel/profile.c             |   6
-rw-r--r--  kernel/ptrace.c              |  80
-rw-r--r--  kernel/range.c               |   3
-rw-r--r--  kernel/rcutree_plugin.h      |   4
-rw-r--r--  kernel/rcutree_trace.c       |   8
-rw-r--r--  kernel/relay.c               |  14
-rw-r--r--  kernel/sched/core.c          |   1
-rw-r--r--  kernel/sched/cputime.c       |  80
-rw-r--r--  kernel/sched/stats.c         |   7
-rw-r--r--  kernel/seccomp.c             |   2
-rw-r--r--  kernel/semaphore.c           |   8
-rw-r--r--  kernel/signal.c              |  11
-rw-r--r--  kernel/smp.c                 |  91
-rw-r--r--  kernel/softirq.c             |   6
-rw-r--r--  kernel/sys.c                 | 235
-rw-r--r--  kernel/sys_ni.c              |   3
-rw-r--r--  kernel/time.c                |  11
-rw-r--r--  kernel/time/ntp.c            | 105
-rw-r--r--  kernel/time/ntp_internal.h   |  12
-rw-r--r--  kernel/time/tick-broadcast.c | 239
-rw-r--r--  kernel/time/tick-common.c    |   2
-rw-r--r--  kernel/time/tick-internal.h  |   5
-rw-r--r--  kernel/time/tick-sched.c     |   4
-rw-r--r--  kernel/time/timekeeping.c    | 396
-rw-r--r--  kernel/time/timer_list.c     | 104
-rw-r--r--  kernel/timer.c               | 143
-rw-r--r--  kernel/uid16.c               |  55
-rw-r--r--  kernel/user.c                |   2
-rw-r--r--  kernel/user_namespace.c      |   2
-rw-r--r--  kernel/utsname.c             |   2
-rw-r--r--  kernel/workqueue.c           |  79
-rw-r--r--  kernel/workqueue_internal.h  |  12
62 files changed, 1662 insertions, 851 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d1574d47cf27..271fd3119af9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -176,7 +176,7 @@ signing_key.priv signing_key.x509: x509.genkey
 	openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 36500 \
 		-batch -x509 -config x509.genkey \
 		-outform DER -out signing_key.x509 \
-		-keyout signing_key.priv
+		-keyout signing_key.priv 2>&1
 	@echo "###"
 	@echo "### Key pair generated."
 	@echo "###"
diff --git a/kernel/acct.c b/kernel/acct.c
index b9bd7f098ee5..8d6e145138bb 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -540,6 +540,12 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 	ac.ac_swaps = encode_comp_t(0);
 
 	/*
+	 * Get freeze protection. If the fs is frozen, just skip the write
+	 * as we could deadlock the system otherwise.
+	 */
+	if (!file_start_write_trylock(file))
+		goto out;
+	/*
 	 * Kernel segment override to datasegment and write it
 	 * to the accounting file.
 	 */
@@ -554,6 +560,7 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 			       sizeof(acct_t), &file->f_pos);
 	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
 	set_fs(fs);
+	file_end_write(file);
 out:
 	revert_creds(orig_cred);
 }
diff --git a/kernel/audit.c b/kernel/audit.c
index 9816a1b96cfc..0b084fa44b1f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -58,7 +58,7 @@
 #ifdef CONFIG_SECURITY
 #include <linux/security.h>
 #endif
-#include <linux/netlink.h>
+#include <net/netlink.h>
 #include <linux/freezer.h>
 #include <linux/tty.h>
 #include <linux/pid_namespace.h>
@@ -910,7 +910,7 @@ static void audit_receive_skb(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh;
 	/*
-	 * len MUST be signed for NLMSG_NEXT to be able to dec it below 0
+	 * len MUST be signed for nlmsg_next to be able to dec it below 0
 	 * if the nlmsg_len was not aligned
 	 */
 	int len;
@@ -919,13 +919,13 @@ static void audit_receive_skb(struct sk_buff *skb)
 	nlh = nlmsg_hdr(skb);
 	len = skb->len;
 
-	while (NLMSG_OK(nlh, len)) {
+	while (nlmsg_ok(nlh, len)) {
 		err = audit_receive_msg(skb, nlh);
 		/* if err or if this message says it wants a response */
 		if (err || (nlh->nlmsg_flags & NLM_F_ACK))
 			netlink_ack(skb, nlh, err);
 
-		nlh = NLMSG_NEXT(nlh, len);
+		nlh = nlmsg_next(nlh, &len);
 	}
 }
 
@@ -1483,7 +1483,7 @@ void audit_log_end(struct audit_buffer *ab)
 		audit_log_lost("rate limit exceeded");
 	} else {
 		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-		nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
+		nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN;
 
 		if (audit_pid) {
 			skb_queue_tail(&audit_skb_queue, ab->skb);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d3abce2d6455..2a9926275f80 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4678,7 +4678,7 @@ out:
  */
 
 /* TODO: Use a proper seq_file iterator */
-static int proc_cgroup_show(struct seq_file *m, void *v)
+int proc_cgroup_show(struct seq_file *m, void *v)
 {
 	struct pid *pid;
 	struct task_struct *tsk;
@@ -4730,19 +4730,6 @@ out:
 	return retval;
 }
 
-static int cgroup_open(struct inode *inode, struct file *file)
-{
-	struct pid *pid = PROC_I(inode)->pid;
-	return single_open(file, proc_cgroup_show, pid);
-}
-
-const struct file_operations proc_cgroup_operations = {
-	.open		= cgroup_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* Display information about each subsystem and each hierarchy */
 static int proc_cgroupstats_show(struct seq_file *m, void *v)
 {
diff --git a/kernel/compat.c b/kernel/compat.c
index 19971d8c7299..0a09e481b70b 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -516,25 +516,6 @@ int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru)
 	return 0;
 }
 
-asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
-{
-	struct rusage r;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_getrusage(who, (struct rusage __user *) &r);
-	set_fs(old_fs);
-
-	if (ret)
-		return ret;
-
-	if (put_compat_rusage(&r, ru))
-		return -EFAULT;
-
-	return 0;
-}
-
 COMPAT_SYSCALL_DEFINE4(wait4,
 	compat_pid_t, pid,
 	compat_uint_t __user *, stat_addr,
@@ -1138,71 +1119,6 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
 }
 #endif
 
-struct compat_sysinfo {
-	s32 uptime;
-	u32 loads[3];
-	u32 totalram;
-	u32 freeram;
-	u32 sharedram;
-	u32 bufferram;
-	u32 totalswap;
-	u32 freeswap;
-	u16 procs;
-	u16 pad;
-	u32 totalhigh;
-	u32 freehigh;
-	u32 mem_unit;
-	char _f[20-2*sizeof(u32)-sizeof(int)];
-};
-
-asmlinkage long
-compat_sys_sysinfo(struct compat_sysinfo __user *info)
-{
-	struct sysinfo s;
-
-	do_sysinfo(&s);
-
-	/* Check to see if any memory value is too large for 32-bit and scale
-	 * down if needed
-	 */
-	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
-		int bitcount = 0;
-
-		while (s.mem_unit < PAGE_SIZE) {
-			s.mem_unit <<= 1;
-			bitcount++;
-		}
-
-		s.totalram >>= bitcount;
-		s.freeram >>= bitcount;
-		s.sharedram >>= bitcount;
-		s.bufferram >>= bitcount;
-		s.totalswap >>= bitcount;
-		s.freeswap >>= bitcount;
-		s.totalhigh >>= bitcount;
-		s.freehigh >>= bitcount;
-	}
-
-	if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
-	    __put_user (s.uptime, &info->uptime) ||
-	    __put_user (s.loads[0], &info->loads[0]) ||
-	    __put_user (s.loads[1], &info->loads[1]) ||
-	    __put_user (s.loads[2], &info->loads[2]) ||
-	    __put_user (s.totalram, &info->totalram) ||
-	    __put_user (s.freeram, &info->freeram) ||
-	    __put_user (s.sharedram, &info->sharedram) ||
-	    __put_user (s.bufferram, &info->bufferram) ||
-	    __put_user (s.totalswap, &info->totalswap) ||
-	    __put_user (s.freeswap, &info->freeswap) ||
-	    __put_user (s.procs, &info->procs) ||
-	    __put_user (s.totalhigh, &info->totalhigh) ||
-	    __put_user (s.freehigh, &info->freehigh) ||
-	    __put_user (s.mem_unit, &info->mem_unit))
-		return -EFAULT;
-
-	return 0;
-}
-
 COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
 	compat_pid_t, pid,
 	struct compat_timespec __user *, interval)
diff --git a/kernel/configs.c b/kernel/configs.c
index 42e8fa075eed..c18b1f1ae515 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -79,7 +79,7 @@ static int __init ikconfig_init(void)
 	if (!entry)
 		return -ENOMEM;
 
-	entry->size = kernel_config_data_size;
+	proc_set_size(entry, kernel_config_data_size);
 
 	return 0;
 }
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 168cf407a254..8b86c0c68edf 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -76,7 +76,16 @@ static void cpu_idle_loop(void)
 		local_irq_disable();
 		arch_cpu_idle_enter();
 
-		if (cpu_idle_force_poll) {
+		/*
+		 * In poll mode we reenable interrupts and spin.
+		 *
+		 * Also if we detected in the wakeup from idle
+		 * path that the tick broadcast device expired
+		 * for us, we don't want to go deep idle as we
+		 * know that the IPI is going to arrive right
+		 * away
+		 */
+		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
 			cpu_idle_poll();
 		} else {
 			current_clr_polling();
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 12331120767c..64b3f791bbe5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2609,7 +2609,7 @@ void __cpuset_memory_pressure_bump(void)
  * and we take cpuset_mutex, keeping cpuset_attach() from changing it
  * anyway.
  */
-static int proc_cpuset_show(struct seq_file *m, void *unused_v)
+int proc_cpuset_show(struct seq_file *m, void *unused_v)
 {
 	struct pid *pid;
 	struct task_struct *tsk;
@@ -2643,19 +2643,6 @@ out_free:
 out:
 	return retval;
 }
-
-static int cpuset_open(struct inode *inode, struct file *file)
-{
-	struct pid *pid = PROC_I(inode)->pid;
-	return single_open(file, proc_cpuset_show, pid);
-}
-
-const struct file_operations proc_cpuset_operations = {
-	.open		= cpuset_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* CONFIG_PROC_PID_CPUSET */
 
 /* Display task mems_allowed in /proc/<pid>/status file. */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index c26278fd4851..0506d447aed2 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -775,7 +775,7 @@ static void sysrq_handle_dbg(int key)
 
 static struct sysrq_key_op sysrq_dbg_op = {
 	.handler	= sysrq_handle_dbg,
-	.help_msg	= "debug(G)",
+	.help_msg	= "debug(g)",
 	.action_msg	= "DEBUG",
 };
 #endif
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 97fddb09762b..cd55144270b5 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -326,11 +326,16 @@ void rb_free(struct ring_buffer *rb)
 }
 
 #else
+static int data_page_nr(struct ring_buffer *rb)
+{
+	return rb->nr_pages << page_order(rb);
+}
 
 struct page *
 perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
-	if (pgoff > (1UL << page_order(rb)))
+	/* The '>' counts in the user page. */
+	if (pgoff > data_page_nr(rb))
 		return NULL;
 
 	return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -350,10 +355,11 @@ static void rb_free_work(struct work_struct *work)
 	int i, nr;
 
 	rb = container_of(work, struct ring_buffer, work);
-	nr = 1 << page_order(rb);
+	nr = data_page_nr(rb);
 
 	base = rb->user_page;
-	for (i = 0; i < nr + 1; i++)
+	/* The '<=' counts in the user page. */
+	for (i = 0; i <= nr; i++)
 		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
 	vfree(base);
@@ -387,7 +393,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
 	rb->user_page = all_buf;
 	rb->data_pages[0] = all_buf + PAGE_SIZE;
 	rb->page_order = ilog2(nr_pages);
-	rb->nr_pages = 1;
+	rb->nr_pages = !!nr_pages;
 
 	ring_buffer_init(rb, watermark, flags);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 60bc027c61c3..af2eb3cbd499 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -847,7 +847,7 @@ void do_exit(long code)
 		exit_io_context(tsk);
 
 	if (tsk->splice_pipe)
-		__free_pipe_info(tsk->splice_pipe);
+		free_pipe_info(tsk->splice_pipe);
 
 	if (tsk->task_frag.page)
 		put_page(tsk->task_frag.page);
@@ -1629,9 +1629,6 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	}
 
 	put_pid(pid);
-
-	/* avoid REGPARM breakage on x86: */
-	asmlinkage_protect(5, ret, which, upid, infop, options, ru);
 	return ret;
 }
 
@@ -1669,8 +1666,6 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 	ret = do_wait(&wo);
 	put_pid(pid);
 
-	/* avoid REGPARM breakage on x86: */
-	asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
 	return ret;
 }
 
diff --git a/kernel/extable.c b/kernel/extable.c
index fe35a634bf76..67460b93b1a1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -41,10 +41,10 @@ u32 __initdata main_extable_sort_needed = 1;
 /* Sort the kernel's built-in exception table */
 void __init sort_main_extable(void)
 {
-	if (main_extable_sort_needed)
+	if (main_extable_sort_needed) {
+		pr_notice("Sorting __ex_table...\n");
 		sort_extable(__start___ex_table, __stop___ex_table);
-	else
-		pr_notice("__ex_table already sorted, skipping sort\n");
+	}
 }
 
 /* Given an address, look for it in the exception tables. */
diff --git a/kernel/fork.c b/kernel/fork.c
index 339f60dfd62b..7d40687b1434 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1677,10 +1677,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 	 int, tls_val)
 #endif
 {
-	long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
-	asmlinkage_protect(5, ret, clone_flags, newsp,
-			   parent_tidptr, child_tidptr, tls_val);
-	return ret;
+	return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
 }
 #endif
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index abfd89d687ac..fd4b13b131f8 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -84,6 +84,12 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 		.get_time = &ktime_get_boottime,
 		.resolution = KTIME_LOW_RES,
 	},
+	{
+		.index = HRTIMER_BASE_TAI,
+		.clockid = CLOCK_TAI,
+		.get_time = &ktime_get_clocktai,
+		.resolution = KTIME_LOW_RES,
+	},
 	}
 };
 
@@ -91,6 +97,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
 	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
 	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
 	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
+	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
 };
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -107,8 +114,10 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 {
 	ktime_t xtim, mono, boot;
 	struct timespec xts, tom, slp;
+	s32 tai_offset;
 
 	get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
+	tai_offset = timekeeping_get_tai_offset();
 
 	xtim = timespec_to_ktime(xts);
 	mono = ktime_add(xtim, timespec_to_ktime(tom));
@@ -116,6 +125,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 	base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
 	base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
 	base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
+	base->clock_base[HRTIMER_BASE_TAI].softirq_time =
+				ktime_add(xtim,	ktime_set(tai_offset, 0));
 }
 
 /*
@@ -276,6 +287,10 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 	} else {
 		unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 
+		/* Make sure nsec fits into long */
+		if (unlikely(nsec > KTIME_SEC_MAX))
+			return (ktime_t){ .tv64 = KTIME_MAX };
+
 		tmp = ktime_set((long)nsec, rem);
 	}
 
@@ -652,8 +667,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
 	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
 	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
-	return ktime_get_update_offsets(offs_real, offs_boot);
+	return ktime_get_update_offsets(offs_real, offs_boot, offs_tai);
 }
 
 /*
@@ -1011,7 +1027,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  * @timer:	the timer to be added
  * @tim:	expiry time
  * @delta_ns:	"slack" range for the timer
- * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
@@ -1028,7 +1045,8 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
  * hrtimer_start - (re)start an hrtimer on the current CPU
  * @timer:	the timer to be added
  * @tim:	expiry time
- * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
@@ -1310,6 +1328,8 @@ retry:
 
 			expires = ktime_sub(hrtimer_get_expires(timer),
 					    base->offset);
+			if (expires.tv64 < 0)
+				expires.tv64 = KTIME_MAX;
 			if (expires.tv64 < expires_next.tv64)
 				expires_next = expires;
 			break;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 397db02209ed..19ed5c425c3b 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -76,7 +76,7 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
 static ssize_t write_irq_affinity(int type, struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
-	unsigned int irq = (int)(long)PDE(file_inode(file))->data;
+	unsigned int irq = (int)(long)PDE_DATA(file_inode(file));
 	cpumask_var_t new_value;
 	int err;
 
@@ -131,17 +131,17 @@ static ssize_t irq_affinity_list_proc_write(struct file *file,
 
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
+	return single_open(file, irq_affinity_proc_show, PDE_DATA(inode));
 }
 
 static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
+	return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode));
 }
 
 static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
+	return single_open(file, irq_affinity_hint_proc_show, PDE_DATA(inode));
 }
 
 static const struct file_operations irq_affinity_proc_fops = {
@@ -212,7 +212,7 @@ out:
 
 static int default_affinity_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, default_affinity_show, PDE(inode)->data);
+	return single_open(file, default_affinity_show, PDE_DATA(inode));
 }
 
 static const struct file_operations default_affinity_proc_fops = {
@@ -233,7 +233,7 @@ static int irq_node_proc_show(struct seq_file *m, void *v)
 
 static int irq_node_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, irq_node_proc_show, PDE(inode)->data);
+	return single_open(file, irq_node_proc_show, PDE_DATA(inode));
 }
 
 static const struct file_operations irq_node_proc_fops = {
@@ -256,7 +256,7 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v)
 
 static int irq_spurious_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, irq_spurious_proc_show, PDE(inode)->data);
+	return single_open(file, irq_spurious_proc_show, PDE_DATA(inode));
 }
 
 static const struct file_operations irq_spurious_proc_fops = {
@@ -366,11 +366,7 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
 
 void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 {
-	if (action->dir) {
-		struct irq_desc *desc = irq_to_desc(irq);
-
-		remove_proc_entry(action->dir->name, desc->dir);
-	}
+	proc_remove(action->dir);
 }
 
 static void register_default_affinity_proc(void)
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 2169feeba529..3127ad52cdb2 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -84,9 +84,11 @@ static int is_ksym_addr(unsigned long addr)
 
 /*
  * Expand a compressed symbol data into the resulting uncompressed string,
+ * if uncompressed string is too long (>= maxlen), it will be truncated,
  * given the offset to where the symbol is in the compressed stream.
  */
-static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
+static unsigned int kallsyms_expand_symbol(unsigned int off,
+					   char *result, size_t maxlen)
 {
 	int len, skipped_first = 0;
 	const u8 *tptr, *data;
@@ -113,15 +115,20 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
 
 		while (*tptr) {
 			if (skipped_first) {
+				if (maxlen <= 1)
+					goto tail;
 				*result = *tptr;
 				result++;
+				maxlen--;
 			} else
 				skipped_first = 1;
 			tptr++;
 		}
 	}
 
-	*result = '\0';
+tail:
+	if (maxlen)
+		*result = '\0';
 
 	/* Return to offset to the next symbol. */
 	return off;
@@ -176,7 +183,7 @@ unsigned long kallsyms_lookup_name(const char *name)
 	unsigned int off;
 
 	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
-		off = kallsyms_expand_symbol(off, namebuf);
+		off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
 
 		if (strcmp(namebuf, name) == 0)
 			return kallsyms_addresses[i];
@@ -195,7 +202,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 	int ret;
 
 	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
-		off = kallsyms_expand_symbol(off, namebuf);
+		off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
 		ret = fn(data, namebuf, NULL, kallsyms_addresses[i]);
 		if (ret != 0)
 			return ret;
@@ -294,7 +301,8 @@ const char *kallsyms_lookup(unsigned long addr,
 
 		pos = get_symbol_pos(addr, symbolsize, offset);
 		/* Grab name */
-		kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
+		kallsyms_expand_symbol(get_symbol_offset(pos),
+				       namebuf, KSYM_NAME_LEN);
 		if (modname)
 			*modname = NULL;
 		return namebuf;
@@ -315,7 +323,8 @@ int lookup_symbol_name(unsigned long addr, char *symname)
 
 		pos = get_symbol_pos(addr, NULL, NULL);
 		/* Grab name */
-		kallsyms_expand_symbol(get_symbol_offset(pos), symname);
+		kallsyms_expand_symbol(get_symbol_offset(pos),
+				       symname, KSYM_NAME_LEN);
 		return 0;
 	}
 	/* See if it's in a module. */
@@ -333,7 +342,8 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 
 		pos = get_symbol_pos(addr, size, offset);
 		/* Grab name */
-		kallsyms_expand_symbol(get_symbol_offset(pos), name);
+		kallsyms_expand_symbol(get_symbol_offset(pos),
+				       name, KSYM_NAME_LEN);
 		modname[0] = '\0';
 		return 0;
 	}
@@ -463,7 +473,7 @@ static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
 
 	iter->type = kallsyms_get_symbol_type(off);
 
-	off = kallsyms_expand_symbol(off, iter->name);
+	off = kallsyms_expand_symbol(off, iter->name, ARRAY_SIZE(iter->name));
 
 	return off - iter->nameoff;
 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b574920cbd4b..59f7b55ba745 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -786,7 +786,7 @@ static int kimage_load_normal_segment(struct kimage *image,
 					struct kexec_segment *segment)
 {
 	unsigned long maddr;
-	unsigned long ubytes, mbytes;
+	size_t ubytes, mbytes;
 	int result;
 	unsigned char __user *buf;
 
@@ -819,13 +819,9 @@ static int kimage_load_normal_segment(struct kimage *image,
 		/* Start with a clear page */
 		clear_page(ptr);
 		ptr += maddr & ~PAGE_MASK;
-		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
-		if (mchunk > mbytes)
-			mchunk = mbytes;
-
-		uchunk = mchunk;
-		if (uchunk > ubytes)
-			uchunk = ubytes;
+		mchunk = min_t(size_t, mbytes,
+				PAGE_SIZE - (maddr & ~PAGE_MASK));
+		uchunk = min(ubytes, mchunk);
 
 		result = copy_from_user(ptr, buf, uchunk);
 		kunmap(page);
@@ -850,7 +846,7 @@ static int kimage_load_crash_segment(struct kimage *image,
 	 * We do things a page at a time for the sake of kmap.
 	 */
 	unsigned long maddr;
-	unsigned long ubytes, mbytes;
+	size_t ubytes, mbytes;
 	int result;
 	unsigned char __user *buf;
 
@@ -871,13 +867,10 @@ static int kimage_load_crash_segment(struct kimage *image,
 		}
 		ptr = kmap(page);
 		ptr += maddr & ~PAGE_MASK;
-		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
-		if (mchunk > mbytes)
-			mchunk = mbytes;
-
-		uchunk = mchunk;
-		if (uchunk > ubytes) {
-			uchunk = ubytes;
+		mchunk = min_t(size_t, mbytes,
+				PAGE_SIZE - (maddr & ~PAGE_MASK));
+		uchunk = min(ubytes, mchunk);
+		if (mchunk > uchunk) {
 			/* Zero the trailing part of the page */
 			memset(ptr + uchunk, 0, mchunk - uchunk);
 		}
@@ -1540,14 +1533,13 @@ void vmcoreinfo_append_str(const char *fmt, ...)
 {
 	va_list args;
 	char buf[0x50];
-	int r;
+	size_t r;
 
 	va_start(args, fmt);
 	r = vsnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 
-	if (r + vmcoreinfo_size > vmcoreinfo_max_size)
-		r = vmcoreinfo_max_size - vmcoreinfo_size;
+	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
 
 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 56dd34976d7b..1296e72e4161 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -77,6 +77,7 @@ static void free_modprobe_argv(struct subprocess_info *info)
 
 static int call_modprobe(char *module_name, int wait)
 {
+	struct subprocess_info *info;
 	static char *envp[] = {
 		"HOME=/",
 		"TERM=linux",
@@ -98,8 +99,15 @@ static int call_modprobe(char *module_name, int wait)
 	argv[3] = module_name;	/* check free_modprobe_argv() */
 	argv[4] = NULL;
 
-	return call_usermodehelper_fns(modprobe_path, argv, envp,
-		wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL);
+	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
+					 NULL, free_modprobe_argv, NULL);
+	if (!info)
+		goto free_module_name;
+
+	return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
+
+free_module_name:
+	kfree(module_name);
 free_argv:
 	kfree(argv);
 out:
@@ -502,14 +510,28 @@ static void helper_unlock(void)
  * @argv: arg vector for process
  * @envp: environment for process
  * @gfp_mask: gfp mask for memory allocation
+ * @cleanup: a cleanup function
+ * @init: an init function
+ * @data: arbitrary context sensitive data
  *
  * Returns either %NULL on allocation failure, or a subprocess_info
  * structure.  This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
+ *
+ * The init function is used to customize the helper process prior to
+ * exec.  A non-zero return code causes the process to error out, exit,
+ * and return the failure to the calling process
+ *
+ * The cleanup function is just before ethe subprocess_info is about to
+ * be freed.  This can be used for freeing the argv and envp.  The
+ * Function must be runnable in either a process context or the
+ * context in which call_usermodehelper_exec is called.
  */
-static
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
-		char **envp, gfp_t gfp_mask)
+		char **envp, gfp_t gfp_mask,
+		int (*init)(struct subprocess_info *info, struct cred *new),
+		void (*cleanup)(struct subprocess_info *info),
+		void *data)
 {
 	struct subprocess_info *sub_info;
 	sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
@@ -520,50 +542,27 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 	sub_info->path = path;
 	sub_info->argv = argv;
 	sub_info->envp = envp;
+
+	sub_info->cleanup = cleanup;
+	sub_info->init = init;
+	sub_info->data = data;
   out:
 	return sub_info;
 }
-
-/**
- * call_usermodehelper_setfns - set a cleanup/init function
- * @info: a subprocess_info returned by call_usermodehelper_setup
- * @cleanup: a cleanup function
- * @init: an init function
- * @data: arbitrary context sensitive data
- *
- * The init function is used to customize the helper process prior to
- * exec.  A non-zero return code causes the process to error out, exit,
- * and return the failure to the calling process
- *
- * The cleanup function is just before ethe subprocess_info is about to
- * be freed.  This can be used for freeing the argv and envp.  The
- * Function must be runnable in either a process context or the
- * context in which call_usermodehelper_exec is called.
- */
-static
-void call_usermodehelper_setfns(struct subprocess_info *info,
-		int (*init)(struct subprocess_info *info, struct cred *new),
-		void (*cleanup)(struct subprocess_info *info),
-		void *data)
-{
-	info->cleanup = cleanup;
-	info->init = init;
-	info->data = data;
-}
+EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
  * call_usermodehelper_exec - start a usermode application
  * @sub_info: information about the subprocessa
  * @wait: wait for the application to finish and return status.
- *        when -1 don't wait at all, but you get no useful error back when
- *        the program couldn't be exec'ed. This makes it safe to call
+ *        when UMH_NO_WAIT don't wait at all, but you get no useful error back
+ *        when the program couldn't be exec'ed. This makes it safe to call
  *        from interrupt context.
  *
  * Runs a user-space application.  The application is started
  * asynchronously if wait is not set, and runs as a child of keventd.
  * (ie. it runs with full root capabilities).
  */
-static
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
@@ -615,31 +614,34 @@ unlock:
 	helper_unlock();
 	return retval;
 }
+EXPORT_SYMBOL(call_usermodehelper_exec);
 
-/*
- * call_usermodehelper_fns() will not run the caller-provided cleanup function
- * if a memory allocation failure is experienced. So the caller might need to
- * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform
- * the necessaary cleanup within the caller.
+/**
+ * call_usermodehelper() - prepare and start a usermode application
+ * @path: path to usermode executable
+ * @argv: arg vector for process
+ * @envp: environment for process
+ * @wait: wait for the application to finish and return status.
+ *        when UMH_NO_WAIT don't wait at all, but you get no useful error back
+ *        when the program couldn't be exec'ed. This makes it safe to call
+ *        from interrupt context.
+ *
+ * This function is the equivalent to use call_usermodehelper_setup() and
+ * call_usermodehelper_exec().
  */
-int call_usermodehelper_fns(
-	char *path, char **argv, char **envp, int wait,
-	int (*init)(struct subprocess_info *info, struct cred *new),
-	void (*cleanup)(struct subprocess_info *), void *data)
+int call_usermodehelper(char *path, char **argv, char **envp, int wait)
 {
 	struct subprocess_info *info;
 	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
 
-	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
-
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
+					 NULL, NULL, NULL);
 	if (info == NULL)
 		return -ENOMEM;
 
-	call_usermodehelper_setfns(info, init, cleanup, data);
-
 	return call_usermodehelper_exec(info, wait);
 }
-EXPORT_SYMBOL(call_usermodehelper_fns);
+EXPORT_SYMBOL(call_usermodehelper);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
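
Note: with the kmod.c hunks above, the old call_usermodehelper_fns()/call_usermodehelper_setfns() pair is folded into call_usermodehelper_setup() plus call_usermodehelper_exec(), both now exported. A minimal sketch of how a caller might use the new pair follows; the helper path, argv/envp contents and the cleanup callback are illustrative assumptions, not taken from the patch.

#include <linux/kmod.h>
#include <linux/slab.h>

/* Hypothetical cleanup callback: frees the argv array allocated below. */
static void example_cleanup(struct subprocess_info *info)
{
	kfree(info->argv);
}

static int run_example_helper(void)
{
	struct subprocess_info *info;
	static char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
	char **argv;

	argv = kcalloc(2, sizeof(char *), GFP_KERNEL);
	if (!argv)
		return -ENOMEM;
	argv[0] = "/sbin/example-helper";	/* illustrative path, not a real binary */
	argv[1] = NULL;

	/* setup() now takes the init/cleanup/data hooks directly; there is no setfns() step. */
	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_KERNEL,
					 NULL, example_cleanup, NULL);
	if (!info) {
		kfree(argv);
		return -ENOMEM;
	}

	/* UMH_WAIT_PROC: wait for the helper to exit and return its status. */
	return call_usermodehelper_exec(info, UMH_WAIT_PROC);
}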
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 16d8ddd268b1..760e86df8c20 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
+#include <linux/uaccess.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -135,6 +136,24 @@ void *kthread_data(struct task_struct *task)
 	return to_kthread(task)->data;
 }
 
+/**
+ * probe_kthread_data - speculative version of kthread_data()
+ * @task: possible kthread task in question
+ *
+ * @task could be a kthread task.  Return the data value specified when it
+ * was created if accessible.  If @task isn't a kthread task or its data is
+ * inaccessible for any reason, %NULL is returned.  This function requires
+ * that @task itself is safe to dereference.
+ */
+void *probe_kthread_data(struct task_struct *task)
+{
+	struct kthread *kthread = to_kthread(task);
+	void *data = NULL;
+
+	probe_kernel_read(&data, &kthread->data, sizeof(data));
+	return data;
+}
+
 static void __kthread_parkme(struct kthread *self)
 {
 	__set_current_state(TASK_PARKED);
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
index 246b4c6e6135..4a9a86d12c8b 100644
--- a/kernel/modsign_certificate.S
+++ b/kernel/modsign_certificate.S
@@ -1,15 +1,8 @@
-/* SYMBOL_PREFIX defined on commandline from CONFIG_SYMBOL_PREFIX */
-#ifndef SYMBOL_PREFIX
-#define ASM_SYMBOL(sym) sym
-#else
-#define PASTE2(x,y) x##y
-#define PASTE(x,y) PASTE2(x,y)
-#define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
-#endif
+#include <linux/export.h>
 
 #define GLOBAL(name)	\
-	.globl ASM_SYMBOL(name);	\
-	ASM_SYMBOL(name):
+	.globl VMLINUX_SYMBOL(name);	\
+	VMLINUX_SYMBOL(name):
 
 	.section ".init.data","aw"
 
diff --git a/kernel/module.c b/kernel/module.c
index 0925c9a71975..b049939177f6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1209,10 +1209,11 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 
 	/* Since this should be found in kernel (which can't be removed),
 	 * no locking is necessary. */
-	if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
+	if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
 			 &crc, true, false))
 		BUG();
-	return check_version(sechdrs, versindex, "module_layout", mod, crc,
+	return check_version(sechdrs, versindex,
+			     VMLINUX_SYMBOL_STR(module_layout), mod, crc,
 			     NULL);
 }
 
@@ -1861,12 +1862,12 @@ static void free_module(struct module *mod)
 {
 	trace_module_free(mod);
 
-	/* Delete from various lists */
-	mutex_lock(&module_mutex);
-	stop_machine(__unlink_module, mod, NULL);
-	mutex_unlock(&module_mutex);
 	mod_sysfs_teardown(mod);
 
+	/* We leave it in list to prevent duplicate loads, but make sure
+	 * that noone uses it while it's being deconstructed. */
+	mod->state = MODULE_STATE_UNFORMED;
+
 	/* Remove dynamic debug info */
 	ddebug_remove_module(mod->name);
 
@@ -1879,6 +1880,11 @@ static void free_module(struct module *mod)
 	/* Free any allocated parameters. */
 	destroy_params(mod->kp, mod->num_kp);
 
+	/* Now we can delete it from the lists */
+	mutex_lock(&module_mutex);
+	stop_machine(__unlink_module, mod, NULL);
+	mutex_unlock(&module_mutex);
+
 	/* This may be NULL, but that's OK */
 	unset_module_init_ro_nx(mod);
 	module_free(mod, mod->module_init);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index afc0456f227a..364ceab15f0c 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,7 +22,7 @@
 #include <linux/pid_namespace.h>
 #include <net/net_namespace.h>
 #include <linux/ipc_namespace.h>
-#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
 #include <linux/file.h>
 #include <linux/syscalls.h>
 
@@ -241,7 +241,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 	const struct proc_ns_operations *ops;
 	struct task_struct *tsk = current;
 	struct nsproxy *new_nsproxy;
-	struct proc_inode *ei;
+	struct proc_ns *ei;
 	struct file *file;
 	int err;
 
@@ -250,7 +250,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 		return PTR_ERR(file);
 
 	err = -EINVAL;
-	ei = PROC_I(file_inode(file));
+	ei = get_proc_ns(file_inode(file));
 	ops = ei->ns_ops;
 	if (nstype && (ops->type != nstype))
 		goto out;
diff --git a/kernel/panic.c b/kernel/panic.c
index 7c57cc9eee2c..167ec097ce8b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -22,7 +22,6 @@
 #include <linux/sysrq.h>
 #include <linux/init.h>
 #include <linux/nmi.h>
-#include <linux/dmi.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -400,13 +399,8 @@ struct slowpath_args {
 static void warn_slowpath_common(const char *file, int line, void *caller,
 				 unsigned taint, struct slowpath_args *args)
 {
-	const char *board;
-
 	printk(KERN_WARNING "------------[ cut here ]------------\n");
 	printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller);
-	board = dmi_get_system_info(DMI_PRODUCT_NAME);
-	if (board)
-		printk(KERN_WARNING "Hardware name: %s\n", board);
 
 	if (args)
 		vprintk(args->fmt, args->args);
diff --git a/kernel/pid.c b/kernel/pid.c
index 047dc6264638..0db3e791a06d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
 #include <linux/syscalls.h>
+#include <linux/proc_ns.h>
 #include <linux/proc_fs.h>
 
 #define pid_hashfn(nr, ns)	\
@@ -51,9 +52,6 @@ int pid_max = PID_MAX_DEFAULT;
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-#define BITS_PER_PAGE		(PAGE_SIZE*8)
-#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
-
 static inline int mk_pid(struct pid_namespace *pid_ns,
 		struct pidmap *map, int off)
 {
@@ -183,15 +181,19 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 			break;
 		}
 		if (likely(atomic_read(&map->nr_free))) {
-			do {
+			for ( ; ; ) {
 				if (!test_and_set_bit(offset, map->page)) {
 					atomic_dec(&map->nr_free);
 					set_last_pid(pid_ns, last, pid);
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
+				if (offset >= BITS_PER_PAGE)
+					break;
 				pid = mk_pid(pid_ns, map, offset);
-			} while (offset < BITS_PER_PAGE && pid < pid_max);
+				if (pid >= pid_max)
+					break;
+			}
 		}
 		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
 			++map;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index bea15bdf82b0..6917e8edb48e 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,12 +15,10 @@
 #include <linux/err.h>
 #include <linux/acct.h>
 #include <linux/slab.h>
-#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
 #include <linux/reboot.h>
 #include <linux/export.h>
 
-#define BITS_PER_PAGE		(PAGE_SIZE*8)
-
 struct pid_cache {
 	int nr_ids;
 	char name[16];
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 6edbb2c55c22..424c2d4265c9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -40,38 +40,31 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/idr.h>
+#include <linux/hash.h>
 #include <linux/posix-clock.h>
 #include <linux/posix-timers.h>
 #include <linux/syscalls.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/export.h>
+#include <linux/hashtable.h>
 
 /*
- * Management arrays for POSIX timers.	 Timers are kept in slab memory
- * Timer ids are allocated by an external routine that keeps track of the
- * id and the timer. The external interface is:
- *
- * void *idr_find(struct idr *idp, int id);           to find timer_id <id>
- * int idr_get_new(struct idr *idp, void *ptr);       to get a new id and
- *                                                    related it to <ptr>
- * void idr_remove(struct idr *idp, int id);          to release <id>
- * void idr_init(struct idr *idp);                    to initialize <idp>
- *                                                    which we supply.
- * The idr_get_new *may* call slab for more memory so it must not be
- * called under a spin lock.  Likewise idr_remore may release memory
- * (but it may be ok to do this under a lock...).
- * idr_find is just a memory look up and is quite fast.  A -1 return
- * indicates that the requested id does not exist.
+ * Management arrays for POSIX timers. Timers are now kept in static hash table
+ * with 512 entries.
+ * Timer ids are allocated by local routine, which selects proper hash head by
+ * key, constructed from current->signal address and per signal struct counter.
+ * This keeps timer ids unique per process, but now they can intersect between
+ * processes.
  */
 
 /*
  * Lets keep our timers in a slab cache :-)
  */
 static struct kmem_cache *posix_timers_cache;
-static struct idr posix_timers_id;
-static DEFINE_SPINLOCK(idr_lock);
+
+static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
+static DEFINE_SPINLOCK(hash_lock);
 
 /*
  * we assume that the new SIGEV_THREAD_ID shares no bits with the other
@@ -152,6 +145,56 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
 	__timr;						\
 })
 
+static int hash(struct signal_struct *sig, unsigned int nr)
+{
+	return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
+}
+
+static struct k_itimer *__posix_timers_find(struct hlist_head *head,
+					    struct signal_struct *sig,
+					    timer_t id)
+{
+	struct k_itimer *timer;
+
+	hlist_for_each_entry_rcu(timer, head, t_hash) {
+		if ((timer->it_signal == sig) && (timer->it_id == id))
+			return timer;
+	}
+	return NULL;
+}
+
+static struct k_itimer *posix_timer_by_id(timer_t id)
+{
+	struct signal_struct *sig = current->signal;
+	struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
+
+	return __posix_timers_find(head, sig, id);
+}
+
+static int posix_timer_add(struct k_itimer *timer)
+{
+	struct signal_struct *sig = current->signal;
+	int first_free_id = sig->posix_timer_id;
+	struct hlist_head *head;
+	int ret = -ENOENT;
+
+	do {
+		spin_lock(&hash_lock);
+		head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
+		if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+			hlist_add_head_rcu(&timer->t_hash, head);
+			ret = sig->posix_timer_id;
+		}
+		if (++sig->posix_timer_id < 0)
+			sig->posix_timer_id = 0;
+		if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
+			/* Loop over all possible ids completed */
+			ret = -EAGAIN;
+		spin_unlock(&hash_lock);
+	} while (ret == -ENOENT);
+	return ret;
+}
+
 static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
 {
 	spin_unlock_irqrestore(&timr->it_lock, flags);
@@ -221,6 +264,11 @@ static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
 	return 0;
 }
 
+static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+{
+	timekeeping_clocktai(tp);
+	return 0;
+}
 
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
@@ -261,6 +309,16 @@ static __init int init_posix_timers(void)
 		.clock_getres	= posix_get_coarse_res,
 		.clock_get	= posix_get_monotonic_coarse,
 	};
+	struct k_clock clock_tai = {
+		.clock_getres	= hrtimer_get_res,
+		.clock_get	= posix_get_tai,
+		.nsleep		= common_nsleep,
+		.nsleep_restart	= hrtimer_nanosleep_restart,
+		.timer_create	= common_timer_create,
+		.timer_set	= common_timer_set,
+		.timer_get	= common_timer_get,
+		.timer_del	= common_timer_del,
+	};
 	struct k_clock clock_boottime = {
 		.clock_getres	= hrtimer_get_res,
 		.clock_get	= posix_get_boottime,
@@ -278,11 +336,11 @@ static __init int init_posix_timers(void)
278 posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); 336 posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
279 posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); 337 posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
280 posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime); 338 posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
339 posix_timers_register_clock(CLOCK_TAI, &clock_tai);
281 340
282 posix_timers_cache = kmem_cache_create("posix_timers_cache", 341 posix_timers_cache = kmem_cache_create("posix_timers_cache",
283 sizeof (struct k_itimer), 0, SLAB_PANIC, 342 sizeof (struct k_itimer), 0, SLAB_PANIC,
284 NULL); 343 NULL);
285 idr_init(&posix_timers_id);
286 return 0; 344 return 0;
287} 345}
288 346
@@ -504,9 +562,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
504{ 562{
505 if (it_id_set) { 563 if (it_id_set) {
506 unsigned long flags; 564 unsigned long flags;
507 spin_lock_irqsave(&idr_lock, flags); 565 spin_lock_irqsave(&hash_lock, flags);
508 idr_remove(&posix_timers_id, tmr->it_id); 566 hlist_del_rcu(&tmr->t_hash);
509 spin_unlock_irqrestore(&idr_lock, flags); 567 spin_unlock_irqrestore(&hash_lock, flags);
510 } 568 }
511 put_pid(tmr->it_pid); 569 put_pid(tmr->it_pid);
512 sigqueue_free(tmr->sigq); 570 sigqueue_free(tmr->sigq);
@@ -552,22 +610,11 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
552 return -EAGAIN; 610 return -EAGAIN;
553 611
554 spin_lock_init(&new_timer->it_lock); 612 spin_lock_init(&new_timer->it_lock);
555 613 new_timer_id = posix_timer_add(new_timer);
556 idr_preload(GFP_KERNEL); 614 if (new_timer_id < 0) {
557 spin_lock_irq(&idr_lock); 615 error = new_timer_id;
558 error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT);
559 spin_unlock_irq(&idr_lock);
560 idr_preload_end();
561 if (error < 0) {
562 /*
563 * Weird looking, but we return EAGAIN if the IDR is
564 * full (proper POSIX return value for this)
565 */
566 if (error == -ENOSPC)
567 error = -EAGAIN;
568 goto out; 616 goto out;
569 } 617 }
570 new_timer_id = error;
571 618
572 it_id_set = IT_ID_SET; 619 it_id_set = IT_ID_SET;
573 new_timer->it_id = (timer_t) new_timer_id; 620 new_timer->it_id = (timer_t) new_timer_id;
@@ -645,7 +692,7 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
645 return NULL; 692 return NULL;
646 693
647 rcu_read_lock(); 694 rcu_read_lock();
648 timr = idr_find(&posix_timers_id, (int)timer_id); 695 timr = posix_timer_by_id(timer_id);
649 if (timr) { 696 if (timr) {
650 spin_lock_irqsave(&timr->it_lock, *flags); 697 spin_lock_irqsave(&timr->it_lock, *flags);
651 if (timr->it_signal == current->signal) { 698 if (timr->it_signal == current->signal) {
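
The hunks above replace the global IDR with a per-process id search over one shared hash table. Below is a minimal userspace sketch of that search loop, not the kernel code: the hash_32(hash32_ptr(sig) ^ nr) mixing is reduced to a plain mask on the id, locking is omitted, and sketch_timer/timer_add/TABLE_BITS are invented names; -1 and -2 stand in for -ENOENT and -EAGAIN.

#include <stdio.h>

#define TABLE_BITS 4                        /* 16 buckets; tiny, for the demo */
#define TABLE_SIZE (1 << TABLE_BITS)

struct sketch_timer {
        int id;
        struct sketch_timer *next;          /* hash-chain link, like t_hash */
};

static struct sketch_timer *table[TABLE_SIZE];
static int next_id;                         /* per-process hint (sig->posix_timer_id) */

static unsigned int bucket(int id)
{
        return (unsigned int)id & (TABLE_SIZE - 1);
}

static struct sketch_timer *find(int id)
{
        struct sketch_timer *t;

        for (t = table[bucket(id)]; t; t = t->next)
                if (t->id == id)
                        return t;
        return NULL;
}

/* Mirror of posix_timer_add(): probe ids starting at the hint, wrap around once. */
static int timer_add(struct sketch_timer *t)
{
        int first = next_id;
        int ret = -1;                       /* -1 plays the role of -ENOENT */

        do {
                if (!find(next_id)) {
                        t->id = next_id;
                        t->next = table[bucket(next_id)];
                        table[bucket(next_id)] = t;
                        ret = next_id;
                }
                if (++next_id < 0)
                        next_id = 0;
                if (next_id == first && ret == -1)
                        return -2;          /* every id in use: like -EAGAIN */
        } while (ret == -1);
        return ret;
}

int main(void)
{
        struct sketch_timer a, b;
        int ia = timer_add(&a);
        int ib = timer_add(&b);

        printf("ids: %d %d\n", ia, ib);     /* ids: 0 1 */
        return 0;
}

Because the hint keeps advancing even after a successful insert, ids are unique within a process but can repeat across processes, which is exactly the property the new comment at the top of the file describes.
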
diff --git a/kernel/power/console.c b/kernel/power/console.c
index b1dc456474b5..463aa6736751 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -4,6 +4,7 @@
4 * Originally from swsusp. 4 * Originally from swsusp.
5 */ 5 */
6 6
7#include <linux/console.h>
7#include <linux/vt_kern.h> 8#include <linux/vt_kern.h>
8#include <linux/kbd_kern.h> 9#include <linux/kbd_kern.h>
9#include <linux/vt.h> 10#include <linux/vt.h>
@@ -14,8 +15,120 @@
14 15
15static int orig_fgconsole, orig_kmsg; 16static int orig_fgconsole, orig_kmsg;
16 17
18static DEFINE_MUTEX(vt_switch_mutex);
19
20struct pm_vt_switch {
21 struct list_head head;
22 struct device *dev;
23 bool required;
24};
25
26static LIST_HEAD(pm_vt_switch_list);
27
28
29/**
30 * pm_vt_switch_required - indicate VT switch at suspend requirements
31 * @dev: device
32 * @required: if true, caller needs VT switch at suspend/resume time
33 *
34 * The different console drivers may or may not require VT switches across
35 * suspend/resume, depending on how they handle restoring video state and
36 * what may be running.
37 *
38 * Drivers can indicate support for switchless suspend/resume, which can
39 * save time and flicker, by using this routine and passing 'false' as
40 * the argument. If any loaded driver needs VT switching, or the
41 * no_console_suspend argument has been passed on the command line, VT
42 * switches will occur.
43 */
44void pm_vt_switch_required(struct device *dev, bool required)
45{
46 struct pm_vt_switch *entry, *tmp;
47
48 mutex_lock(&vt_switch_mutex);
49 list_for_each_entry(tmp, &pm_vt_switch_list, head) {
50 if (tmp->dev == dev) {
51 /* already registered, update requirement */
52 tmp->required = required;
53 goto out;
54 }
55 }
56
57 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
58 if (!entry)
59 goto out;
60
61 entry->required = required;
62 entry->dev = dev;
63
64 list_add(&entry->head, &pm_vt_switch_list);
65out:
66 mutex_unlock(&vt_switch_mutex);
67}
68EXPORT_SYMBOL(pm_vt_switch_required);
69
70/**
71 * pm_vt_switch_unregister - stop tracking a device's VT switching needs
72 * @dev: device
73 *
74 * Remove @dev from the vt switch list.
75 */
76void pm_vt_switch_unregister(struct device *dev)
77{
78 struct pm_vt_switch *tmp;
79
80 mutex_lock(&vt_switch_mutex);
81 list_for_each_entry(tmp, &pm_vt_switch_list, head) {
82 if (tmp->dev == dev) {
83 list_del(&tmp->head);
84 break;
85 }
86 }
87 mutex_unlock(&vt_switch_mutex);
88}
89EXPORT_SYMBOL(pm_vt_switch_unregister);
90
91/*
92 * There are three cases when a VT switch on suspend/resume are required:
93 * 1) no driver has indicated a requirement one way or another, so preserve
94 * the old behavior
95 * 2) console suspend is disabled, we want to see debug messages across
96 * suspend/resume
97 * 3) any registered driver indicates it needs a VT switch
98 *
99 * If none of these conditions is present, meaning we have at least one driver
100 * that doesn't need the switch, and none that do, we can avoid it to make
101 * resume look a little prettier (and suspend too, but that's usually hidden,
102 * e.g. when closing the lid on a laptop).
103 */
104static bool pm_vt_switch(void)
105{
106 struct pm_vt_switch *entry;
107 bool ret = true;
108
109 mutex_lock(&vt_switch_mutex);
110 if (list_empty(&pm_vt_switch_list))
111 goto out;
112
113 if (!console_suspend_enabled)
114 goto out;
115
116 list_for_each_entry(entry, &pm_vt_switch_list, head) {
117 if (entry->required)
118 goto out;
119 }
120
121 ret = false;
122out:
123 mutex_unlock(&vt_switch_mutex);
124 return ret;
125}
126
17int pm_prepare_console(void) 127int pm_prepare_console(void)
18{ 128{
129 if (!pm_vt_switch())
130 return 0;
131
19 orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1); 132 orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1);
20 if (orig_fgconsole < 0) 133 if (orig_fgconsole < 0)
21 return 1; 134 return 1;
@@ -26,6 +139,9 @@ int pm_prepare_console(void)
26 139
27void pm_restore_console(void) 140void pm_restore_console(void)
28{ 141{
142 if (!pm_vt_switch())
143 return;
144
29 if (orig_fgconsole >= 0) { 145 if (orig_fgconsole >= 0) {
30 vt_move_to_console(orig_fgconsole, 0); 146 vt_move_to_console(orig_fgconsole, 0);
31 vt_kmsg_redirect(orig_kmsg); 147 vt_kmsg_redirect(orig_kmsg);
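
pm_vt_switch() above reduces to a three-way test over the registered entries. Here is a standalone sketch of just that decision, assuming a flat array instead of the kernel's list and no locking; vt_switch_entry and need_vt_switch are made-up names.

#include <stdbool.h>
#include <stdio.h>

struct vt_switch_entry {
        bool required;                      /* what pm_vt_switch_required() recorded */
};

static bool need_vt_switch(const struct vt_switch_entry *e, int n,
                           bool console_suspend_enabled)
{
        int i;

        if (n == 0)                         /* nobody registered: keep old behavior */
                return true;
        if (!console_suspend_enabled)       /* debug output wanted across suspend */
                return true;
        for (i = 0; i < n; i++)
                if (e[i].required)          /* at least one driver needs the switch */
                        return true;
        return false;                       /* everyone opted out: skip it */
}

int main(void)
{
        struct vt_switch_entry drv[2] = { { false }, { false } };

        printf("%d %d\n", need_vt_switch(drv, 2, true),   /* 0: all opted out */
                          need_vt_switch(drv, 0, true));  /* 1: nothing registered */
        return 0;
}

The list-empty check comes first so that systems with no cooperating drivers keep the historical always-switch behavior.
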
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 68197a4e8fc9..7ef6866b521d 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -32,7 +32,7 @@ static void handle_poweroff(int key)
32 32
33static struct sysrq_key_op sysrq_poweroff_op = { 33static struct sysrq_key_op sysrq_poweroff_op = {
34 .handler = handle_poweroff, 34 .handler = handle_poweroff,
35 .help_msg = "powerOff", 35 .help_msg = "poweroff(o)",
36 .action_msg = "Power Off", 36 .action_msg = "Power Off",
37 .enable_mask = SYSRQ_ENABLE_BOOT, 37 .enable_mask = SYSRQ_ENABLE_BOOT,
38}; 38};
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index d4feda084a3a..bef86d121eb2 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -76,8 +76,20 @@ EXPORT_SYMBOL_GPL(suspend_set_ops);
76 76
77bool valid_state(suspend_state_t state) 77bool valid_state(suspend_state_t state)
78{ 78{
79 if (state == PM_SUSPEND_FREEZE) 79 if (state == PM_SUSPEND_FREEZE) {
80 return true; 80#ifdef CONFIG_PM_DEBUG
81 if (pm_test_level != TEST_NONE &&
82 pm_test_level != TEST_FREEZER &&
83 pm_test_level != TEST_DEVICES &&
84 pm_test_level != TEST_PLATFORM) {
85 printk(KERN_WARNING "Unsupported pm_test mode for "
86 "freeze state, please choose "
87 "none/freezer/devices/platform.\n");
88 return false;
89 }
90#endif
91 return true;
92 }
81 /* 93 /*
82 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel 94 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel
83 * support and need to be valid to the lowlevel 95 * support and need to be valid to the lowlevel
@@ -184,6 +196,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
184 goto Platform_wake; 196 goto Platform_wake;
185 } 197 }
186 198
199 if (suspend_test(TEST_PLATFORM))
200 goto Platform_wake;
201
187 /* 202 /*
188 * PM_SUSPEND_FREEZE equals 203 * PM_SUSPEND_FREEZE equals
189 * frozen processes + suspended devices + idle processors. 204 * frozen processes + suspended devices + idle processors.
@@ -195,9 +210,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
195 goto Platform_wake; 210 goto Platform_wake;
196 } 211 }
197 212
198 if (suspend_test(TEST_PLATFORM))
199 goto Platform_wake;
200
201 error = disable_nonboot_cpus(); 213 error = disable_nonboot_cpus();
202 if (error || suspend_test(TEST_CPUS)) 214 if (error || suspend_test(TEST_CPUS))
203 goto Enable_cpus; 215 goto Enable_cpus;
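
The new PM_SUSPEND_FREEZE branch in valid_state() only accepts the pm_test modes that still mean something when the low-level suspend steps are skipped. A small sketch of that filter; the enum values here are stand-ins, not the kernel's actual ordering.

#include <stdbool.h>
#include <stdio.h>

enum pm_test_level { TEST_NONE, TEST_CORE, TEST_CPUS, TEST_PLATFORM,
                     TEST_DEVICES, TEST_FREEZER };

/* Freeze suspend never touches CPUs or the core, so only these modes apply. */
static bool freeze_pm_test_supported(enum pm_test_level level)
{
        return level == TEST_NONE || level == TEST_FREEZER ||
               level == TEST_DEVICES || level == TEST_PLATFORM;
}

int main(void)
{
        printf("TEST_CPUS: %d, TEST_FREEZER: %d\n",
               freeze_pm_test_supported(TEST_CPUS),
               freeze_pm_test_supported(TEST_FREEZER));   /* 0, 1 */
        return 0;
}
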
diff --git a/kernel/printk.c b/kernel/printk.c
index 376914e2869d..96dcfcd9a2d4 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -43,6 +43,7 @@
43#include <linux/rculist.h> 43#include <linux/rculist.h>
44#include <linux/poll.h> 44#include <linux/poll.h>
45#include <linux/irq_work.h> 45#include <linux/irq_work.h>
46#include <linux/utsname.h>
46 47
47#include <asm/uaccess.h> 48#include <asm/uaccess.h>
48 49
@@ -2849,4 +2850,65 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)
2849 raw_spin_unlock_irqrestore(&logbuf_lock, flags); 2850 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2850} 2851}
2851EXPORT_SYMBOL_GPL(kmsg_dump_rewind); 2852EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
2853
2854static char dump_stack_arch_desc_str[128];
2855
2856/**
2857 * dump_stack_set_arch_desc - set arch-specific str to show with task dumps
2858 * @fmt: printf-style format string
2859 * @...: arguments for the format string
2860 *
2861 * The configured string will be printed right after utsname during task
2862 * dumps. Usually used to add arch-specific system identifiers. If an
2863 * arch wants to make use of such an ID string, it should initialize this
2864 * as soon as possible during boot.
2865 */
2866void __init dump_stack_set_arch_desc(const char *fmt, ...)
2867{
2868 va_list args;
2869
2870 va_start(args, fmt);
2871 vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str),
2872 fmt, args);
2873 va_end(args);
2874}
2875
2876/**
2877 * dump_stack_print_info - print generic debug info for dump_stack()
2878 * @log_lvl: log level
2879 *
2880 * Arch-specific dump_stack() implementations can use this function to
2881 * print out the same debug information as the generic dump_stack().
2882 */
2883void dump_stack_print_info(const char *log_lvl)
2884{
2885 printk("%sCPU: %d PID: %d Comm: %.20s %s %s %.*s\n",
2886 log_lvl, raw_smp_processor_id(), current->pid, current->comm,
2887 print_tainted(), init_utsname()->release,
2888 (int)strcspn(init_utsname()->version, " "),
2889 init_utsname()->version);
2890
2891 if (dump_stack_arch_desc_str[0] != '\0')
2892 printk("%sHardware name: %s\n",
2893 log_lvl, dump_stack_arch_desc_str);
2894
2895 print_worker_info(log_lvl, current);
2896}
2897
2898/**
2899 * show_regs_print_info - print generic debug info for show_regs()
2900 * @log_lvl: log level
2901 *
2902 * show_regs() implementations can use this function to print out generic
2903 * debug information.
2904 */
2905void show_regs_print_info(const char *log_lvl)
2906{
2907 dump_stack_print_info(log_lvl);
2908
2909 printk("%stask: %p ti: %p task.ti: %p\n",
2910 log_lvl, current, current_thread_info(),
2911 task_thread_info(current));
2912}
2913
2852#endif 2914#endif
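
dump_stack_set_arch_desc() is the format-once-into-a-static-buffer pattern: the arch fills a small string early in boot and every later dump prints it verbatim. A userspace analog of that pattern follows; the names and the fake header contents are illustrative and no kernel APIs are used.

#include <stdarg.h>
#include <stdio.h>

static char arch_desc[128];                 /* analog of dump_stack_arch_desc_str */

/* Format the one-line hardware description once, "early in boot". */
static void set_arch_desc(const char *fmt, ...)
{
        va_list args;

        va_start(args, fmt);
        vsnprintf(arch_desc, sizeof(arch_desc), fmt, args);
        va_end(args);
}

/* Print the shared header; skip the hardware line if nothing was configured. */
static void print_dump_header(void)
{
        printf("CPU: 0 PID: 1 Comm: demo\n");
        if (arch_desc[0] != '\0')
                printf("Hardware name: %s\n", arch_desc);
}

int main(void)
{
        set_arch_desc("%s %s, BIOS %s", "DemoVendor", "DemoBoard", "1.02");
        print_dump_header();
        return 0;
}
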
diff --git a/kernel/profile.c b/kernel/profile.c
index dc3384ee874e..0bf400737660 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -462,10 +462,10 @@ static const struct file_operations prof_cpu_mask_proc_fops = {
462 .write = prof_cpu_mask_proc_write, 462 .write = prof_cpu_mask_proc_write,
463}; 463};
464 464
465void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir) 465void create_prof_cpu_mask(void)
466{ 466{
467 /* create /proc/irq/prof_cpu_mask */ 467 /* create /proc/irq/prof_cpu_mask */
468 proc_create("prof_cpu_mask", 0600, root_irq_dir, &prof_cpu_mask_proc_fops); 468 proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_fops);
469} 469}
470 470
471/* 471/*
@@ -600,7 +600,7 @@ int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */
600 NULL, &proc_profile_operations); 600 NULL, &proc_profile_operations);
601 if (!entry) 601 if (!entry)
602 return 0; 602 return 0;
603 entry->size = (1+prof_len) * sizeof(atomic_t); 603 proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));
604 hotcpu_notifier(profile_cpu_callback, 0); 604 hotcpu_notifier(profile_cpu_callback, 0);
605 return 0; 605 return 0;
606} 606}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index acbd28424d81..17ae54da0ec2 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -24,6 +24,7 @@
24#include <linux/regset.h> 24#include <linux/regset.h>
25#include <linux/hw_breakpoint.h> 25#include <linux/hw_breakpoint.h>
26#include <linux/cn_proc.h> 26#include <linux/cn_proc.h>
27#include <linux/compat.h>
27 28
28 29
29static int ptrace_trapping_sleep_fn(void *flags) 30static int ptrace_trapping_sleep_fn(void *flags)
@@ -618,6 +619,81 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
618 return error; 619 return error;
619} 620}
620 621
622static int ptrace_peek_siginfo(struct task_struct *child,
623 unsigned long addr,
624 unsigned long data)
625{
626 struct ptrace_peeksiginfo_args arg;
627 struct sigpending *pending;
628 struct sigqueue *q;
629 int ret, i;
630
631 ret = copy_from_user(&arg, (void __user *) addr,
632 sizeof(struct ptrace_peeksiginfo_args));
633 if (ret)
634 return -EFAULT;
635
636 if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED)
637 return -EINVAL; /* unknown flags */
638
639 if (arg.nr < 0)
640 return -EINVAL;
641
642 if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
643 pending = &child->signal->shared_pending;
644 else
645 pending = &child->pending;
646
647 for (i = 0; i < arg.nr; ) {
648 siginfo_t info;
649 s32 off = arg.off + i;
650
651 spin_lock_irq(&child->sighand->siglock);
652 list_for_each_entry(q, &pending->list, list) {
653 if (!off--) {
654 copy_siginfo(&info, &q->info);
655 break;
656 }
657 }
658 spin_unlock_irq(&child->sighand->siglock);
659
660 if (off >= 0) /* beyond the end of the list */
661 break;
662
663#ifdef CONFIG_COMPAT
664 if (unlikely(is_compat_task())) {
665 compat_siginfo_t __user *uinfo = compat_ptr(data);
666
667 ret = copy_siginfo_to_user32(uinfo, &info);
668 ret |= __put_user(info.si_code, &uinfo->si_code);
669 } else
670#endif
671 {
672 siginfo_t __user *uinfo = (siginfo_t __user *) data;
673
674 ret = copy_siginfo_to_user(uinfo, &info);
675 ret |= __put_user(info.si_code, &uinfo->si_code);
676 }
677
678 if (ret) {
679 ret = -EFAULT;
680 break;
681 }
682
683 data += sizeof(siginfo_t);
684 i++;
685
686 if (signal_pending(current))
687 break;
688
689 cond_resched();
690 }
691
692 if (i > 0)
693 return i;
694
695 return ret;
696}
621 697
622#ifdef PTRACE_SINGLESTEP 698#ifdef PTRACE_SINGLESTEP
623#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) 699#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
@@ -748,6 +824,10 @@ int ptrace_request(struct task_struct *child, long request,
748 ret = put_user(child->ptrace_message, datalp); 824 ret = put_user(child->ptrace_message, datalp);
749 break; 825 break;
750 826
827 case PTRACE_PEEKSIGINFO:
828 ret = ptrace_peek_siginfo(child, addr, data);
829 break;
830
751 case PTRACE_GETSIGINFO: 831 case PTRACE_GETSIGINFO:
752 ret = ptrace_getsiginfo(child, &siginfo); 832 ret = ptrace_getsiginfo(child, &siginfo);
753 if (!ret) 833 if (!ret)
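
ptrace_peek_siginfo() re-walks the pending list for each requested element so the siglock is only held for one traversal at a time, and it returns the number of entries actually copied once at least one succeeded. A simplified userspace sketch of that offset/count bookkeeping, with a plain int standing in for siginfo_t and no locking or copy_to_user handling; pending_entry and peek_entries are invented names.

#include <stdio.h>

struct pending_entry {
        int info;                           /* stands in for siginfo_t */
        struct pending_entry *next;
};

/* Copy up to 'nr' entries starting at offset 'off' into 'out'; rescan from
 * the head for each element and stop once the offset runs past the end. */
static int peek_entries(struct pending_entry *head, int off, int nr, int *out)
{
        int i;

        for (i = 0; i < nr; i++) {
                struct pending_entry *q = head;
                int skip = off + i;

                while (q && skip-- > 0)
                        q = q->next;
                if (!q)                     /* beyond the end of the list */
                        break;
                out[i] = q->info;
        }
        return i;                           /* number of entries copied */
}

int main(void)
{
        struct pending_entry c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        int buf[4];
        int n = peek_entries(&a, 1, 4, buf);

        printf("copied %d entries, first is %d\n", n, buf[0]);  /* 2, 2 */
        return 0;
}
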
diff --git a/kernel/range.c b/kernel/range.c
index 9b8ae2d6ed68..071b0ab455cb 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -97,7 +97,8 @@ void subtract_range(struct range *range, int az, u64 start, u64 end)
97 range[i].end = range[j].end; 97 range[i].end = range[j].end;
98 range[i].start = end; 98 range[i].start = end;
99 } else { 99 } else {
100 printk(KERN_ERR "run of slot in ranges\n"); 100 pr_err("%s: run out of slot in ranges\n",
101 __func__);
101 } 102 }
102 range[j].end = start; 103 range[j].end = start;
103 continue; 104 continue;
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 71bd7337d0cc..170814dc418f 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1706,7 +1706,7 @@ static void rcu_prepare_for_idle(int cpu)
1706 return; 1706 return;
1707 1707
1708 /* If this is a no-CBs CPU, no callbacks, just return. */ 1708 /* If this is a no-CBs CPU, no callbacks, just return. */
1709 if (is_nocb_cpu(cpu)) 1709 if (rcu_is_nocb_cpu(cpu))
1710 return; 1710 return;
1711 1711
1712 /* 1712 /*
@@ -1748,7 +1748,7 @@ static void rcu_cleanup_after_idle(int cpu)
1748 struct rcu_data *rdp; 1748 struct rcu_data *rdp;
1749 struct rcu_state *rsp; 1749 struct rcu_state *rsp;
1750 1750
1751 if (is_nocb_cpu(cpu)) 1751 if (rcu_is_nocb_cpu(cpu))
1752 return; 1752 return;
1753 rcu_try_advance_all_cbs(); 1753 rcu_try_advance_all_cbs();
1754 for_each_rcu_flavor(rsp) { 1754 for_each_rcu_flavor(rsp) {
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 49099e81c87b..cf6c17412932 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -95,7 +95,7 @@ static const struct file_operations rcubarrier_fops = {
95 .open = rcubarrier_open, 95 .open = rcubarrier_open,
96 .read = seq_read, 96 .read = seq_read,
97 .llseek = no_llseek, 97 .llseek = no_llseek,
98 .release = seq_release, 98 .release = single_release,
99}; 99};
100 100
101#ifdef CONFIG_RCU_BOOST 101#ifdef CONFIG_RCU_BOOST
@@ -206,7 +206,7 @@ static const struct file_operations rcuexp_fops = {
206 .open = rcuexp_open, 206 .open = rcuexp_open,
207 .read = seq_read, 207 .read = seq_read,
208 .llseek = no_llseek, 208 .llseek = no_llseek,
209 .release = seq_release, 209 .release = single_release,
210}; 210};
211 211
212#ifdef CONFIG_RCU_BOOST 212#ifdef CONFIG_RCU_BOOST
@@ -306,7 +306,7 @@ static const struct file_operations rcuhier_fops = {
306 .open = rcuhier_open, 306 .open = rcuhier_open,
307 .read = seq_read, 307 .read = seq_read,
308 .llseek = no_llseek, 308 .llseek = no_llseek,
309 .release = seq_release, 309 .release = single_release,
310}; 310};
311 311
312static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) 312static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
@@ -348,7 +348,7 @@ static const struct file_operations rcugp_fops = {
348 .open = rcugp_open, 348 .open = rcugp_open,
349 .read = seq_read, 349 .read = seq_read,
350 .llseek = no_llseek, 350 .llseek = no_llseek,
351 .release = seq_release, 351 .release = single_release,
352}; 352};
353 353
354static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) 354static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
diff --git a/kernel/relay.c b/kernel/relay.c
index 01ab081ac53a..eef0d113b79e 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -588,7 +588,7 @@ struct rchan *relay_open(const char *base_filename,
588 chan->version = RELAYFS_CHANNEL_VERSION; 588 chan->version = RELAYFS_CHANNEL_VERSION;
589 chan->n_subbufs = n_subbufs; 589 chan->n_subbufs = n_subbufs;
590 chan->subbuf_size = subbuf_size; 590 chan->subbuf_size = subbuf_size;
591 chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); 591 chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs);
592 chan->parent = parent; 592 chan->parent = parent;
593 chan->private_data = private_data; 593 chan->private_data = private_data;
594 if (base_filename) { 594 if (base_filename) {
@@ -1099,8 +1099,7 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,
1099static int subbuf_read_actor(size_t read_start, 1099static int subbuf_read_actor(size_t read_start,
1100 struct rchan_buf *buf, 1100 struct rchan_buf *buf,
1101 size_t avail, 1101 size_t avail,
1102 read_descriptor_t *desc, 1102 read_descriptor_t *desc)
1103 read_actor_t actor)
1104{ 1103{
1105 void *from; 1104 void *from;
1106 int ret = 0; 1105 int ret = 0;
@@ -1121,15 +1120,13 @@ static int subbuf_read_actor(size_t read_start,
1121typedef int (*subbuf_actor_t) (size_t read_start, 1120typedef int (*subbuf_actor_t) (size_t read_start,
1122 struct rchan_buf *buf, 1121 struct rchan_buf *buf,
1123 size_t avail, 1122 size_t avail,
1124 read_descriptor_t *desc, 1123 read_descriptor_t *desc);
1125 read_actor_t actor);
1126 1124
1127/* 1125/*
1128 * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries 1126 * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
1129 */ 1127 */
1130static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, 1128static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
1131 subbuf_actor_t subbuf_actor, 1129 subbuf_actor_t subbuf_actor,
1132 read_actor_t actor,
1133 read_descriptor_t *desc) 1130 read_descriptor_t *desc)
1134{ 1131{
1135 struct rchan_buf *buf = filp->private_data; 1132 struct rchan_buf *buf = filp->private_data;
@@ -1150,7 +1147,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
1150 break; 1147 break;
1151 1148
1152 avail = min(desc->count, avail); 1149 avail = min(desc->count, avail);
1153 ret = subbuf_actor(read_start, buf, avail, desc, actor); 1150 ret = subbuf_actor(read_start, buf, avail, desc);
1154 if (desc->error < 0) 1151 if (desc->error < 0)
1155 break; 1152 break;
1156 1153
@@ -1174,8 +1171,7 @@ static ssize_t relay_file_read(struct file *filp,
1174 desc.count = count; 1171 desc.count = count;
1175 desc.arg.buf = buffer; 1172 desc.arg.buf = buffer;
1176 desc.error = 0; 1173 desc.error = 0;
1177 return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, 1174 return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc);
1178 NULL, &desc);
1179} 1175}
1180 1176
1181static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) 1177static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
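
FIX_SIZE() was just an open-coded round-up to a page boundary, which is why PAGE_ALIGN() can replace it without changing chan->alloc_size. For reference, the usual round-up arithmetic, sketched with an assumed 4 KiB page:

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE 4096u

/* Round a byte count up to the next page boundary, like PAGE_ALIGN(). */
static uint64_t page_align(uint64_t len)
{
        return (len + DEMO_PAGE_SIZE - 1) & ~(uint64_t)(DEMO_PAGE_SIZE - 1);
}

int main(void)
{
        printf("%llu %llu\n",
               (unsigned long long)page_align(1),       /* 4096 */
               (unsigned long long)page_align(8192));   /* 8192 */
        return 0;
}
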
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf986a091a..58453b8272fd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4653,6 +4653,7 @@ void sched_show_task(struct task_struct *p)
4653 task_pid_nr(p), ppid, 4653 task_pid_nr(p), ppid,
4654 (unsigned long)task_thread_info(p)->flags); 4654 (unsigned long)task_thread_info(p)->flags);
4655 4655
4656 print_worker_info(KERN_INFO, p);
4656 show_stack(p, NULL); 4657 show_stack(p, NULL);
4657} 4658}
4658 4659
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ea32f02bf2c3..cc2dc3eea8a3 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -506,34 +506,47 @@ void account_idle_ticks(unsigned long ticks)
506} 506}
507 507
508/* 508/*
509 * Perform (stime * rtime) / total with reduced chances 509 * Perform (stime * rtime) / total, but avoid multiplication overflow by
510 * of multiplication overflows by using smaller factors 510 * losing precision when the numbers are big.
511 * like quotient and remainders of divisions between
512 * rtime and total.
513 */ 511 */
514static cputime_t scale_stime(u64 stime, u64 rtime, u64 total) 512static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
515{ 513{
516 u64 rem, res, scaled; 514 u64 scaled;
517 515
518 if (rtime >= total) { 516 for (;;) {
519 /* 517 /* Make sure "rtime" is the bigger of stime/rtime */
520 * Scale up to rtime / total then add 518 if (stime > rtime) {
521 * the remainder scaled to stime / total. 519 u64 tmp = rtime; rtime = stime; stime = tmp;
522 */ 520 }
523 res = div64_u64_rem(rtime, total, &rem); 521
524 scaled = stime * res; 522 /* Make sure 'total' fits in 32 bits */
525 scaled += div64_u64(stime * rem, total); 523 if (total >> 32)
526 } else { 524 goto drop_precision;
527 /* 525
528 * Same in reverse: scale down to total / rtime 526 /* Does rtime (and thus stime) fit in 32 bits? */
529 * then substract that result scaled to 527 if (!(rtime >> 32))
530 * to the remaining part. 528 break;
531 */ 529
532 res = div64_u64_rem(total, rtime, &rem); 530 /* Can we just balance rtime/stime rather than dropping bits? */
533 scaled = div64_u64(stime, res); 531 if (stime >> 31)
534 scaled -= div64_u64(scaled * rem, total); 532 goto drop_precision;
533
534 /* We can grow stime and shrink rtime and try to make them both fit */
535 stime <<= 1;
536 rtime >>= 1;
537 continue;
538
539drop_precision:
540 /* We drop from rtime, it has more bits than stime */
541 rtime >>= 1;
542 total >>= 1;
535 } 543 }
536 544
545 /*
546 * Make sure gcc understands that this is a 32x32->64 multiply,
547 * followed by a 64/32->64 divide.
548 */
549 scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
537 return (__force cputime_t) scaled; 550 return (__force cputime_t) scaled;
538} 551}
539 552
@@ -545,7 +558,7 @@ static void cputime_adjust(struct task_cputime *curr,
545 struct cputime *prev, 558 struct cputime *prev,
546 cputime_t *ut, cputime_t *st) 559 cputime_t *ut, cputime_t *st)
547{ 560{
548 cputime_t rtime, stime, total; 561 cputime_t rtime, stime, utime, total;
549 562
550 if (vtime_accounting_enabled()) { 563 if (vtime_accounting_enabled()) {
551 *ut = curr->utime; 564 *ut = curr->utime;
@@ -568,13 +581,21 @@ static void cputime_adjust(struct task_cputime *curr,
568 */ 581 */
569 rtime = nsecs_to_cputime(curr->sum_exec_runtime); 582 rtime = nsecs_to_cputime(curr->sum_exec_runtime);
570 583
571 if (!rtime) { 584 /*
572 stime = 0; 585 * Update userspace visible utime/stime values only if actual execution
573 } else if (!total) { 586 * time is bigger than already exported. Note that it can happen that we
574 stime = rtime; 587 * provided bigger values due to scaling inaccuracy on big numbers.
575 } else { 588 */
589 if (prev->stime + prev->utime >= rtime)
590 goto out;
591
592 if (total) {
576 stime = scale_stime((__force u64)stime, 593 stime = scale_stime((__force u64)stime,
577 (__force u64)rtime, (__force u64)total); 594 (__force u64)rtime, (__force u64)total);
595 utime = rtime - stime;
596 } else {
597 stime = rtime;
598 utime = 0;
578 } 599 }
579 600
580 /* 601 /*
@@ -583,8 +604,9 @@ static void cputime_adjust(struct task_cputime *curr,
583 * Let's enforce monotonicity. 604 * Let's enforce monotonicity.
584 */ 605 */
585 prev->stime = max(prev->stime, stime); 606 prev->stime = max(prev->stime, stime);
586 prev->utime = max(prev->utime, rtime - prev->stime); 607 prev->utime = max(prev->utime, utime);
587 608
609out:
588 *ut = prev->utime; 610 *ut = prev->utime;
589 *st = prev->stime; 611 *st = prev->stime;
590} 612}
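
The rewritten scale_stime() trades precision for a guarantee that the final operation is a 32x32 multiply followed by a 64/32 divide. Below is a standalone C version of the same loop, same structure but with uint64_t instead of cputime_t, so it can be run against test values; it assumes stime <= total and total != 0, as the caller (total = stime + utime) guarantees.

#include <stdio.h>
#include <stdint.h>

static uint64_t scale_stime(uint64_t stime, uint64_t rtime, uint64_t total)
{
        for (;;) {
                /* Make sure "rtime" is the bigger of stime/rtime. */
                if (stime > rtime) {
                        uint64_t tmp = rtime; rtime = stime; stime = tmp;
                }

                /* Make sure "total" fits in 32 bits. */
                if (total >> 32)
                        goto drop_precision;

                /* Does rtime (and thus stime) fit in 32 bits? */
                if (!(rtime >> 32))
                        break;

                /* Can we just balance rtime/stime rather than dropping bits? */
                if (stime >> 31)
                        goto drop_precision;

                /* Grow stime and shrink rtime so both eventually fit. */
                stime <<= 1;
                rtime >>= 1;
                continue;

drop_precision:
                /* Drop a bit from rtime and total; rtime has more bits than stime. */
                rtime >>= 1;
                total >>= 1;
        }

        /* 32x32->64 multiply followed by a 64/32 divide. */
        return (uint64_t)(uint32_t)stime * (uint32_t)rtime / (uint32_t)total;
}

int main(void)
{
        /* stime is 1/3 of total, so the scaled value should be ~1/3 of rtime. */
        uint64_t s = 1000000000ull, r = 3000000000000ull, t = 3000000000ull;

        printf("scaled stime: %llu\n", (unsigned long long)scale_stime(s, r, t));
        return 0;
}

For this input the result comes out exact (1000000000000) because the bits dropped from rtime and total happen to divide evenly; in general the loop gives up a few low-order bits in exchange for never needing a 128-bit intermediate.
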
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index e036eda1a9c9..da98af347e8b 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -130,16 +130,11 @@ static int schedstat_open(struct inode *inode, struct file *file)
130 return seq_open(file, &schedstat_sops); 130 return seq_open(file, &schedstat_sops);
131} 131}
132 132
133static int schedstat_release(struct inode *inode, struct file *file)
134{
135 return 0;
136};
137
138static const struct file_operations proc_schedstat_operations = { 133static const struct file_operations proc_schedstat_operations = {
139 .open = schedstat_open, 134 .open = schedstat_open,
140 .read = seq_read, 135 .read = seq_read,
141 .llseek = seq_lseek, 136 .llseek = seq_lseek,
142 .release = schedstat_release, 137 .release = seq_release,
143}; 138};
144 139
145static int __init proc_schedstat_init(void) 140static int __init proc_schedstat_init(void)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5af44b593770..b7a10048a32c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -160,6 +160,8 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
160 case BPF_S_ALU_AND_X: 160 case BPF_S_ALU_AND_X:
161 case BPF_S_ALU_OR_K: 161 case BPF_S_ALU_OR_K:
162 case BPF_S_ALU_OR_X: 162 case BPF_S_ALU_OR_X:
163 case BPF_S_ALU_XOR_K:
164 case BPF_S_ALU_XOR_X:
163 case BPF_S_ALU_LSH_K: 165 case BPF_S_ALU_LSH_K:
164 case BPF_S_ALU_LSH_X: 166 case BPF_S_ALU_LSH_X:
165 case BPF_S_ALU_RSH_K: 167 case BPF_S_ALU_RSH_K:
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 4567fc020fe3..6815171a4fff 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(up);
193struct semaphore_waiter { 193struct semaphore_waiter {
194 struct list_head list; 194 struct list_head list;
195 struct task_struct *task; 195 struct task_struct *task;
196 int up; 196 bool up;
197}; 197};
198 198
199/* 199/*
@@ -209,12 +209,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
209 209
210 list_add_tail(&waiter.list, &sem->wait_list); 210 list_add_tail(&waiter.list, &sem->wait_list);
211 waiter.task = task; 211 waiter.task = task;
212 waiter.up = 0; 212 waiter.up = false;
213 213
214 for (;;) { 214 for (;;) {
215 if (signal_pending_state(state, task)) 215 if (signal_pending_state(state, task))
216 goto interrupted; 216 goto interrupted;
217 if (timeout <= 0) 217 if (unlikely(timeout <= 0))
218 goto timed_out; 218 goto timed_out;
219 __set_task_state(task, state); 219 __set_task_state(task, state);
220 raw_spin_unlock_irq(&sem->lock); 220 raw_spin_unlock_irq(&sem->lock);
@@ -258,6 +258,6 @@ static noinline void __sched __up(struct semaphore *sem)
258 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, 258 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
259 struct semaphore_waiter, list); 259 struct semaphore_waiter, list);
260 list_del(&waiter->list); 260 list_del(&waiter->list);
261 waiter->up = 1; 261 waiter->up = true;
262 wake_up_process(waiter->task); 262 wake_up_process(waiter->task);
263} 263}
diff --git a/kernel/signal.c b/kernel/signal.c
index 598dc06be421..113411bfe8b1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -32,6 +32,7 @@
32#include <linux/user_namespace.h> 32#include <linux/user_namespace.h>
33#include <linux/uprobes.h> 33#include <linux/uprobes.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/cn_proc.h>
35#define CREATE_TRACE_POINTS 36#define CREATE_TRACE_POINTS
36#include <trace/events/signal.h> 37#include <trace/events/signal.h>
37 38
@@ -854,12 +855,14 @@ static void ptrace_trap_notify(struct task_struct *t)
854 * Returns true if the signal should be actually delivered, otherwise 855 * Returns true if the signal should be actually delivered, otherwise
855 * it should be dropped. 856 * it should be dropped.
856 */ 857 */
857static int prepare_signal(int sig, struct task_struct *p, bool force) 858static bool prepare_signal(int sig, struct task_struct *p, bool force)
858{ 859{
859 struct signal_struct *signal = p->signal; 860 struct signal_struct *signal = p->signal;
860 struct task_struct *t; 861 struct task_struct *t;
861 862
862 if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) { 863 if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
864 if (signal->flags & SIGNAL_GROUP_COREDUMP)
865 return sig == SIGKILL;
863 /* 866 /*
864 * The process is in the middle of dying, nothing to do. 867 * The process is in the middle of dying, nothing to do.
865 */ 868 */
@@ -1160,8 +1163,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
1160static void print_fatal_signal(int signr) 1163static void print_fatal_signal(int signr)
1161{ 1164{
1162 struct pt_regs *regs = signal_pt_regs(); 1165 struct pt_regs *regs = signal_pt_regs();
1163 printk(KERN_INFO "%s/%d: potentially unexpected fatal signal %d.\n", 1166 printk(KERN_INFO "potentially unexpected fatal signal %d.\n", signr);
1164 current->comm, task_pid_nr(current), signr);
1165 1167
1166#if defined(__i386__) && !defined(__arch_um__) 1168#if defined(__i386__) && !defined(__arch_um__)
1167 printk(KERN_INFO "code at %08lx: ", regs->ip); 1169 printk(KERN_INFO "code at %08lx: ", regs->ip);
@@ -2350,6 +2352,7 @@ relock:
2350 if (sig_kernel_coredump(signr)) { 2352 if (sig_kernel_coredump(signr)) {
2351 if (print_fatal_signals) 2353 if (print_fatal_signals)
2352 print_fatal_signal(info->si_signo); 2354 print_fatal_signal(info->si_signo);
2355 proc_coredump_connector(current);
2353 /* 2356 /*
2354 * If it was able to dump core, this kills all 2357 * If it was able to dump core, this kills all
2355 * other threads in the group and synchronizes with 2358 * other threads in the group and synchronizes with
diff --git a/kernel/smp.c b/kernel/smp.c
index 8e451f3ff51b..4dba0f7b72ad 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -100,16 +100,16 @@ void __init call_function_init(void)
100 * previous function call. For multi-cpu calls its even more interesting 100 * previous function call. For multi-cpu calls its even more interesting
101 * as we'll have to ensure no other cpu is observing our csd. 101 * as we'll have to ensure no other cpu is observing our csd.
102 */ 102 */
103static void csd_lock_wait(struct call_single_data *data) 103static void csd_lock_wait(struct call_single_data *csd)
104{ 104{
105 while (data->flags & CSD_FLAG_LOCK) 105 while (csd->flags & CSD_FLAG_LOCK)
106 cpu_relax(); 106 cpu_relax();
107} 107}
108 108
109static void csd_lock(struct call_single_data *data) 109static void csd_lock(struct call_single_data *csd)
110{ 110{
111 csd_lock_wait(data); 111 csd_lock_wait(csd);
112 data->flags = CSD_FLAG_LOCK; 112 csd->flags |= CSD_FLAG_LOCK;
113 113
114 /* 114 /*
115 * prevent CPU from reordering the above assignment 115 * prevent CPU from reordering the above assignment
@@ -119,16 +119,16 @@ static void csd_lock(struct call_single_data *data)
119 smp_mb(); 119 smp_mb();
120} 120}
121 121
122static void csd_unlock(struct call_single_data *data) 122static void csd_unlock(struct call_single_data *csd)
123{ 123{
124 WARN_ON(!(data->flags & CSD_FLAG_LOCK)); 124 WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
125 125
126 /* 126 /*
127 * ensure we're all done before releasing data: 127 * ensure we're all done before releasing data:
128 */ 128 */
129 smp_mb(); 129 smp_mb();
130 130
131 data->flags &= ~CSD_FLAG_LOCK; 131 csd->flags &= ~CSD_FLAG_LOCK;
132} 132}
133 133
134/* 134/*
@@ -137,7 +137,7 @@ static void csd_unlock(struct call_single_data *data)
137 * ->func, ->info, and ->flags set. 137 * ->func, ->info, and ->flags set.
138 */ 138 */
139static 139static
140void generic_exec_single(int cpu, struct call_single_data *data, int wait) 140void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
141{ 141{
142 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); 142 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
143 unsigned long flags; 143 unsigned long flags;
@@ -145,7 +145,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
145 145
146 raw_spin_lock_irqsave(&dst->lock, flags); 146 raw_spin_lock_irqsave(&dst->lock, flags);
147 ipi = list_empty(&dst->list); 147 ipi = list_empty(&dst->list);
148 list_add_tail(&data->list, &dst->list); 148 list_add_tail(&csd->list, &dst->list);
149 raw_spin_unlock_irqrestore(&dst->lock, flags); 149 raw_spin_unlock_irqrestore(&dst->lock, flags);
150 150
151 /* 151 /*
@@ -163,7 +163,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
163 arch_send_call_function_single_ipi(cpu); 163 arch_send_call_function_single_ipi(cpu);
164 164
165 if (wait) 165 if (wait)
166 csd_lock_wait(data); 166 csd_lock_wait(csd);
167} 167}
168 168
169/* 169/*
@@ -173,7 +173,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
173void generic_smp_call_function_single_interrupt(void) 173void generic_smp_call_function_single_interrupt(void)
174{ 174{
175 struct call_single_queue *q = &__get_cpu_var(call_single_queue); 175 struct call_single_queue *q = &__get_cpu_var(call_single_queue);
176 unsigned int data_flags;
177 LIST_HEAD(list); 176 LIST_HEAD(list);
178 177
179 /* 178 /*
@@ -186,25 +185,26 @@ void generic_smp_call_function_single_interrupt(void)
186 raw_spin_unlock(&q->lock); 185 raw_spin_unlock(&q->lock);
187 186
188 while (!list_empty(&list)) { 187 while (!list_empty(&list)) {
189 struct call_single_data *data; 188 struct call_single_data *csd;
189 unsigned int csd_flags;
190 190
191 data = list_entry(list.next, struct call_single_data, list); 191 csd = list_entry(list.next, struct call_single_data, list);
192 list_del(&data->list); 192 list_del(&csd->list);
193 193
194 /* 194 /*
195 * 'data' can be invalid after this call if flags == 0 195 * 'csd' can be invalid after this call if flags == 0
196 * (when called through generic_exec_single()), 196 * (when called through generic_exec_single()),
197 * so save them away before making the call: 197 * so save them away before making the call:
198 */ 198 */
199 data_flags = data->flags; 199 csd_flags = csd->flags;
200 200
201 data->func(data->info); 201 csd->func(csd->info);
202 202
203 /* 203 /*
204 * Unlocked CSDs are valid through generic_exec_single(): 204 * Unlocked CSDs are valid through generic_exec_single():
205 */ 205 */
206 if (data_flags & CSD_FLAG_LOCK) 206 if (csd_flags & CSD_FLAG_LOCK)
207 csd_unlock(data); 207 csd_unlock(csd);
208 } 208 }
209} 209}
210 210
@@ -249,16 +249,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
249 local_irq_restore(flags); 249 local_irq_restore(flags);
250 } else { 250 } else {
251 if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { 251 if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
252 struct call_single_data *data = &d; 252 struct call_single_data *csd = &d;
253 253
254 if (!wait) 254 if (!wait)
255 data = &__get_cpu_var(csd_data); 255 csd = &__get_cpu_var(csd_data);
256 256
257 csd_lock(data); 257 csd_lock(csd);
258 258
259 data->func = func; 259 csd->func = func;
260 data->info = info; 260 csd->info = info;
261 generic_exec_single(cpu, data, wait); 261 generic_exec_single(cpu, csd, wait);
262 } else { 262 } else {
263 err = -ENXIO; /* CPU not online */ 263 err = -ENXIO; /* CPU not online */
264 } 264 }
@@ -325,7 +325,7 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
325 * pre-allocated data structure. Useful for embedding @data inside 325 * pre-allocated data structure. Useful for embedding @data inside
326 * other structures, for instance. 326 * other structures, for instance.
327 */ 327 */
328void __smp_call_function_single(int cpu, struct call_single_data *data, 328void __smp_call_function_single(int cpu, struct call_single_data *csd,
329 int wait) 329 int wait)
330{ 330{
331 unsigned int this_cpu; 331 unsigned int this_cpu;
@@ -343,11 +343,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
343 343
344 if (cpu == this_cpu) { 344 if (cpu == this_cpu) {
345 local_irq_save(flags); 345 local_irq_save(flags);
346 data->func(data->info); 346 csd->func(csd->info);
347 local_irq_restore(flags); 347 local_irq_restore(flags);
348 } else { 348 } else {
349 csd_lock(data); 349 csd_lock(csd);
350 generic_exec_single(cpu, data, wait); 350 generic_exec_single(cpu, csd, wait);
351 } 351 }
352 put_cpu(); 352 put_cpu();
353} 353}
@@ -369,7 +369,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
369void smp_call_function_many(const struct cpumask *mask, 369void smp_call_function_many(const struct cpumask *mask,
370 smp_call_func_t func, void *info, bool wait) 370 smp_call_func_t func, void *info, bool wait)
371{ 371{
372 struct call_function_data *data; 372 struct call_function_data *cfd;
373 int cpu, next_cpu, this_cpu = smp_processor_id(); 373 int cpu, next_cpu, this_cpu = smp_processor_id();
374 374
375 /* 375 /*
@@ -401,24 +401,24 @@ void smp_call_function_many(const struct cpumask *mask,
401 return; 401 return;
402 } 402 }
403 403
404 data = &__get_cpu_var(cfd_data); 404 cfd = &__get_cpu_var(cfd_data);
405 405
406 cpumask_and(data->cpumask, mask, cpu_online_mask); 406 cpumask_and(cfd->cpumask, mask, cpu_online_mask);
407 cpumask_clear_cpu(this_cpu, data->cpumask); 407 cpumask_clear_cpu(this_cpu, cfd->cpumask);
408 408
409 /* Some callers race with other cpus changing the passed mask */ 409 /* Some callers race with other cpus changing the passed mask */
410 if (unlikely(!cpumask_weight(data->cpumask))) 410 if (unlikely(!cpumask_weight(cfd->cpumask)))
411 return; 411 return;
412 412
413 /* 413 /*
414 * After we put an entry into the list, data->cpumask 414 * After we put an entry into the list, cfd->cpumask may be cleared
415 * may be cleared again when another CPU sends another IPI for 415 * again when another CPU sends another IPI for a SMP function call, so
416 * a SMP function call, so data->cpumask will be zero. 416 * cfd->cpumask will be zero.
417 */ 417 */
418 cpumask_copy(data->cpumask_ipi, data->cpumask); 418 cpumask_copy(cfd->cpumask_ipi, cfd->cpumask);
419 419
420 for_each_cpu(cpu, data->cpumask) { 420 for_each_cpu(cpu, cfd->cpumask) {
421 struct call_single_data *csd = per_cpu_ptr(data->csd, cpu); 421 struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
422 struct call_single_queue *dst = 422 struct call_single_queue *dst =
423 &per_cpu(call_single_queue, cpu); 423 &per_cpu(call_single_queue, cpu);
424 unsigned long flags; 424 unsigned long flags;
@@ -433,12 +433,13 @@ void smp_call_function_many(const struct cpumask *mask,
433 } 433 }
434 434
435 /* Send a message to all CPUs in the map */ 435 /* Send a message to all CPUs in the map */
436 arch_send_call_function_ipi_mask(data->cpumask_ipi); 436 arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
437 437
438 if (wait) { 438 if (wait) {
439 for_each_cpu(cpu, data->cpumask) { 439 for_each_cpu(cpu, cfd->cpumask) {
440 struct call_single_data *csd = 440 struct call_single_data *csd;
441 per_cpu_ptr(data->csd, cpu); 441
442 csd = per_cpu_ptr(cfd->csd, cpu);
442 csd_lock_wait(csd); 443 csd_lock_wait(csd);
443 } 444 }
444 } 445 }
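
csd_lock()/csd_unlock() implement a one-flag spin protocol on the per-cpu call_single_data slot: wait until the previous user clears CSD_FLAG_LOCK, mark the slot busy, and clear the flag only after the callback data has been consumed. A userspace approximation with C11 atomics, where acquire/release ordering stands in for the kernel's smp_mb(); the struct and function names are invented.

#include <stdatomic.h>
#include <stdio.h>

#define CSD_FLAG_LOCK 0x01u

struct sketch_csd {
        atomic_uint flags;
        void (*func)(void *info);
        void *info;
};

/* Wait for a previous user of the slot to finish (flag cleared). */
static void csd_lock_wait(struct sketch_csd *csd)
{
        while (atomic_load_explicit(&csd->flags, memory_order_acquire) & CSD_FLAG_LOCK)
                ;                           /* cpu_relax() in the kernel */
}

/* Claim the slot before filling in func/info. */
static void csd_lock(struct sketch_csd *csd)
{
        csd_lock_wait(csd);
        atomic_fetch_or_explicit(&csd->flags, CSD_FLAG_LOCK, memory_order_acquire);
}

/* Run the callback, then release the slot so a waiter can reuse it. */
static void csd_run_and_unlock(struct sketch_csd *csd)
{
        csd->func(csd->info);
        atomic_fetch_and_explicit(&csd->flags, ~CSD_FLAG_LOCK, memory_order_release);
}

static void hello(void *info)
{
        printf("called with %s\n", (const char *)info);
}

int main(void)
{
        struct sketch_csd csd = { .flags = 0, .func = hello, .info = "demo" };

        csd_lock(&csd);
        csd_run_and_unlock(&csd);
        return 0;
}
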
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 51a09d56e78b..b5197dcb0dad 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -629,8 +629,7 @@ static void remote_softirq_receive(void *data)
629 unsigned long flags; 629 unsigned long flags;
630 int softirq; 630 int softirq;
631 631
632 softirq = cp->priv; 632 softirq = *(int *)cp->info;
633
634 local_irq_save(flags); 633 local_irq_save(flags);
635 __local_trigger(cp, softirq); 634 __local_trigger(cp, softirq);
636 local_irq_restore(flags); 635 local_irq_restore(flags);
@@ -640,9 +639,8 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softir
640{ 639{
641 if (cpu_online(cpu)) { 640 if (cpu_online(cpu)) {
642 cp->func = remote_softirq_receive; 641 cp->func = remote_softirq_receive;
643 cp->info = cp; 642 cp->info = &softirq;
644 cp->flags = 0; 643 cp->flags = 0;
645 cp->priv = softirq;
646 644
647 __smp_call_function_single(cpu, cp, 0); 645 __smp_call_function_single(cpu, cp, 0);
648 return 0; 646 return 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index 0da73cf73e60..b95d3c72ba21 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -49,6 +49,11 @@
49#include <linux/user_namespace.h> 49#include <linux/user_namespace.h>
50#include <linux/binfmts.h> 50#include <linux/binfmts.h>
51 51
52#include <linux/sched.h>
53#include <linux/rcupdate.h>
54#include <linux/uidgid.h>
55#include <linux/cred.h>
56
52#include <linux/kmsg_dump.h> 57#include <linux/kmsg_dump.h>
53/* Move somewhere else to avoid recompiling? */ 58/* Move somewhere else to avoid recompiling? */
54#include <generated/utsrelease.h> 59#include <generated/utsrelease.h>
@@ -1044,6 +1049,67 @@ change_okay:
1044 return old_fsgid; 1049 return old_fsgid;
1045} 1050}
1046 1051
1052/**
1053 * sys_getpid - return the thread group id of the current process
1054 *
1055 * Note, despite the name, this returns the tgid not the pid. The tgid and
1056 * the pid are identical unless CLONE_THREAD was specified on clone() in
1057 * which case the tgid is the same in all threads of the same group.
1058 *
1059 * This is SMP safe as current->tgid does not change.
1060 */
1061SYSCALL_DEFINE0(getpid)
1062{
1063 return task_tgid_vnr(current);
1064}
1065
1066/* Thread ID - the internal kernel "pid" */
1067SYSCALL_DEFINE0(gettid)
1068{
1069 return task_pid_vnr(current);
1070}
1071
1072/*
1073 * Accessing ->real_parent is not SMP-safe, it could
1074 * change from under us. However, we can use a stale
1075 * value of ->real_parent under rcu_read_lock(), see
1076 * release_task()->call_rcu(delayed_put_task_struct).
1077 */
1078SYSCALL_DEFINE0(getppid)
1079{
1080 int pid;
1081
1082 rcu_read_lock();
1083 pid = task_tgid_vnr(rcu_dereference(current->real_parent));
1084 rcu_read_unlock();
1085
1086 return pid;
1087}
1088
1089SYSCALL_DEFINE0(getuid)
1090{
1091 /* Only we change this so SMP safe */
1092 return from_kuid_munged(current_user_ns(), current_uid());
1093}
1094
1095SYSCALL_DEFINE0(geteuid)
1096{
1097 /* Only we change this so SMP safe */
1098 return from_kuid_munged(current_user_ns(), current_euid());
1099}
1100
1101SYSCALL_DEFINE0(getgid)
1102{
1103 /* Only we change this so SMP safe */
1104 return from_kgid_munged(current_user_ns(), current_gid());
1105}
1106
1107SYSCALL_DEFINE0(getegid)
1108{
1109 /* Only we change this so SMP safe */
1110 return from_kgid_munged(current_user_ns(), current_egid());
1111}
1112
1047void do_sys_times(struct tms *tms) 1113void do_sys_times(struct tms *tms)
1048{ 1114{
1049 cputime_t tgutime, tgstime, cutime, cstime; 1115 cputime_t tgutime, tgstime, cutime, cstime;
@@ -1785,13 +1851,26 @@ SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
1785 return getrusage(current, who, ru); 1851 return getrusage(current, who, ru);
1786} 1852}
1787 1853
1854#ifdef CONFIG_COMPAT
1855COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
1856{
1857 struct rusage r;
1858
1859 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1860 who != RUSAGE_THREAD)
1861 return -EINVAL;
1862
1863 k_getrusage(current, who, &r);
1864 return put_compat_rusage(&r, ru);
1865}
1866#endif
1867
1788SYSCALL_DEFINE1(umask, int, mask) 1868SYSCALL_DEFINE1(umask, int, mask)
1789{ 1869{
1790 mask = xchg(&current->fs->umask, mask & S_IRWXUGO); 1870 mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1791 return mask; 1871 return mask;
1792} 1872}
1793 1873
1794#ifdef CONFIG_CHECKPOINT_RESTORE
1795static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1874static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1796{ 1875{
1797 struct fd exe; 1876 struct fd exe;
@@ -1985,17 +2064,12 @@ out:
1985 return error; 2064 return error;
1986} 2065}
1987 2066
2067#ifdef CONFIG_CHECKPOINT_RESTORE
1988static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 2068static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
1989{ 2069{
1990 return put_user(me->clear_child_tid, tid_addr); 2070 return put_user(me->clear_child_tid, tid_addr);
1991} 2071}
1992 2072#else
1993#else /* CONFIG_CHECKPOINT_RESTORE */
1994static int prctl_set_mm(int opt, unsigned long addr,
1995 unsigned long arg4, unsigned long arg5)
1996{
1997 return -EINVAL;
1998}
1999static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 2073static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
2000{ 2074{
2001 return -EINVAL; 2075 return -EINVAL;
@@ -2245,3 +2319,148 @@ int orderly_poweroff(bool force)
2245 return 0; 2319 return 0;
2246} 2320}
2247EXPORT_SYMBOL_GPL(orderly_poweroff); 2321EXPORT_SYMBOL_GPL(orderly_poweroff);
2322
2323/**
2324 * do_sysinfo - fill in sysinfo struct
2325 * @info: pointer to buffer to fill
2326 */
2327static int do_sysinfo(struct sysinfo *info)
2328{
2329 unsigned long mem_total, sav_total;
2330 unsigned int mem_unit, bitcount;
2331 struct timespec tp;
2332
2333 memset(info, 0, sizeof(struct sysinfo));
2334
2335 ktime_get_ts(&tp);
2336 monotonic_to_bootbased(&tp);
2337 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
2338
2339 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
2340
2341 info->procs = nr_threads;
2342
2343 si_meminfo(info);
2344 si_swapinfo(info);
2345
2346 /*
2347 * If the sum of all the available memory (i.e. ram + swap)
2348 * is less than can be stored in a 32 bit unsigned long then
2349 * we can be binary compatible with 2.2.x kernels. If not,
2350 * well, in that case 2.2.x was broken anyways...
2351 *
2352 * -Erik Andersen <andersee@debian.org>
2353 */
2354
2355 mem_total = info->totalram + info->totalswap;
2356 if (mem_total < info->totalram || mem_total < info->totalswap)
2357 goto out;
2358 bitcount = 0;
2359 mem_unit = info->mem_unit;
2360 while (mem_unit > 1) {
2361 bitcount++;
2362 mem_unit >>= 1;
2363 sav_total = mem_total;
2364 mem_total <<= 1;
2365 if (mem_total < sav_total)
2366 goto out;
2367 }
2368
2369 /*
2370 * If mem_total did not overflow, multiply all memory values by
2371 * info->mem_unit and set it to 1. This leaves things compatible
2372 * with 2.2.x, and also retains compatibility with earlier 2.4.x
2373 * kernels...
2374 */
2375
2376 info->mem_unit = 1;
2377 info->totalram <<= bitcount;
2378 info->freeram <<= bitcount;
2379 info->sharedram <<= bitcount;
2380 info->bufferram <<= bitcount;
2381 info->totalswap <<= bitcount;
2382 info->freeswap <<= bitcount;
2383 info->totalhigh <<= bitcount;
2384 info->freehigh <<= bitcount;
2385
2386out:
2387 return 0;
2388}
2389
2390SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
2391{
2392 struct sysinfo val;
2393
2394 do_sysinfo(&val);
2395
2396 if (copy_to_user(info, &val, sizeof(struct sysinfo)))
2397 return -EFAULT;
2398
2399 return 0;
2400}
2401
2402#ifdef CONFIG_COMPAT
2403struct compat_sysinfo {
2404 s32 uptime;
2405 u32 loads[3];
2406 u32 totalram;
2407 u32 freeram;
2408 u32 sharedram;
2409 u32 bufferram;
2410 u32 totalswap;
2411 u32 freeswap;
2412 u16 procs;
2413 u16 pad;
2414 u32 totalhigh;
2415 u32 freehigh;
2416 u32 mem_unit;
2417 char _f[20-2*sizeof(u32)-sizeof(int)];
2418};
2419
2420COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
2421{
2422 struct sysinfo s;
2423
2424 do_sysinfo(&s);
2425
2426 /* Check to see if any memory value is too large for 32-bit and scale
2427 * down if needed
2428 */
2429 if ((s.totalram >> 32) || (s.totalswap >> 32)) {
2430 int bitcount = 0;
2431
2432 while (s.mem_unit < PAGE_SIZE) {
2433 s.mem_unit <<= 1;
2434 bitcount++;
2435 }
2436
2437 s.totalram >>= bitcount;
2438 s.freeram >>= bitcount;
2439 s.sharedram >>= bitcount;
2440 s.bufferram >>= bitcount;
2441 s.totalswap >>= bitcount;
2442 s.freeswap >>= bitcount;
2443 s.totalhigh >>= bitcount;
2444 s.freehigh >>= bitcount;
2445 }
2446
2447 if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
2448 __put_user(s.uptime, &info->uptime) ||
2449 __put_user(s.loads[0], &info->loads[0]) ||
2450 __put_user(s.loads[1], &info->loads[1]) ||
2451 __put_user(s.loads[2], &info->loads[2]) ||
2452 __put_user(s.totalram, &info->totalram) ||
2453 __put_user(s.freeram, &info->freeram) ||
2454 __put_user(s.sharedram, &info->sharedram) ||
2455 __put_user(s.bufferram, &info->bufferram) ||
2456 __put_user(s.totalswap, &info->totalswap) ||
2457 __put_user(s.freeswap, &info->freeswap) ||
2458 __put_user(s.procs, &info->procs) ||
2459 __put_user(s.totalhigh, &info->totalhigh) ||
2460 __put_user(s.freehigh, &info->freehigh) ||
2461 __put_user(s.mem_unit, &info->mem_unit))
2462 return -EFAULT;
2463
2464 return 0;
2465}
2466#endif /* CONFIG_COMPAT */
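
The compat path above undoes what do_sysinfo() just did: the native code folds mem_unit into the counters when the totals fit, and the 32-bit path shifts the counters back down, growing mem_unit up to PAGE_SIZE, until every value fits in a u32. A sketch of that downscaling step with an assumed 4096-byte page and only two of the fields:

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE 4096u

/* Shift 64-bit counters down and grow mem_unit until everything fits in 32 bits. */
static void scale_to_32bit(uint64_t *totalram, uint64_t *totalswap,
                           uint32_t *mem_unit)
{
        int bitcount = 0;

        if (!((*totalram >> 32) || (*totalswap >> 32)))
                return;                     /* already fits, nothing to do */

        while (*mem_unit < DEMO_PAGE_SIZE) {
                *mem_unit <<= 1;
                bitcount++;
        }
        *totalram >>= bitcount;
        *totalswap >>= bitcount;
}

int main(void)
{
        /* 2 TiB of RAM expressed in 1-byte units does not fit in 32 bits. */
        uint64_t ram = 2ull << 40, swap = 0;
        uint32_t unit = 1;

        scale_to_32bit(&ram, &swap, &unit);
        printf("totalram=%llu mem_unit=%u (bytes=%llu)\n",
               (unsigned long long)ram, unit,
               (unsigned long long)(ram * unit));
        return 0;
}

Multiplying the scaled count by the new mem_unit recovers the original byte total, so nothing is lost as long as the values are multiples of the page size.
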
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 395084d4ce16..bfd6787b355a 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -20,6 +20,7 @@ cond_syscall(sys_quotactl);
20cond_syscall(sys32_quotactl); 20cond_syscall(sys32_quotactl);
21cond_syscall(sys_acct); 21cond_syscall(sys_acct);
22cond_syscall(sys_lookup_dcookie); 22cond_syscall(sys_lookup_dcookie);
23cond_syscall(compat_sys_lookup_dcookie);
23cond_syscall(sys_swapon); 24cond_syscall(sys_swapon);
24cond_syscall(sys_swapoff); 25cond_syscall(sys_swapoff);
25cond_syscall(sys_kexec_load); 26cond_syscall(sys_kexec_load);
@@ -155,7 +156,7 @@ cond_syscall(compat_sys_process_vm_writev);
155cond_syscall(sys_pciconfig_read); 156cond_syscall(sys_pciconfig_read);
156cond_syscall(sys_pciconfig_write); 157cond_syscall(sys_pciconfig_write);
157cond_syscall(sys_pciconfig_iobase); 158cond_syscall(sys_pciconfig_iobase);
158cond_syscall(sys32_ipc); 159cond_syscall(compat_sys_s390_ipc);
159cond_syscall(ppc_rtas); 160cond_syscall(ppc_rtas);
160cond_syscall(sys_spu_run); 161cond_syscall(sys_spu_run);
161cond_syscall(sys_spu_create); 162cond_syscall(sys_spu_create);
diff --git a/kernel/time.c b/kernel/time.c
index f8342a41efa6..d3617dbd3dca 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -138,13 +138,14 @@ int persistent_clock_is_local;
138 */ 138 */
139static inline void warp_clock(void) 139static inline void warp_clock(void)
140{ 140{
141 struct timespec adjust; 141 if (sys_tz.tz_minuteswest != 0) {
142 struct timespec adjust;
142 143
143 adjust = current_kernel_time();
144 if (sys_tz.tz_minuteswest != 0)
145 persistent_clock_is_local = 1; 144 persistent_clock_is_local = 1;
146 adjust.tv_sec += sys_tz.tz_minuteswest * 60; 145 adjust.tv_sec = sys_tz.tz_minuteswest * 60;
147 do_settimeofday(&adjust); 146 adjust.tv_nsec = 0;
147 timekeeping_inject_offset(&adjust);
148 }
148} 149}
149 150
150/* 151/*
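
The rewritten warp_clock() no longer reads the wall clock and writes back an absolute time; it only turns tz_minuteswest into a signed offset in seconds and hands that delta to timekeeping_inject_offset(), presumably so the warp is applied in one step inside the timekeeping core rather than as a read-modify-write of the clock. The arithmetic is only this:

#include <stdio.h>

/* Timezone offset west of UTC in minutes, as settimeofday() supplies it. */
static int tz_minuteswest = -120;           /* e.g. UTC+2 */

/* The delta handed to the timekeeping core: minutes west, in seconds. */
static long warp_delta_seconds(void)
{
        return (long)tz_minuteswest * 60;
}

int main(void)
{
        printf("inject offset of %ld seconds\n", warp_delta_seconds());  /* -7200 */
        return 0;
}
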
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 072bb066bb7d..12ff13a838c6 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -18,13 +18,14 @@
18#include <linux/rtc.h> 18#include <linux/rtc.h>
19 19
20#include "tick-internal.h" 20#include "tick-internal.h"
21#include "ntp_internal.h"
21 22
22/* 23/*
23 * NTP timekeeping variables: 24 * NTP timekeeping variables:
25 *
26 * Note: All of the NTP state is protected by the timekeeping locks.
24 */ 27 */
25 28
26DEFINE_RAW_SPINLOCK(ntp_lock);
27
28 29
29/* USER_HZ period (usecs): */ 30/* USER_HZ period (usecs): */
30unsigned long tick_usec = TICK_USEC; 31unsigned long tick_usec = TICK_USEC;
@@ -53,9 +54,6 @@ static int time_state = TIME_OK;
53/* clock status bits: */ 54/* clock status bits: */
54static int time_status = STA_UNSYNC; 55static int time_status = STA_UNSYNC;
55 56
56/* TAI offset (secs): */
57static long time_tai;
58
59/* time adjustment (nsecs): */ 57/* time adjustment (nsecs): */
60static s64 time_offset; 58static s64 time_offset;
61 59
@@ -134,8 +132,6 @@ static inline void pps_reset_freq_interval(void)
134 132
135/** 133/**
136 * pps_clear - Clears the PPS state variables 134 * pps_clear - Clears the PPS state variables
137 *
138 * Must be called while holding a write on the ntp_lock
139 */ 135 */
140static inline void pps_clear(void) 136static inline void pps_clear(void)
141{ 137{
@@ -150,8 +146,6 @@ static inline void pps_clear(void)
150/* Decrease pps_valid to indicate that another second has passed since 146/* Decrease pps_valid to indicate that another second has passed since
151 * the last PPS signal. When it reaches 0, indicate that PPS signal is 147 * the last PPS signal. When it reaches 0, indicate that PPS signal is
152 * missing. 148 * missing.
153 *
154 * Must be called while holding a write on the ntp_lock
155 */ 149 */
156static inline void pps_dec_valid(void) 150static inline void pps_dec_valid(void)
157{ 151{
@@ -346,10 +340,6 @@ static void ntp_update_offset(long offset)
346 */ 340 */
347void ntp_clear(void) 341void ntp_clear(void)
348{ 342{
349 unsigned long flags;
350
351 raw_spin_lock_irqsave(&ntp_lock, flags);
352
353 time_adjust = 0; /* stop active adjtime() */ 343 time_adjust = 0; /* stop active adjtime() */
354 time_status |= STA_UNSYNC; 344 time_status |= STA_UNSYNC;
355 time_maxerror = NTP_PHASE_LIMIT; 345 time_maxerror = NTP_PHASE_LIMIT;
@@ -362,20 +352,12 @@ void ntp_clear(void)
362 352
363 /* Clear PPS state variables */ 353 /* Clear PPS state variables */
364 pps_clear(); 354 pps_clear();
365 raw_spin_unlock_irqrestore(&ntp_lock, flags);
366
367} 355}
368 356
369 357
370u64 ntp_tick_length(void) 358u64 ntp_tick_length(void)
371{ 359{
372 unsigned long flags; 360 return tick_length;
373 s64 ret;
374
375 raw_spin_lock_irqsave(&ntp_lock, flags);
376 ret = tick_length;
377 raw_spin_unlock_irqrestore(&ntp_lock, flags);
378 return ret;
379} 361}
380 362
381 363
@@ -393,9 +375,6 @@ int second_overflow(unsigned long secs)
393{ 375{
394 s64 delta; 376 s64 delta;
395 int leap = 0; 377 int leap = 0;
396 unsigned long flags;
397
398 raw_spin_lock_irqsave(&ntp_lock, flags);
399 378
400 /* 379 /*
401 * Leap second processing. If in leap-insert state at the end of the 380 * Leap second processing. If in leap-insert state at the end of the
@@ -415,7 +394,6 @@ int second_overflow(unsigned long secs)
415 else if (secs % 86400 == 0) { 394 else if (secs % 86400 == 0) {
416 leap = -1; 395 leap = -1;
417 time_state = TIME_OOP; 396 time_state = TIME_OOP;
418 time_tai++;
419 printk(KERN_NOTICE 397 printk(KERN_NOTICE
420 "Clock: inserting leap second 23:59:60 UTC\n"); 398 "Clock: inserting leap second 23:59:60 UTC\n");
421 } 399 }
@@ -425,7 +403,6 @@ int second_overflow(unsigned long secs)
425 time_state = TIME_OK; 403 time_state = TIME_OK;
426 else if ((secs + 1) % 86400 == 0) { 404 else if ((secs + 1) % 86400 == 0) {
427 leap = 1; 405 leap = 1;
428 time_tai--;
429 time_state = TIME_WAIT; 406 time_state = TIME_WAIT;
430 printk(KERN_NOTICE 407 printk(KERN_NOTICE
431 "Clock: deleting leap second 23:59:59 UTC\n"); 408 "Clock: deleting leap second 23:59:59 UTC\n");
@@ -479,8 +456,6 @@ int second_overflow(unsigned long secs)
479 time_adjust = 0; 456 time_adjust = 0;
480 457
481out: 458out:
482 raw_spin_unlock_irqrestore(&ntp_lock, flags);
483
484 return leap; 459 return leap;
485} 460}
486 461
@@ -575,11 +550,10 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
575 time_status |= txc->status & ~STA_RONLY; 550 time_status |= txc->status & ~STA_RONLY;
576} 551}
577 552
578/* 553
579 * Called with ntp_lock held, so we can access and modify 554static inline void process_adjtimex_modes(struct timex *txc,
580 * all the global NTP state: 555 struct timespec *ts,
581 */ 556 s32 *time_tai)
582static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
583{ 557{
584 if (txc->modes & ADJ_STATUS) 558 if (txc->modes & ADJ_STATUS)
585 process_adj_status(txc, ts); 559 process_adj_status(txc, ts);
@@ -613,7 +587,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
613 } 587 }
614 588
615 if (txc->modes & ADJ_TAI && txc->constant > 0) 589 if (txc->modes & ADJ_TAI && txc->constant > 0)
616 time_tai = txc->constant; 590 *time_tai = txc->constant;
617 591
618 if (txc->modes & ADJ_OFFSET) 592 if (txc->modes & ADJ_OFFSET)
619 ntp_update_offset(txc->offset); 593 ntp_update_offset(txc->offset);
@@ -625,16 +599,13 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
625 ntp_update_frequency(); 599 ntp_update_frequency();
626} 600}
627 601
628/* 602
629 * adjtimex mainly allows reading (and writing, if superuser) of 603
630 * kernel time-keeping variables. used by xntpd. 604/**
605 * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
631 */ 606 */
632int do_adjtimex(struct timex *txc) 607int ntp_validate_timex(struct timex *txc)
633{ 608{
634 struct timespec ts;
635 int result;
636
637 /* Validate the data before disabling interrupts */
638 if (txc->modes & ADJ_ADJTIME) { 609 if (txc->modes & ADJ_ADJTIME) {
639 /* singleshot must not be used with any other mode bits */ 610 /* singleshot must not be used with any other mode bits */
640 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) 611 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
@@ -646,7 +617,6 @@ int do_adjtimex(struct timex *txc)
646 /* In order to modify anything, you gotta be super-user! */ 617 /* In order to modify anything, you gotta be super-user! */
647 if (txc->modes && !capable(CAP_SYS_TIME)) 618 if (txc->modes && !capable(CAP_SYS_TIME))
648 return -EPERM; 619 return -EPERM;
649
650 /* 620 /*
651 * if the quartz is off by more than 10% then 621 * if the quartz is off by more than 10% then
652 * something is VERY wrong! 622 * something is VERY wrong!
@@ -657,22 +627,20 @@ int do_adjtimex(struct timex *txc)
657 return -EINVAL; 627 return -EINVAL;
658 } 628 }
659 629
660 if (txc->modes & ADJ_SETOFFSET) { 630 if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
661 struct timespec delta; 631 return -EPERM;
662 delta.tv_sec = txc->time.tv_sec;
663 delta.tv_nsec = txc->time.tv_usec;
664 if (!capable(CAP_SYS_TIME))
665 return -EPERM;
666 if (!(txc->modes & ADJ_NANO))
667 delta.tv_nsec *= 1000;
668 result = timekeeping_inject_offset(&delta);
669 if (result)
670 return result;
671 }
672 632
673 getnstimeofday(&ts); 633 return 0;
634}
674 635
675 raw_spin_lock_irq(&ntp_lock); 636
637/*
638 * adjtimex mainly allows reading (and writing, if superuser) of
639 * kernel time-keeping variables. used by xntpd.
640 */
641int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
642{
643 int result;
676 644
677 if (txc->modes & ADJ_ADJTIME) { 645 if (txc->modes & ADJ_ADJTIME) {
678 long save_adjust = time_adjust; 646 long save_adjust = time_adjust;
@@ -687,7 +655,7 @@ int do_adjtimex(struct timex *txc)
687 655
688 /* If there are input parameters, then process them: */ 656 /* If there are input parameters, then process them: */
689 if (txc->modes) 657 if (txc->modes)
690 process_adjtimex_modes(txc, &ts); 658 process_adjtimex_modes(txc, ts, time_tai);
691 659
692 txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, 660 txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
693 NTP_SCALE_SHIFT); 661 NTP_SCALE_SHIFT);
@@ -709,15 +677,13 @@ int do_adjtimex(struct timex *txc)
709 txc->precision = 1; 677 txc->precision = 1;
710 txc->tolerance = MAXFREQ_SCALED / PPM_SCALE; 678 txc->tolerance = MAXFREQ_SCALED / PPM_SCALE;
711 txc->tick = tick_usec; 679 txc->tick = tick_usec;
712 txc->tai = time_tai; 680 txc->tai = *time_tai;
713 681
714 /* fill PPS status fields */ 682 /* fill PPS status fields */
715 pps_fill_timex(txc); 683 pps_fill_timex(txc);
716 684
717 raw_spin_unlock_irq(&ntp_lock); 685 txc->time.tv_sec = ts->tv_sec;
718 686 txc->time.tv_usec = ts->tv_nsec;
719 txc->time.tv_sec = ts.tv_sec;
720 txc->time.tv_usec = ts.tv_nsec;
721 if (!(time_status & STA_NANO)) 687 if (!(time_status & STA_NANO))
722 txc->time.tv_usec /= NSEC_PER_USEC; 688 txc->time.tv_usec /= NSEC_PER_USEC;
723 689
@@ -894,7 +860,7 @@ static void hardpps_update_phase(long error)
894} 860}
895 861
896/* 862/*
897 * hardpps() - discipline CPU clock oscillator to external PPS signal 863 * __hardpps() - discipline CPU clock oscillator to external PPS signal
898 * 864 *
899 * This routine is called at each PPS signal arrival in order to 865 * This routine is called at each PPS signal arrival in order to
900 * discipline the CPU clock oscillator to the PPS signal. It takes two 866 * discipline the CPU clock oscillator to the PPS signal. It takes two
@@ -905,15 +871,13 @@ static void hardpps_update_phase(long error)
905 * This code is based on David Mills's reference nanokernel 871 * This code is based on David Mills's reference nanokernel
906 * implementation. It was mostly rewritten but keeps the same idea. 872 * implementation. It was mostly rewritten but keeps the same idea.
907 */ 873 */
908void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) 874void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
909{ 875{
910 struct pps_normtime pts_norm, freq_norm; 876 struct pps_normtime pts_norm, freq_norm;
911 unsigned long flags; 877 unsigned long flags;
912 878
913 pts_norm = pps_normalize_ts(*phase_ts); 879 pts_norm = pps_normalize_ts(*phase_ts);
914 880
915 raw_spin_lock_irqsave(&ntp_lock, flags);
916
917 /* clear the error bits, they will be set again if needed */ 881 /* clear the error bits, they will be set again if needed */
918 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); 882 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
919 883
@@ -925,7 +889,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
925 * just start the frequency interval */ 889 * just start the frequency interval */
926 if (unlikely(pps_fbase.tv_sec == 0)) { 890 if (unlikely(pps_fbase.tv_sec == 0)) {
927 pps_fbase = *raw_ts; 891 pps_fbase = *raw_ts;
928 raw_spin_unlock_irqrestore(&ntp_lock, flags);
929 return; 892 return;
930 } 893 }
931 894
@@ -940,7 +903,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
940 time_status |= STA_PPSJITTER; 903 time_status |= STA_PPSJITTER;
941 /* restart the frequency calibration interval */ 904 /* restart the frequency calibration interval */
942 pps_fbase = *raw_ts; 905 pps_fbase = *raw_ts;
943 raw_spin_unlock_irqrestore(&ntp_lock, flags);
944 pr_err("hardpps: PPSJITTER: bad pulse\n"); 906 pr_err("hardpps: PPSJITTER: bad pulse\n");
945 return; 907 return;
946 } 908 }
@@ -957,10 +919,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
957 919
958 hardpps_update_phase(pts_norm.nsec); 920 hardpps_update_phase(pts_norm.nsec);
959 921
960 raw_spin_unlock_irqrestore(&ntp_lock, flags);
961} 922}
962EXPORT_SYMBOL(hardpps);
963
964#endif /* CONFIG_NTP_PPS */ 923#endif /* CONFIG_NTP_PPS */
965 924
966static int __init ntp_tick_adj_setup(char *str) 925static int __init ntp_tick_adj_setup(char *str)
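
Taken together, the ntp.c hunks remove the private ntp_lock and move all NTP state under the timekeeping locks: do_adjtimex() is split into ntp_validate_timex(), which needs no locks, and __do_adjtimex(), which expects to run with the timekeeper write side held and gets the current timespec and TAI offset passed in instead of reading globals. The new top-level do_adjtimex() lives in timekeeping.c and is not part of this section, so the following is only a plausible sketch of how the pieces fit together, with error paths and ADJ_SETOFFSET handling omitted:

/* sketch, assuming the locks and helpers introduced elsewhere in this series */
int do_adjtimex(struct timex *txc)
{
	struct timekeeper *tk = &timekeeper;
	struct timespec ts;
	s32 tai;
	int ret;

	ret = ntp_validate_timex(txc);		/* no locks needed for validation */
	if (ret)
		return ret;

	getnstimeofday(&ts);

	raw_spin_lock_irq(&timekeeper_lock);	/* writer side of the NTP state */
	write_seqcount_begin(&timekeeper_seq);

	tai = tk->tai_offset;
	ret = __do_adjtimex(txc, &ts, &tai);	/* may update the TAI offset */
	__timekeeping_set_tai_offset(tk, tai);

	write_seqcount_end(&timekeeper_seq);
	raw_spin_unlock_irq(&timekeeper_lock);

	return ret;
}
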
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
new file mode 100644
index 000000000000..1950cb4ca2a4
--- /dev/null
+++ b/kernel/time/ntp_internal.h
@@ -0,0 +1,12 @@
1#ifndef _LINUX_NTP_INTERNAL_H
2#define _LINUX_NTP_INTERNAL_H
3
4extern void ntp_init(void);
5extern void ntp_clear(void);
6/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
7extern u64 ntp_tick_length(void);
8extern int second_overflow(unsigned long secs);
9extern int ntp_validate_timex(struct timex *);
10extern int __do_adjtimex(struct timex *, struct timespec *, s32 *);
11extern void __hardpps(const struct timespec *, const struct timespec *);
12#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 40c10502c9e9..206bbfb34e09 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -28,9 +28,8 @@
28 */ 28 */
29 29
30static struct tick_device tick_broadcast_device; 30static struct tick_device tick_broadcast_device;
31/* FIXME: Use cpumask_var_t. */ 31static cpumask_var_t tick_broadcast_mask;
32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); 32static cpumask_var_t tmpmask;
33static DECLARE_BITMAP(tmpmask, NR_CPUS);
34static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); 33static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
35static int tick_broadcast_force; 34static int tick_broadcast_force;
36 35
@@ -50,7 +49,7 @@ struct tick_device *tick_get_broadcast_device(void)
50 49
51struct cpumask *tick_get_broadcast_mask(void) 50struct cpumask *tick_get_broadcast_mask(void)
52{ 51{
53 return to_cpumask(tick_broadcast_mask); 52 return tick_broadcast_mask;
54} 53}
55 54
56/* 55/*
@@ -67,6 +66,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
67 */ 66 */
68int tick_check_broadcast_device(struct clock_event_device *dev) 67int tick_check_broadcast_device(struct clock_event_device *dev)
69{ 68{
69 struct clock_event_device *cur = tick_broadcast_device.evtdev;
70
70 if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || 71 if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
71 (tick_broadcast_device.evtdev && 72 (tick_broadcast_device.evtdev &&
72 tick_broadcast_device.evtdev->rating >= dev->rating) || 73 tick_broadcast_device.evtdev->rating >= dev->rating) ||
@@ -74,9 +75,21 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
74 return 0; 75 return 0;
75 76
76 clockevents_exchange_device(tick_broadcast_device.evtdev, dev); 77 clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
78 if (cur)
79 cur->event_handler = clockevents_handle_noop;
77 tick_broadcast_device.evtdev = dev; 80 tick_broadcast_device.evtdev = dev;
78 if (!cpumask_empty(tick_get_broadcast_mask())) 81 if (!cpumask_empty(tick_broadcast_mask))
79 tick_broadcast_start_periodic(dev); 82 tick_broadcast_start_periodic(dev);
83 /*
84 * Inform all cpus about this. We might be in a situation
85 * where we did not switch to oneshot mode because the per cpu
86 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
87 * of a oneshot capable broadcast device. Without that
88 * notification the systems stays stuck in periodic mode
89 * forever.
90 */
91 if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
92 tick_clock_notify();
80 return 1; 93 return 1;
81} 94}
82 95
@@ -124,7 +137,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
124 if (!tick_device_is_functional(dev)) { 137 if (!tick_device_is_functional(dev)) {
125 dev->event_handler = tick_handle_periodic; 138 dev->event_handler = tick_handle_periodic;
126 tick_device_setup_broadcast_func(dev); 139 tick_device_setup_broadcast_func(dev);
127 cpumask_set_cpu(cpu, tick_get_broadcast_mask()); 140 cpumask_set_cpu(cpu, tick_broadcast_mask);
128 tick_broadcast_start_periodic(tick_broadcast_device.evtdev); 141 tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
129 ret = 1; 142 ret = 1;
130 } else { 143 } else {
@@ -135,7 +148,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
135 */ 148 */
136 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { 149 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
137 int cpu = smp_processor_id(); 150 int cpu = smp_processor_id();
138 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 151 cpumask_clear_cpu(cpu, tick_broadcast_mask);
139 tick_broadcast_clear_oneshot(cpu); 152 tick_broadcast_clear_oneshot(cpu);
140 } else { 153 } else {
141 tick_device_setup_broadcast_func(dev); 154 tick_device_setup_broadcast_func(dev);
@@ -199,9 +212,8 @@ static void tick_do_periodic_broadcast(void)
199{ 212{
200 raw_spin_lock(&tick_broadcast_lock); 213 raw_spin_lock(&tick_broadcast_lock);
201 214
202 cpumask_and(to_cpumask(tmpmask), 215 cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
203 cpu_online_mask, tick_get_broadcast_mask()); 216 tick_do_broadcast(tmpmask);
204 tick_do_broadcast(to_cpumask(tmpmask));
205 217
206 raw_spin_unlock(&tick_broadcast_lock); 218 raw_spin_unlock(&tick_broadcast_lock);
207} 219}
@@ -264,13 +276,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
264 if (!tick_device_is_functional(dev)) 276 if (!tick_device_is_functional(dev))
265 goto out; 277 goto out;
266 278
267 bc_stopped = cpumask_empty(tick_get_broadcast_mask()); 279 bc_stopped = cpumask_empty(tick_broadcast_mask);
268 280
269 switch (*reason) { 281 switch (*reason) {
270 case CLOCK_EVT_NOTIFY_BROADCAST_ON: 282 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
271 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: 283 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
272 if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { 284 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
273 cpumask_set_cpu(cpu, tick_get_broadcast_mask());
274 if (tick_broadcast_device.mode == 285 if (tick_broadcast_device.mode ==
275 TICKDEV_MODE_PERIODIC) 286 TICKDEV_MODE_PERIODIC)
276 clockevents_shutdown(dev); 287 clockevents_shutdown(dev);
@@ -280,8 +291,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
280 break; 291 break;
281 case CLOCK_EVT_NOTIFY_BROADCAST_OFF: 292 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
282 if (!tick_broadcast_force && 293 if (!tick_broadcast_force &&
283 cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { 294 cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
284 cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
285 if (tick_broadcast_device.mode == 295 if (tick_broadcast_device.mode ==
286 TICKDEV_MODE_PERIODIC) 296 TICKDEV_MODE_PERIODIC)
287 tick_setup_periodic(dev, 0); 297 tick_setup_periodic(dev, 0);
@@ -289,7 +299,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
289 break; 299 break;
290 } 300 }
291 301
292 if (cpumask_empty(tick_get_broadcast_mask())) { 302 if (cpumask_empty(tick_broadcast_mask)) {
293 if (!bc_stopped) 303 if (!bc_stopped)
294 clockevents_shutdown(bc); 304 clockevents_shutdown(bc);
295 } else if (bc_stopped) { 305 } else if (bc_stopped) {
@@ -338,10 +348,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
338 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 348 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
339 349
340 bc = tick_broadcast_device.evtdev; 350 bc = tick_broadcast_device.evtdev;
341 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 351 cpumask_clear_cpu(cpu, tick_broadcast_mask);
342 352
343 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { 353 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
344 if (bc && cpumask_empty(tick_get_broadcast_mask())) 354 if (bc && cpumask_empty(tick_broadcast_mask))
345 clockevents_shutdown(bc); 355 clockevents_shutdown(bc);
346 } 356 }
347 357
@@ -377,13 +387,13 @@ int tick_resume_broadcast(void)
377 387
378 switch (tick_broadcast_device.mode) { 388 switch (tick_broadcast_device.mode) {
379 case TICKDEV_MODE_PERIODIC: 389 case TICKDEV_MODE_PERIODIC:
380 if (!cpumask_empty(tick_get_broadcast_mask())) 390 if (!cpumask_empty(tick_broadcast_mask))
381 tick_broadcast_start_periodic(bc); 391 tick_broadcast_start_periodic(bc);
382 broadcast = cpumask_test_cpu(smp_processor_id(), 392 broadcast = cpumask_test_cpu(smp_processor_id(),
383 tick_get_broadcast_mask()); 393 tick_broadcast_mask);
384 break; 394 break;
385 case TICKDEV_MODE_ONESHOT: 395 case TICKDEV_MODE_ONESHOT:
386 if (!cpumask_empty(tick_get_broadcast_mask())) 396 if (!cpumask_empty(tick_broadcast_mask))
387 broadcast = tick_resume_broadcast_oneshot(bc); 397 broadcast = tick_resume_broadcast_oneshot(bc);
388 break; 398 break;
389 } 399 }
@@ -396,25 +406,58 @@ int tick_resume_broadcast(void)
396 406
397#ifdef CONFIG_TICK_ONESHOT 407#ifdef CONFIG_TICK_ONESHOT
398 408
399/* FIXME: use cpumask_var_t. */ 409static cpumask_var_t tick_broadcast_oneshot_mask;
400static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS); 410static cpumask_var_t tick_broadcast_pending_mask;
411static cpumask_var_t tick_broadcast_force_mask;
401 412
402/* 413/*
403 * Exposed for debugging: see timer_list.c 414 * Exposed for debugging: see timer_list.c
404 */ 415 */
405struct cpumask *tick_get_broadcast_oneshot_mask(void) 416struct cpumask *tick_get_broadcast_oneshot_mask(void)
406{ 417{
407 return to_cpumask(tick_broadcast_oneshot_mask); 418 return tick_broadcast_oneshot_mask;
408} 419}
409 420
410static int tick_broadcast_set_event(ktime_t expires, int force) 421/*
422 * Called before going idle with interrupts disabled. Checks whether a
423 * broadcast event from the other core is about to happen. We detected
424 * that in tick_broadcast_oneshot_control(). The callsite can use this
425 * to avoid a deep idle transition as we are about to get the
426 * broadcast IPI right away.
427 */
428int tick_check_broadcast_expired(void)
411{ 429{
412 struct clock_event_device *bc = tick_broadcast_device.evtdev; 430 return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
431}
432
433/*
434 * Set broadcast interrupt affinity
435 */
436static void tick_broadcast_set_affinity(struct clock_event_device *bc,
437 const struct cpumask *cpumask)
438{
439 if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
440 return;
441
442 if (cpumask_equal(bc->cpumask, cpumask))
443 return;
444
445 bc->cpumask = cpumask;
446 irq_set_affinity(bc->irq, bc->cpumask);
447}
448
449static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
450 ktime_t expires, int force)
451{
452 int ret;
413 453
414 if (bc->mode != CLOCK_EVT_MODE_ONESHOT) 454 if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
415 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); 455 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
416 456
417 return clockevents_program_event(bc, expires, force); 457 ret = clockevents_program_event(bc, expires, force);
458 if (!ret)
459 tick_broadcast_set_affinity(bc, cpumask_of(cpu));
460 return ret;
418} 461}
419 462
420int tick_resume_broadcast_oneshot(struct clock_event_device *bc) 463int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
@@ -429,7 +472,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
429 */ 472 */
430void tick_check_oneshot_broadcast(int cpu) 473void tick_check_oneshot_broadcast(int cpu)
431{ 474{
432 if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) { 475 if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
433 struct tick_device *td = &per_cpu(tick_cpu_device, cpu); 476 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
434 477
435 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); 478 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -443,27 +486,39 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
443{ 486{
444 struct tick_device *td; 487 struct tick_device *td;
445 ktime_t now, next_event; 488 ktime_t now, next_event;
446 int cpu; 489 int cpu, next_cpu = 0;
447 490
448 raw_spin_lock(&tick_broadcast_lock); 491 raw_spin_lock(&tick_broadcast_lock);
449again: 492again:
450 dev->next_event.tv64 = KTIME_MAX; 493 dev->next_event.tv64 = KTIME_MAX;
451 next_event.tv64 = KTIME_MAX; 494 next_event.tv64 = KTIME_MAX;
452 cpumask_clear(to_cpumask(tmpmask)); 495 cpumask_clear(tmpmask);
453 now = ktime_get(); 496 now = ktime_get();
454 /* Find all expired events */ 497 /* Find all expired events */
455 for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) { 498 for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
456 td = &per_cpu(tick_cpu_device, cpu); 499 td = &per_cpu(tick_cpu_device, cpu);
457 if (td->evtdev->next_event.tv64 <= now.tv64) 500 if (td->evtdev->next_event.tv64 <= now.tv64) {
458 cpumask_set_cpu(cpu, to_cpumask(tmpmask)); 501 cpumask_set_cpu(cpu, tmpmask);
459 else if (td->evtdev->next_event.tv64 < next_event.tv64) 502 /*
503 * Mark the remote cpu in the pending mask, so
504 * it can avoid reprogramming the cpu local
505 * timer in tick_broadcast_oneshot_control().
506 */
507 cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
508 } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
460 next_event.tv64 = td->evtdev->next_event.tv64; 509 next_event.tv64 = td->evtdev->next_event.tv64;
510 next_cpu = cpu;
511 }
461 } 512 }
462 513
514 /* Take care of enforced broadcast requests */
515 cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
516 cpumask_clear(tick_broadcast_force_mask);
517
463 /* 518 /*
464 * Wakeup the cpus which have an expired event. 519 * Wakeup the cpus which have an expired event.
465 */ 520 */
466 tick_do_broadcast(to_cpumask(tmpmask)); 521 tick_do_broadcast(tmpmask);
467 522
468 /* 523 /*
469 * Two reasons for reprogram: 524 * Two reasons for reprogram:
@@ -480,7 +535,7 @@ again:
480 * Rearm the broadcast device. If event expired, 535 * Rearm the broadcast device. If event expired,
481 * repeat the above 536 * repeat the above
482 */ 537 */
483 if (tick_broadcast_set_event(next_event, 0)) 538 if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
484 goto again; 539 goto again;
485 } 540 }
486 raw_spin_unlock(&tick_broadcast_lock); 541 raw_spin_unlock(&tick_broadcast_lock);
@@ -495,6 +550,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
495 struct clock_event_device *bc, *dev; 550 struct clock_event_device *bc, *dev;
496 struct tick_device *td; 551 struct tick_device *td;
497 unsigned long flags; 552 unsigned long flags;
553 ktime_t now;
498 int cpu; 554 int cpu;
499 555
500 /* 556 /*
@@ -519,21 +575,84 @@ void tick_broadcast_oneshot_control(unsigned long reason)
519 575
520 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 576 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
521 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { 577 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
522 if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { 578 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
523 cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); 579 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
524 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); 580 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
525 if (dev->next_event.tv64 < bc->next_event.tv64) 581 /*
526 tick_broadcast_set_event(dev->next_event, 1); 582 * We only reprogram the broadcast timer if we
583 * did not mark ourselves in the force mask and
584 * if the cpu local event is earlier than the
585 * broadcast event. If the current CPU is in
586 * the force mask, then we are going to be
587 * woken by the IPI right away.
588 */
589 if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
590 dev->next_event.tv64 < bc->next_event.tv64)
591 tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
527 } 592 }
528 } else { 593 } else {
529 if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { 594 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
530 cpumask_clear_cpu(cpu,
531 tick_get_broadcast_oneshot_mask());
532 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 595 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
533 if (dev->next_event.tv64 != KTIME_MAX) 596 if (dev->next_event.tv64 == KTIME_MAX)
534 tick_program_event(dev->next_event, 1); 597 goto out;
598 /*
599 * The cpu which was handling the broadcast
600 * timer marked this cpu in the broadcast
601 * pending mask and fired the broadcast
602 * IPI. So we are going to handle the expired
603 * event anyway via the broadcast IPI
604 * handler. No need to reprogram the timer
605 * with an already expired event.
606 */
607 if (cpumask_test_and_clear_cpu(cpu,
608 tick_broadcast_pending_mask))
609 goto out;
610
611 /*
612 * If the pending bit is not set, then we are
613 * either the CPU handling the broadcast
614 * interrupt or we got woken by something else.
615 *
616 * We are no longer in the broadcast mask, so
617 * if the cpu local expiry time is already
618 * reached, we would reprogram the cpu local
619 * timer with an already expired event.
620 *
621 * This can lead to a ping-pong when we return
622 * to idle and therefore rearm the broadcast
623 * timer before the cpu local timer was able
624 * to fire. This happens because the forced
625 * reprogramming makes sure that the event
626 * will happen in the future and depending on
627 * the min_delta setting this might be far
628 * enough out that the ping-pong starts.
629 *
630 * If the cpu local next_event has expired
631 * then we know that the broadcast timer
632 * next_event has expired as well and
633 * broadcast is about to be handled. So we
634 * avoid reprogramming and enforce that the
635 * broadcast handler, which did not run yet,
636 * will invoke the cpu local handler.
637 *
638 * We cannot call the handler directly from
639 * here, because we might be in a NOHZ phase
640 * and we did not go through the irq_enter()
641 * nohz fixups.
642 */
643 now = ktime_get();
644 if (dev->next_event.tv64 <= now.tv64) {
645 cpumask_set_cpu(cpu, tick_broadcast_force_mask);
646 goto out;
647 }
648 /*
649 * We got woken by something else. Reprogram
650 * the cpu local timer device.
651 */
652 tick_program_event(dev->next_event, 1);
535 } 653 }
536 } 654 }
655out:
537 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 656 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
538} 657}
539 658
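
The long comment block in the exit branch above explains why a CPU leaving broadcast mode must sometimes refuse to rearm its local timer. Condensed to just the decisions, after the device has been switched back to oneshot mode (this restates the code in the hunk, it adds nothing new):

/* CLOCK_EVT_NOTIFY_BROADCAST_EXIT, condensed: */
if (dev->next_event.tv64 == KTIME_MAX)
	goto out;			/* nothing pending at all */
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_pending_mask))
	goto out;			/* broadcast IPI is already on its way
					 * and will run the expired event */
if (dev->next_event.tv64 <= ktime_get().tv64) {
	/* local event already expired: make the broadcast handler deliver
	 * it instead of reprogramming an expired event and ping-ponging
	 * between idle and the local timer */
	cpumask_set_cpu(cpu, tick_broadcast_force_mask);
	goto out;
}
tick_program_event(dev->next_event, 1);	/* woken early by something else */
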
@@ -544,7 +663,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
544 */ 663 */
545static void tick_broadcast_clear_oneshot(int cpu) 664static void tick_broadcast_clear_oneshot(int cpu)
546{ 665{
547 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); 666 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
548} 667}
549 668
550static void tick_broadcast_init_next_event(struct cpumask *mask, 669static void tick_broadcast_init_next_event(struct cpumask *mask,
@@ -583,17 +702,16 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
583 * oneshot_mask bits for those and program the 702 * oneshot_mask bits for those and program the
584 * broadcast device to fire. 703 * broadcast device to fire.
585 */ 704 */
586 cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask()); 705 cpumask_copy(tmpmask, tick_broadcast_mask);
587 cpumask_clear_cpu(cpu, to_cpumask(tmpmask)); 706 cpumask_clear_cpu(cpu, tmpmask);
588 cpumask_or(tick_get_broadcast_oneshot_mask(), 707 cpumask_or(tick_broadcast_oneshot_mask,
589 tick_get_broadcast_oneshot_mask(), 708 tick_broadcast_oneshot_mask, tmpmask);
590 to_cpumask(tmpmask));
591 709
592 if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { 710 if (was_periodic && !cpumask_empty(tmpmask)) {
593 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); 711 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
594 tick_broadcast_init_next_event(to_cpumask(tmpmask), 712 tick_broadcast_init_next_event(tmpmask,
595 tick_next_period); 713 tick_next_period);
596 tick_broadcast_set_event(tick_next_period, 1); 714 tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
597 } else 715 } else
598 bc->next_event.tv64 = KTIME_MAX; 716 bc->next_event.tv64 = KTIME_MAX;
599 } else { 717 } else {
@@ -641,7 +759,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
641 * Clear the broadcast mask flag for the dead cpu, but do not 759 * Clear the broadcast mask flag for the dead cpu, but do not
642 * stop the broadcast device! 760 * stop the broadcast device!
643 */ 761 */
644 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); 762 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
645 763
646 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 764 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
647} 765}
@@ -665,3 +783,14 @@ bool tick_broadcast_oneshot_available(void)
665} 783}
666 784
667#endif 785#endif
786
787void __init tick_broadcast_init(void)
788{
789 alloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
790 alloc_cpumask_var(&tmpmask, GFP_NOWAIT);
791#ifdef CONFIG_TICK_ONESHOT
792 alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
793 alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
794 alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
795#endif
796}
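
The mask handling throughout this file switches from fixed NR_CPUS-sized DECLARE_BITMAP()s to cpumask_var_t, which becomes a boot-time allocation when CONFIG_CPUMASK_OFFSTACK is set; that is why the new tick_broadcast_init() above allocates the masks with GFP_NOWAIT and why tick_init() now has to call it. It also lets callers use the combined test-and-set/clear helpers directly. A minimal, self-contained illustration of the pattern (my_mask, my_init and my_mark_cpu are made-up names, not part of the patch):

/* kernel context: <linux/cpumask.h>, <linux/printk.h> */
static cpumask_var_t my_mask;

void __init my_init(void)
{
	/* GFP_NOWAIT: early init context, sleeping allocations not allowed */
	alloc_cpumask_var(&my_mask, GFP_NOWAIT);
}

static void my_mark_cpu(int cpu)
{
	/* the combined test-and-set replaces a separate test + set pair */
	if (!cpumask_test_and_set_cpu(cpu, my_mask))
		pr_debug("cpu %d newly added to mask\n", cpu);
}
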
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 83f2bd967161..5d3fb100bc06 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -326,6 +326,7 @@ static void tick_shutdown(unsigned int *cpup)
326 */ 326 */
327 dev->mode = CLOCK_EVT_MODE_UNUSED; 327 dev->mode = CLOCK_EVT_MODE_UNUSED;
328 clockevents_exchange_device(dev, NULL); 328 clockevents_exchange_device(dev, NULL);
329 dev->event_handler = clockevents_handle_noop;
329 td->evtdev = NULL; 330 td->evtdev = NULL;
330 } 331 }
331 raw_spin_unlock_irqrestore(&tick_device_lock, flags); 332 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
@@ -419,4 +420,5 @@ static struct notifier_block tick_notifier = {
419void __init tick_init(void) 420void __init tick_init(void)
420{ 421{
421 clockevents_register_notifier(&tick_notifier); 422 clockevents_register_notifier(&tick_notifier);
423 tick_broadcast_init();
422} 424}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index cf3e59ed6dc0..f0299eae4602 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -4,6 +4,8 @@
4#include <linux/hrtimer.h> 4#include <linux/hrtimer.h>
5#include <linux/tick.h> 5#include <linux/tick.h>
6 6
7extern seqlock_t jiffies_lock;
8
7#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD 9#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
8 10
9#define TICK_DO_TIMER_NONE -1 11#define TICK_DO_TIMER_NONE -1
@@ -94,7 +96,7 @@ extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
94extern void tick_shutdown_broadcast(unsigned int *cpup); 96extern void tick_shutdown_broadcast(unsigned int *cpup);
95extern void tick_suspend_broadcast(void); 97extern void tick_suspend_broadcast(void);
96extern int tick_resume_broadcast(void); 98extern int tick_resume_broadcast(void);
97 99extern void tick_broadcast_init(void);
98extern void 100extern void
99tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); 101tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
100 102
@@ -119,6 +121,7 @@ static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
119static inline void tick_shutdown_broadcast(unsigned int *cpup) { } 121static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
120static inline void tick_suspend_broadcast(void) { } 122static inline void tick_suspend_broadcast(void) { }
121static inline int tick_resume_broadcast(void) { return 0; } 123static inline int tick_resume_broadcast(void) { return 0; }
124static inline void tick_broadcast_init(void) { }
122 125
123/* 126/*
124 * Set the periodic handler in non broadcast mode 127 * Set the periodic handler in non broadcast mode
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 07929c633570..bc67d4245e1d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -730,8 +730,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
730 730
731 if (ratelimit < 10 && 731 if (ratelimit < 10 &&
732 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { 732 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
733 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", 733 pr_warn("NOHZ: local_softirq_pending %02x\n",
734 (unsigned int) local_softirq_pending()); 734 (unsigned int) local_softirq_pending());
735 ratelimit++; 735 ratelimit++;
736 } 736 }
737 return false; 737 return false;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9a0bc98fbe1d..98cd470bbe49 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -23,8 +23,13 @@
23#include <linux/stop_machine.h> 23#include <linux/stop_machine.h>
24#include <linux/pvclock_gtod.h> 24#include <linux/pvclock_gtod.h>
25 25
26#include "tick-internal.h"
27#include "ntp_internal.h"
26 28
27static struct timekeeper timekeeper; 29static struct timekeeper timekeeper;
30static DEFINE_RAW_SPINLOCK(timekeeper_lock);
31static seqcount_t timekeeper_seq;
32static struct timekeeper shadow_timekeeper;
28 33
29/* flag for if timekeeping is suspended */ 34/* flag for if timekeeping is suspended */
30int __read_mostly timekeeping_suspended; 35int __read_mostly timekeeping_suspended;
@@ -67,6 +72,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
67 tk->wall_to_monotonic = wtm; 72 tk->wall_to_monotonic = wtm;
68 set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec); 73 set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
69 tk->offs_real = timespec_to_ktime(tmp); 74 tk->offs_real = timespec_to_ktime(tmp);
75 tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0));
70} 76}
71 77
72static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) 78static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
@@ -96,7 +102,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
96 102
97 old_clock = tk->clock; 103 old_clock = tk->clock;
98 tk->clock = clock; 104 tk->clock = clock;
99 clock->cycle_last = clock->read(clock); 105 tk->cycle_last = clock->cycle_last = clock->read(clock);
100 106
101 /* Do the ns -> cycle conversion first, using original mult */ 107 /* Do the ns -> cycle conversion first, using original mult */
102 tmp = NTP_INTERVAL_LENGTH; 108 tmp = NTP_INTERVAL_LENGTH;
@@ -201,8 +207,6 @@ static void update_pvclock_gtod(struct timekeeper *tk)
201 207
202/** 208/**
203 * pvclock_gtod_register_notifier - register a pvclock timedata update listener 209 * pvclock_gtod_register_notifier - register a pvclock timedata update listener
204 *
205 * Must hold write on timekeeper.lock
206 */ 210 */
207int pvclock_gtod_register_notifier(struct notifier_block *nb) 211int pvclock_gtod_register_notifier(struct notifier_block *nb)
208{ 212{
@@ -210,11 +214,10 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
210 unsigned long flags; 214 unsigned long flags;
211 int ret; 215 int ret;
212 216
213 write_seqlock_irqsave(&tk->lock, flags); 217 raw_spin_lock_irqsave(&timekeeper_lock, flags);
214 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); 218 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
215 /* update timekeeping data */
216 update_pvclock_gtod(tk); 219 update_pvclock_gtod(tk);
217 write_sequnlock_irqrestore(&tk->lock, flags); 220 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
218 221
219 return ret; 222 return ret;
220} 223}
@@ -223,25 +226,22 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
223/** 226/**
224 * pvclock_gtod_unregister_notifier - unregister a pvclock 227 * pvclock_gtod_unregister_notifier - unregister a pvclock
225 * timedata update listener 228 * timedata update listener
226 *
227 * Must hold write on timekeeper.lock
228 */ 229 */
229int pvclock_gtod_unregister_notifier(struct notifier_block *nb) 230int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
230{ 231{
231 struct timekeeper *tk = &timekeeper;
232 unsigned long flags; 232 unsigned long flags;
233 int ret; 233 int ret;
234 234
235 write_seqlock_irqsave(&tk->lock, flags); 235 raw_spin_lock_irqsave(&timekeeper_lock, flags);
236 ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb); 236 ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
237 write_sequnlock_irqrestore(&tk->lock, flags); 237 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
238 238
239 return ret; 239 return ret;
240} 240}
241EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); 241EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
242 242
243/* must hold write on timekeeper.lock */ 243/* must hold timekeeper_lock */
244static void timekeeping_update(struct timekeeper *tk, bool clearntp) 244static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
245{ 245{
246 if (clearntp) { 246 if (clearntp) {
247 tk->ntp_error = 0; 247 tk->ntp_error = 0;
@@ -249,6 +249,9 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
249 } 249 }
250 update_vsyscall(tk); 250 update_vsyscall(tk);
251 update_pvclock_gtod(tk); 251 update_pvclock_gtod(tk);
252
253 if (mirror)
254 memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
252} 255}
253 256
254/** 257/**
@@ -267,7 +270,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
267 clock = tk->clock; 270 clock = tk->clock;
268 cycle_now = clock->read(clock); 271 cycle_now = clock->read(clock);
269 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 272 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
270 clock->cycle_last = cycle_now; 273 tk->cycle_last = clock->cycle_last = cycle_now;
271 274
272 tk->xtime_nsec += cycle_delta * tk->mult; 275 tk->xtime_nsec += cycle_delta * tk->mult;
273 276
@@ -294,12 +297,12 @@ int __getnstimeofday(struct timespec *ts)
294 s64 nsecs = 0; 297 s64 nsecs = 0;
295 298
296 do { 299 do {
297 seq = read_seqbegin(&tk->lock); 300 seq = read_seqcount_begin(&timekeeper_seq);
298 301
299 ts->tv_sec = tk->xtime_sec; 302 ts->tv_sec = tk->xtime_sec;
300 nsecs = timekeeping_get_ns(tk); 303 nsecs = timekeeping_get_ns(tk);
301 304
302 } while (read_seqretry(&tk->lock, seq)); 305 } while (read_seqcount_retry(&timekeeper_seq, seq));
303 306
304 ts->tv_nsec = 0; 307 ts->tv_nsec = 0;
305 timespec_add_ns(ts, nsecs); 308 timespec_add_ns(ts, nsecs);
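
From here on, every reader in timekeeping.c follows the same pattern: the per-timekeeper seqlock is gone, readers spin on the global timekeeper_seq seqcount, and writers additionally take the timekeeper_lock raw spinlock to serialize against each other. A minimal sketch of that reader/writer split, with made-up names (my_seq, my_lock, my_value) standing in for the timekeeping internals:

/* kernel context: <linux/seqlock.h>, <linux/spinlock.h> */
static DEFINE_RAW_SPINLOCK(my_lock);
static seqcount_t my_seq;
static u64 my_value;

static u64 my_read(void)
{
	unsigned int seq;
	u64 val;

	do {					/* lockless: retry if a writer ran */
		seq = read_seqcount_begin(&my_seq);
		val = my_value;
	} while (read_seqcount_retry(&my_seq, seq));

	return val;
}

static void my_write(u64 val)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&my_lock, flags);	/* writers exclude each other */
	write_seqcount_begin(&my_seq);		/* readers will retry          */
	my_value = val;
	write_seqcount_end(&my_seq);
	raw_spin_unlock_irqrestore(&my_lock, flags);
}
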
@@ -335,11 +338,11 @@ ktime_t ktime_get(void)
335 WARN_ON(timekeeping_suspended); 338 WARN_ON(timekeeping_suspended);
336 339
337 do { 340 do {
338 seq = read_seqbegin(&tk->lock); 341 seq = read_seqcount_begin(&timekeeper_seq);
339 secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 342 secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
340 nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; 343 nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
341 344
342 } while (read_seqretry(&tk->lock, seq)); 345 } while (read_seqcount_retry(&timekeeper_seq, seq));
343 /* 346 /*
344 * Use ktime_set/ktime_add_ns to create a proper ktime on 347 * Use ktime_set/ktime_add_ns to create a proper ktime on
345 * 32-bit architectures without CONFIG_KTIME_SCALAR. 348 * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -366,12 +369,12 @@ void ktime_get_ts(struct timespec *ts)
366 WARN_ON(timekeeping_suspended); 369 WARN_ON(timekeeping_suspended);
367 370
368 do { 371 do {
369 seq = read_seqbegin(&tk->lock); 372 seq = read_seqcount_begin(&timekeeper_seq);
370 ts->tv_sec = tk->xtime_sec; 373 ts->tv_sec = tk->xtime_sec;
371 nsec = timekeeping_get_ns(tk); 374 nsec = timekeeping_get_ns(tk);
372 tomono = tk->wall_to_monotonic; 375 tomono = tk->wall_to_monotonic;
373 376
374 } while (read_seqretry(&tk->lock, seq)); 377 } while (read_seqcount_retry(&timekeeper_seq, seq));
375 378
376 ts->tv_sec += tomono.tv_sec; 379 ts->tv_sec += tomono.tv_sec;
377 ts->tv_nsec = 0; 380 ts->tv_nsec = 0;
@@ -379,6 +382,50 @@ void ktime_get_ts(struct timespec *ts)
379} 382}
380EXPORT_SYMBOL_GPL(ktime_get_ts); 383EXPORT_SYMBOL_GPL(ktime_get_ts);
381 384
385
386/**
387 * timekeeping_clocktai - Returns the TAI time of day in a timespec
388 * @ts: pointer to the timespec to be set
389 *
390 * Returns the time of day in a timespec.
391 */
392void timekeeping_clocktai(struct timespec *ts)
393{
394 struct timekeeper *tk = &timekeeper;
395 unsigned long seq;
396 u64 nsecs;
397
398 WARN_ON(timekeeping_suspended);
399
400 do {
401 seq = read_seqcount_begin(&timekeeper_seq);
402
403 ts->tv_sec = tk->xtime_sec + tk->tai_offset;
404 nsecs = timekeeping_get_ns(tk);
405
406 } while (read_seqcount_retry(&timekeeper_seq, seq));
407
408 ts->tv_nsec = 0;
409 timespec_add_ns(ts, nsecs);
410
411}
412EXPORT_SYMBOL(timekeeping_clocktai);
413
414
415/**
416 * ktime_get_clocktai - Returns the TAI time of day in a ktime
417 *
418 * Returns the time of day in a ktime.
419 */
420ktime_t ktime_get_clocktai(void)
421{
422 struct timespec ts;
423
424 timekeeping_clocktai(&ts);
425 return timespec_to_ktime(ts);
426}
427EXPORT_SYMBOL(ktime_get_clocktai);
428
382#ifdef CONFIG_NTP_PPS 429#ifdef CONFIG_NTP_PPS
383 430
384/** 431/**
@@ -399,7 +446,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
399 WARN_ON_ONCE(timekeeping_suspended); 446 WARN_ON_ONCE(timekeeping_suspended);
400 447
401 do { 448 do {
402 seq = read_seqbegin(&tk->lock); 449 seq = read_seqcount_begin(&timekeeper_seq);
403 450
404 *ts_raw = tk->raw_time; 451 *ts_raw = tk->raw_time;
405 ts_real->tv_sec = tk->xtime_sec; 452 ts_real->tv_sec = tk->xtime_sec;
@@ -408,7 +455,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
408 nsecs_raw = timekeeping_get_ns_raw(tk); 455 nsecs_raw = timekeeping_get_ns_raw(tk);
409 nsecs_real = timekeeping_get_ns(tk); 456 nsecs_real = timekeeping_get_ns(tk);
410 457
411 } while (read_seqretry(&tk->lock, seq)); 458 } while (read_seqcount_retry(&timekeeper_seq, seq));
412 459
413 timespec_add_ns(ts_raw, nsecs_raw); 460 timespec_add_ns(ts_raw, nsecs_raw);
414 timespec_add_ns(ts_real, nsecs_real); 461 timespec_add_ns(ts_real, nsecs_real);
@@ -448,7 +495,8 @@ int do_settimeofday(const struct timespec *tv)
448 if (!timespec_valid_strict(tv)) 495 if (!timespec_valid_strict(tv))
449 return -EINVAL; 496 return -EINVAL;
450 497
451 write_seqlock_irqsave(&tk->lock, flags); 498 raw_spin_lock_irqsave(&timekeeper_lock, flags);
499 write_seqcount_begin(&timekeeper_seq);
452 500
453 timekeeping_forward_now(tk); 501 timekeeping_forward_now(tk);
454 502
@@ -460,9 +508,10 @@ int do_settimeofday(const struct timespec *tv)
460 508
461 tk_set_xtime(tk, tv); 509 tk_set_xtime(tk, tv);
462 510
463 timekeeping_update(tk, true); 511 timekeeping_update(tk, true, true);
464 512
465 write_sequnlock_irqrestore(&tk->lock, flags); 513 write_seqcount_end(&timekeeper_seq);
514 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
466 515
467 /* signal hrtimers about time change */ 516 /* signal hrtimers about time change */
468 clock_was_set(); 517 clock_was_set();
@@ -487,7 +536,8 @@ int timekeeping_inject_offset(struct timespec *ts)
487 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) 536 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
488 return -EINVAL; 537 return -EINVAL;
489 538
490 write_seqlock_irqsave(&tk->lock, flags); 539 raw_spin_lock_irqsave(&timekeeper_lock, flags);
540 write_seqcount_begin(&timekeeper_seq);
491 541
492 timekeeping_forward_now(tk); 542 timekeeping_forward_now(tk);
493 543
@@ -502,9 +552,10 @@ int timekeeping_inject_offset(struct timespec *ts)
502 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); 552 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
503 553
504error: /* even if we error out, we forwarded the time, so call update */ 554error: /* even if we error out, we forwarded the time, so call update */
505 timekeeping_update(tk, true); 555 timekeeping_update(tk, true, true);
506 556
507 write_sequnlock_irqrestore(&tk->lock, flags); 557 write_seqcount_end(&timekeeper_seq);
558 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
508 559
509 /* signal hrtimers about time change */ 560 /* signal hrtimers about time change */
510 clock_was_set(); 561 clock_was_set();
@@ -513,6 +564,52 @@ error: /* even if we error out, we forwarded the time, so call update */
513} 564}
514EXPORT_SYMBOL(timekeeping_inject_offset); 565EXPORT_SYMBOL(timekeeping_inject_offset);
515 566
567
568/**
569 * timekeeping_get_tai_offset - Returns current TAI offset from UTC
570 *
571 */
572s32 timekeeping_get_tai_offset(void)
573{
574 struct timekeeper *tk = &timekeeper;
575 unsigned int seq;
576 s32 ret;
577
578 do {
579 seq = read_seqcount_begin(&timekeeper_seq);
580 ret = tk->tai_offset;
581 } while (read_seqcount_retry(&timekeeper_seq, seq));
582
583 return ret;
584}
585
586/**
587 * __timekeeping_set_tai_offset - Lock free worker function
588 *
589 */
590static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
591{
592 tk->tai_offset = tai_offset;
593 tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
594}
595
596/**
597 * timekeeping_set_tai_offset - Sets the current TAI offset from UTC
598 *
599 */
600void timekeeping_set_tai_offset(s32 tai_offset)
601{
602 struct timekeeper *tk = &timekeeper;
603 unsigned long flags;
604
605 raw_spin_lock_irqsave(&timekeeper_lock, flags);
606 write_seqcount_begin(&timekeeper_seq);
607 __timekeeping_set_tai_offset(tk, tai_offset);
608 write_seqcount_end(&timekeeper_seq);
609 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
610 clock_was_set();
611}
612
516/** 613/**
517 * change_clocksource - Swaps clocksources if a new one is available 614 * change_clocksource - Swaps clocksources if a new one is available
518 * 615 *
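
The new tai_offset/offs_tai pair lets the timekeeper answer TAI queries directly: TAI is simply UTC plus the current leap-second offset, and offs_tai is offs_real shifted by the same number of seconds so ktime-based paths can use it unchanged. As a rough illustration (my_get_tai is a made-up wrapper; the in-kernel timekeeping_clocktai() above reads both values under a single seqcount pass, which this naive version does not):

static void my_get_tai(struct timespec *ts)
{
	getnstimeofday(ts);				/* UTC wall time  */
	ts->tv_sec += timekeeping_get_tai_offset();	/* plus TAI - UTC */
}
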
@@ -526,7 +623,8 @@ static int change_clocksource(void *data)
526 623
527 new = (struct clocksource *) data; 624 new = (struct clocksource *) data;
528 625
529 write_seqlock_irqsave(&tk->lock, flags); 626 raw_spin_lock_irqsave(&timekeeper_lock, flags);
627 write_seqcount_begin(&timekeeper_seq);
530 628
531 timekeeping_forward_now(tk); 629 timekeeping_forward_now(tk);
532 if (!new->enable || new->enable(new) == 0) { 630 if (!new->enable || new->enable(new) == 0) {
@@ -535,9 +633,10 @@ static int change_clocksource(void *data)
535 if (old->disable) 633 if (old->disable)
536 old->disable(old); 634 old->disable(old);
537 } 635 }
538 timekeeping_update(tk, true); 636 timekeeping_update(tk, true, true);
539 637
540 write_sequnlock_irqrestore(&tk->lock, flags); 638 write_seqcount_end(&timekeeper_seq);
639 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
541 640
542 return 0; 641 return 0;
543} 642}
@@ -587,11 +686,11 @@ void getrawmonotonic(struct timespec *ts)
587 s64 nsecs; 686 s64 nsecs;
588 687
589 do { 688 do {
590 seq = read_seqbegin(&tk->lock); 689 seq = read_seqcount_begin(&timekeeper_seq);
591 nsecs = timekeeping_get_ns_raw(tk); 690 nsecs = timekeeping_get_ns_raw(tk);
592 *ts = tk->raw_time; 691 *ts = tk->raw_time;
593 692
594 } while (read_seqretry(&tk->lock, seq)); 693 } while (read_seqcount_retry(&timekeeper_seq, seq));
595 694
596 timespec_add_ns(ts, nsecs); 695 timespec_add_ns(ts, nsecs);
597} 696}
@@ -607,11 +706,11 @@ int timekeeping_valid_for_hres(void)
607 int ret; 706 int ret;
608 707
609 do { 708 do {
610 seq = read_seqbegin(&tk->lock); 709 seq = read_seqcount_begin(&timekeeper_seq);
611 710
612 ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 711 ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
613 712
614 } while (read_seqretry(&tk->lock, seq)); 713 } while (read_seqcount_retry(&timekeeper_seq, seq));
615 714
616 return ret; 715 return ret;
617} 716}
@@ -626,11 +725,11 @@ u64 timekeeping_max_deferment(void)
626 u64 ret; 725 u64 ret;
627 726
628 do { 727 do {
629 seq = read_seqbegin(&tk->lock); 728 seq = read_seqcount_begin(&timekeeper_seq);
630 729
631 ret = tk->clock->max_idle_ns; 730 ret = tk->clock->max_idle_ns;
632 731
633 } while (read_seqretry(&tk->lock, seq)); 732 } while (read_seqcount_retry(&timekeeper_seq, seq));
634 733
635 return ret; 734 return ret;
636} 735}
@@ -693,11 +792,10 @@ void __init timekeeping_init(void)
693 boot.tv_nsec = 0; 792 boot.tv_nsec = 0;
694 } 793 }
695 794
696 seqlock_init(&tk->lock); 795 raw_spin_lock_irqsave(&timekeeper_lock, flags);
697 796 write_seqcount_begin(&timekeeper_seq);
698 ntp_init(); 797 ntp_init();
699 798
700 write_seqlock_irqsave(&tk->lock, flags);
701 clock = clocksource_default_clock(); 799 clock = clocksource_default_clock();
702 if (clock->enable) 800 if (clock->enable)
703 clock->enable(clock); 801 clock->enable(clock);
@@ -716,7 +814,10 @@ void __init timekeeping_init(void)
716 tmp.tv_nsec = 0; 814 tmp.tv_nsec = 0;
717 tk_set_sleep_time(tk, tmp); 815 tk_set_sleep_time(tk, tmp);
718 816
719 write_sequnlock_irqrestore(&tk->lock, flags); 817 memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
818
819 write_seqcount_end(&timekeeper_seq);
820 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
720} 821}
721 822
722/* time in seconds when suspend began */ 823/* time in seconds when suspend began */
@@ -764,15 +865,17 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
764 if (has_persistent_clock()) 865 if (has_persistent_clock())
765 return; 866 return;
766 867
767 write_seqlock_irqsave(&tk->lock, flags); 868 raw_spin_lock_irqsave(&timekeeper_lock, flags);
869 write_seqcount_begin(&timekeeper_seq);
768 870
769 timekeeping_forward_now(tk); 871 timekeeping_forward_now(tk);
770 872
771 __timekeeping_inject_sleeptime(tk, delta); 873 __timekeeping_inject_sleeptime(tk, delta);
772 874
773 timekeeping_update(tk, true); 875 timekeeping_update(tk, true, true);
774 876
775 write_sequnlock_irqrestore(&tk->lock, flags); 877 write_seqcount_end(&timekeeper_seq);
878 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
776 879
777 /* signal hrtimers about time change */ 880 /* signal hrtimers about time change */
778 clock_was_set(); 881 clock_was_set();
@@ -788,26 +891,72 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
788static void timekeeping_resume(void) 891static void timekeeping_resume(void)
789{ 892{
790 struct timekeeper *tk = &timekeeper; 893 struct timekeeper *tk = &timekeeper;
894 struct clocksource *clock = tk->clock;
791 unsigned long flags; 895 unsigned long flags;
792 struct timespec ts; 896 struct timespec ts_new, ts_delta;
897 cycle_t cycle_now, cycle_delta;
898 bool suspendtime_found = false;
793 899
794 read_persistent_clock(&ts); 900 read_persistent_clock(&ts_new);
795 901
796 clockevents_resume(); 902 clockevents_resume();
797 clocksource_resume(); 903 clocksource_resume();
798 904
799 write_seqlock_irqsave(&tk->lock, flags); 905 raw_spin_lock_irqsave(&timekeeper_lock, flags);
906 write_seqcount_begin(&timekeeper_seq);
907
908 /*
909 * After system resumes, we need to calculate the suspended time and
910 * compensate it for the OS time. There are 3 sources that could be
911 * used: Nonstop clocksource during suspend, persistent clock and rtc
912 * device.
913 *
914 * One specific platform may have 1 or 2 or all of them, and the
915 * preference will be:
916 * suspend-nonstop clocksource -> persistent clock -> rtc
917 * The less preferred source will only be tried if there is no better
918 * usable source. The rtc part is handled separately in rtc core code.
919 */
920 cycle_now = clock->read(clock);
921 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
922 cycle_now > clock->cycle_last) {
923 u64 num, max = ULLONG_MAX;
924 u32 mult = clock->mult;
925 u32 shift = clock->shift;
926 s64 nsec = 0;
927
928 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
800 929
801 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { 930 /*
802 ts = timespec_sub(ts, timekeeping_suspend_time); 931 * "cycle_delta * mult" may cause a 64-bit overflow if the
 803 __timekeeping_inject_sleeptime(tk, &ts); 932 * suspended time is too long. In that case we need to do the
 933 * 64-bit math carefully
934 */
935 do_div(max, mult);
936 if (cycle_delta > max) {
937 num = div64_u64(cycle_delta, max);
938 nsec = (((u64) max * mult) >> shift) * num;
939 cycle_delta -= num * max;
940 }
941 nsec += ((u64) cycle_delta * mult) >> shift;
942
943 ts_delta = ns_to_timespec(nsec);
944 suspendtime_found = true;
945 } else if (timespec_compare(&ts_new, &timekeeping_suspend_time) > 0) {
946 ts_delta = timespec_sub(ts_new, timekeeping_suspend_time);
947 suspendtime_found = true;
804 } 948 }
805 /* re-base the last cycle value */ 949
806 tk->clock->cycle_last = tk->clock->read(tk->clock); 950 if (suspendtime_found)
951 __timekeeping_inject_sleeptime(tk, &ts_delta);
952
953 /* Re-base the last cycle value */
954 tk->cycle_last = clock->cycle_last = cycle_now;
807 tk->ntp_error = 0; 955 tk->ntp_error = 0;
808 timekeeping_suspended = 0; 956 timekeeping_suspended = 0;
809 timekeeping_update(tk, false); 957 timekeeping_update(tk, false, true);
810 write_sequnlock_irqrestore(&tk->lock, flags); 958 write_seqcount_end(&timekeeper_seq);
959 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
811 960
812 touch_softlockup_watchdog(); 961 touch_softlockup_watchdog();
813 962
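
The suspend-nonstop branch above has to turn a potentially enormous cycle delta into nanoseconds, and cycle_delta * mult can overflow 64 bits, so the delta is first split into chunks of at most ULLONG_MAX / mult cycles. A standalone restatement of that arithmetic (plain C, hypothetical function name; mult and shift play the same role as the clocksource's mult/shift):

#include <stdint.h>

static uint64_t cycles_to_ns_safe(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	uint64_t max = UINT64_MAX / mult;	/* largest chunk that cannot overflow */
	uint64_t ns = 0;

	if (cycles > max) {
		uint64_t chunks = cycles / max;

		ns = ((max * mult) >> shift) * chunks;
		cycles -= chunks * max;
	}
	return ns + ((cycles * mult) >> shift);
}
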
@@ -826,7 +975,8 @@ static int timekeeping_suspend(void)
826 975
827 read_persistent_clock(&timekeeping_suspend_time); 976 read_persistent_clock(&timekeeping_suspend_time);
828 977
829 write_seqlock_irqsave(&tk->lock, flags); 978 raw_spin_lock_irqsave(&timekeeper_lock, flags);
979 write_seqcount_begin(&timekeeper_seq);
830 timekeeping_forward_now(tk); 980 timekeeping_forward_now(tk);
831 timekeeping_suspended = 1; 981 timekeeping_suspended = 1;
832 982
@@ -849,7 +999,8 @@ static int timekeeping_suspend(void)
849 timekeeping_suspend_time = 999 timekeeping_suspend_time =
850 timespec_add(timekeeping_suspend_time, delta_delta); 1000 timespec_add(timekeeping_suspend_time, delta_delta);
851 } 1001 }
852 write_sequnlock_irqrestore(&tk->lock, flags); 1002 write_seqcount_end(&timekeeper_seq);
1003 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
853 1004
854 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 1005 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
855 clocksource_suspend(); 1006 clocksource_suspend();
@@ -1099,6 +1250,8 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1099 tk_set_wall_to_mono(tk, 1250 tk_set_wall_to_mono(tk,
1100 timespec_sub(tk->wall_to_monotonic, ts)); 1251 timespec_sub(tk->wall_to_monotonic, ts));
1101 1252
1253 __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
1254
1102 clock_was_set_delayed(); 1255 clock_was_set_delayed();
1103 } 1256 }
1104 } 1257 }
@@ -1116,15 +1269,16 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1116static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, 1269static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1117 u32 shift) 1270 u32 shift)
1118{ 1271{
1272 cycle_t interval = tk->cycle_interval << shift;
1119 u64 raw_nsecs; 1273 u64 raw_nsecs;
1120 1274
1121 /* If the offset is smaller than a shifted interval, do nothing */ 1275 /* If the offset is smaller than a shifted interval, do nothing */
1122 if (offset < tk->cycle_interval<<shift) 1276 if (offset < interval)
1123 return offset; 1277 return offset;
1124 1278
1125 /* Accumulate one shifted interval */ 1279 /* Accumulate one shifted interval */
1126 offset -= tk->cycle_interval << shift; 1280 offset -= interval;
1127 tk->clock->cycle_last += tk->cycle_interval << shift; 1281 tk->cycle_last += interval;
1128 1282
1129 tk->xtime_nsec += tk->xtime_interval << shift; 1283 tk->xtime_nsec += tk->xtime_interval << shift;
1130 accumulate_nsecs_to_secs(tk); 1284 accumulate_nsecs_to_secs(tk);
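logarithmic_accumulation() is consumed by update_wall_time() in power-of-two multiples of the base interval, so a large backlog of cycles is folded in with a logarithmic number of iterations rather than one tick at a time. A hedged, standalone sketch of that accumulation strategy (names and values are illustrative, not the kernel code):

	#include <stdint.h>
	#include <stdio.h>

	/* Consume 'offset' in chunks of interval << shift, largest chunk first. */
	static uint64_t accumulate(uint64_t offset, uint64_t interval, unsigned int *iters)
	{
		int shift = 0;

		while ((interval << (shift + 1)) <= offset)
			shift++;

		while (offset >= interval) {
			if (offset >= (interval << shift)) {
				offset -= interval << shift;	/* one shifted interval */
				(*iters)++;
			}
			if (shift)
				shift--;
		}
		return offset;		/* remainder smaller than one interval */
	}

	int main(void)
	{
		unsigned int iters = 0;
		uint64_t rem = accumulate((1ULL << 40) + 123, 1000000, &iters);

		printf("remainder %llu after %u accumulation steps\n",
		       (unsigned long long)rem, iters);
		return 0;
	}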
@@ -1181,27 +1335,28 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
1181static void update_wall_time(void) 1335static void update_wall_time(void)
1182{ 1336{
1183 struct clocksource *clock; 1337 struct clocksource *clock;
1184 struct timekeeper *tk = &timekeeper; 1338 struct timekeeper *real_tk = &timekeeper;
1339 struct timekeeper *tk = &shadow_timekeeper;
1185 cycle_t offset; 1340 cycle_t offset;
1186 int shift = 0, maxshift; 1341 int shift = 0, maxshift;
1187 unsigned long flags; 1342 unsigned long flags;
1188 1343
1189 write_seqlock_irqsave(&tk->lock, flags); 1344 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1190 1345
1191 /* Make sure we're fully resumed: */ 1346 /* Make sure we're fully resumed: */
1192 if (unlikely(timekeeping_suspended)) 1347 if (unlikely(timekeeping_suspended))
1193 goto out; 1348 goto out;
1194 1349
1195 clock = tk->clock; 1350 clock = real_tk->clock;
1196 1351
1197#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1352#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1198 offset = tk->cycle_interval; 1353 offset = real_tk->cycle_interval;
1199#else 1354#else
1200 offset = (clock->read(clock) - clock->cycle_last) & clock->mask; 1355 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
1201#endif 1356#endif
1202 1357
1203 /* Check if there's really nothing to do */ 1358 /* Check if there's really nothing to do */
1204 if (offset < tk->cycle_interval) 1359 if (offset < real_tk->cycle_interval)
1205 goto out; 1360 goto out;
1206 1361
1207 /* 1362 /*
@@ -1238,11 +1393,24 @@ static void update_wall_time(void)
1238 */ 1393 */
1239 accumulate_nsecs_to_secs(tk); 1394 accumulate_nsecs_to_secs(tk);
1240 1395
1241 timekeeping_update(tk, false); 1396 write_seqcount_begin(&timekeeper_seq);
1242 1397 /* Update clock->cycle_last with the new value */
1398 clock->cycle_last = tk->cycle_last;
1399 /*
1400 * Update the real timekeeper.
1401 *
1402 * We could avoid this memcpy by switching pointers, but that
1403 * requires changes to all other timekeeper usage sites as
1404 * well, i.e. move the timekeeper pointer getter into the
1405 * spinlocked/seqcount protected sections. And we trade this
1406 * memcpy under the timekeeper_seq against one before we start
1407 * updating.
1408 */
1409 memcpy(real_tk, tk, sizeof(*tk));
1410 timekeeping_update(real_tk, false, false);
1411 write_seqcount_end(&timekeeper_seq);
1243out: 1412out:
1244 write_sequnlock_irqrestore(&tk->lock, flags); 1413 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1245
1246} 1414}
1247 1415
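The shadow-timekeeper comment above describes a general pattern: do the expensive update on a private copy, then publish it with a single memcpy inside the seqcount write section so lockless readers never observe a torn structure. A minimal kernel-style sketch, assuming only the stock raw-spinlock and seqcount primitives; the demo_* names are made up for illustration:

	#include <linux/seqlock.h>
	#include <linux/spinlock.h>
	#include <linux/string.h>
	#include <linux/types.h>

	struct demo_state { u64 a, b; };

	static struct demo_state live_state;	/* what lockless readers see */
	static struct demo_state shadow_state;	/* writer's private scratch copy */
	static DEFINE_RAW_SPINLOCK(demo_lock);	/* serializes writers */
	static seqcount_t demo_seq;		/* seqcount_init() assumed at init time */

	static void demo_update(u64 a, u64 b)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&demo_lock, flags);
		/* expensive recomputation happens on the shadow; readers are unaffected */
		shadow_state.a = a;
		shadow_state.b = b;

		write_seqcount_begin(&demo_seq);
		memcpy(&live_state, &shadow_state, sizeof(live_state));
		write_seqcount_end(&demo_seq);
		raw_spin_unlock_irqrestore(&demo_lock, flags);
	}

	static u64 demo_read_a(void)
	{
		unsigned int seq;
		u64 val;

		do {
			seq = read_seqcount_begin(&demo_seq);
			val = live_state.a;
		} while (read_seqcount_retry(&demo_seq, seq));

		return val;
	}

Readers pay only the retry loop; writers pay one extra memcpy per update, which is exactly the trade-off the comment calls out.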
1248/** 1416/**
@@ -1289,13 +1457,13 @@ void get_monotonic_boottime(struct timespec *ts)
1289 WARN_ON(timekeeping_suspended); 1457 WARN_ON(timekeeping_suspended);
1290 1458
1291 do { 1459 do {
1292 seq = read_seqbegin(&tk->lock); 1460 seq = read_seqcount_begin(&timekeeper_seq);
1293 ts->tv_sec = tk->xtime_sec; 1461 ts->tv_sec = tk->xtime_sec;
1294 nsec = timekeeping_get_ns(tk); 1462 nsec = timekeeping_get_ns(tk);
1295 tomono = tk->wall_to_monotonic; 1463 tomono = tk->wall_to_monotonic;
1296 sleep = tk->total_sleep_time; 1464 sleep = tk->total_sleep_time;
1297 1465
1298 } while (read_seqretry(&tk->lock, seq)); 1466 } while (read_seqcount_retry(&timekeeper_seq, seq));
1299 1467
1300 ts->tv_sec += tomono.tv_sec + sleep.tv_sec; 1468 ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
1301 ts->tv_nsec = 0; 1469 ts->tv_nsec = 0;
@@ -1354,10 +1522,10 @@ struct timespec current_kernel_time(void)
1354 unsigned long seq; 1522 unsigned long seq;
1355 1523
1356 do { 1524 do {
1357 seq = read_seqbegin(&tk->lock); 1525 seq = read_seqcount_begin(&timekeeper_seq);
1358 1526
1359 now = tk_xtime(tk); 1527 now = tk_xtime(tk);
1360 } while (read_seqretry(&tk->lock, seq)); 1528 } while (read_seqcount_retry(&timekeeper_seq, seq));
1361 1529
1362 return now; 1530 return now;
1363} 1531}
@@ -1370,11 +1538,11 @@ struct timespec get_monotonic_coarse(void)
1370 unsigned long seq; 1538 unsigned long seq;
1371 1539
1372 do { 1540 do {
1373 seq = read_seqbegin(&tk->lock); 1541 seq = read_seqcount_begin(&timekeeper_seq);
1374 1542
1375 now = tk_xtime(tk); 1543 now = tk_xtime(tk);
1376 mono = tk->wall_to_monotonic; 1544 mono = tk->wall_to_monotonic;
1377 } while (read_seqretry(&tk->lock, seq)); 1545 } while (read_seqcount_retry(&timekeeper_seq, seq));
1378 1546
1379 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, 1547 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
1380 now.tv_nsec + mono.tv_nsec); 1548 now.tv_nsec + mono.tv_nsec);
@@ -1405,11 +1573,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1405 unsigned long seq; 1573 unsigned long seq;
1406 1574
1407 do { 1575 do {
1408 seq = read_seqbegin(&tk->lock); 1576 seq = read_seqcount_begin(&timekeeper_seq);
1409 *xtim = tk_xtime(tk); 1577 *xtim = tk_xtime(tk);
1410 *wtom = tk->wall_to_monotonic; 1578 *wtom = tk->wall_to_monotonic;
1411 *sleep = tk->total_sleep_time; 1579 *sleep = tk->total_sleep_time;
1412 } while (read_seqretry(&tk->lock, seq)); 1580 } while (read_seqcount_retry(&timekeeper_seq, seq));
1413} 1581}
1414 1582
1415#ifdef CONFIG_HIGH_RES_TIMERS 1583#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1421,7 +1589,8 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1421 * Returns current monotonic time and updates the offsets 1589 * Returns current monotonic time and updates the offsets
 1422 * Called from hrtimer_interrupt() or retrigger_next_event() 1590 * Called from hrtimer_interrupt() or retrigger_next_event()
1423 */ 1591 */
1424ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) 1592ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
1593 ktime_t *offs_tai)
1425{ 1594{
1426 struct timekeeper *tk = &timekeeper; 1595 struct timekeeper *tk = &timekeeper;
1427 ktime_t now; 1596 ktime_t now;
@@ -1429,14 +1598,15 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1429 u64 secs, nsecs; 1598 u64 secs, nsecs;
1430 1599
1431 do { 1600 do {
1432 seq = read_seqbegin(&tk->lock); 1601 seq = read_seqcount_begin(&timekeeper_seq);
1433 1602
1434 secs = tk->xtime_sec; 1603 secs = tk->xtime_sec;
1435 nsecs = timekeeping_get_ns(tk); 1604 nsecs = timekeeping_get_ns(tk);
1436 1605
1437 *offs_real = tk->offs_real; 1606 *offs_real = tk->offs_real;
1438 *offs_boot = tk->offs_boot; 1607 *offs_boot = tk->offs_boot;
1439 } while (read_seqretry(&tk->lock, seq)); 1608 *offs_tai = tk->offs_tai;
1609 } while (read_seqcount_retry(&timekeeper_seq, seq));
1440 1610
1441 now = ktime_add_ns(ktime_set(secs, 0), nsecs); 1611 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1442 now = ktime_sub(now, *offs_real); 1612 now = ktime_sub(now, *offs_real);
@@ -1454,15 +1624,79 @@ ktime_t ktime_get_monotonic_offset(void)
1454 struct timespec wtom; 1624 struct timespec wtom;
1455 1625
1456 do { 1626 do {
1457 seq = read_seqbegin(&tk->lock); 1627 seq = read_seqcount_begin(&timekeeper_seq);
1458 wtom = tk->wall_to_monotonic; 1628 wtom = tk->wall_to_monotonic;
1459 } while (read_seqretry(&tk->lock, seq)); 1629 } while (read_seqcount_retry(&timekeeper_seq, seq));
1460 1630
1461 return timespec_to_ktime(wtom); 1631 return timespec_to_ktime(wtom);
1462} 1632}
1463EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); 1633EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1464 1634
1465/** 1635/**
1636 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
1637 */
1638int do_adjtimex(struct timex *txc)
1639{
1640 struct timekeeper *tk = &timekeeper;
1641 unsigned long flags;
1642 struct timespec ts;
1643 s32 orig_tai, tai;
1644 int ret;
1645
1646 /* Validate the data before disabling interrupts */
1647 ret = ntp_validate_timex(txc);
1648 if (ret)
1649 return ret;
1650
1651 if (txc->modes & ADJ_SETOFFSET) {
1652 struct timespec delta;
1653 delta.tv_sec = txc->time.tv_sec;
1654 delta.tv_nsec = txc->time.tv_usec;
1655 if (!(txc->modes & ADJ_NANO))
1656 delta.tv_nsec *= 1000;
1657 ret = timekeeping_inject_offset(&delta);
1658 if (ret)
1659 return ret;
1660 }
1661
1662 getnstimeofday(&ts);
1663
1664 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1665 write_seqcount_begin(&timekeeper_seq);
1666
1667 orig_tai = tai = tk->tai_offset;
1668 ret = __do_adjtimex(txc, &ts, &tai);
1669
1670 if (tai != orig_tai) {
1671 __timekeeping_set_tai_offset(tk, tai);
1672 clock_was_set_delayed();
1673 }
1674 write_seqcount_end(&timekeeper_seq);
1675 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1676
1677 return ret;
1678}
1679
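From userspace the new do_adjtimex() path is reached through adjtimex(2). A small, hedged example that only queries the clock state and the TAI offset handled above; stepping the clock would additionally require privilege:

	#include <stdio.h>
	#include <sys/timex.h>

	int main(void)
	{
		struct timex tx = { .modes = 0 };	/* read-only query, no adjustment */
		int state = adjtimex(&tx);

		if (state == -1) {
			perror("adjtimex");
			return 1;
		}
		/* to step the clock, a privileged caller would instead set
		 * tx.modes = ADJ_SETOFFSET | ADJ_NANO and fill in tx.time */
		printf("clock state %d, TAI-UTC offset %d s\n", state, tx.tai);
		return 0;
	}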
1680#ifdef CONFIG_NTP_PPS
1681/**
1682 * hardpps() - Accessor function to NTP __hardpps function
1683 */
1684void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
1685{
1686 unsigned long flags;
1687
1688 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1689 write_seqcount_begin(&timekeeper_seq);
1690
1691 __hardpps(phase_ts, raw_ts);
1692
1693 write_seqcount_end(&timekeeper_seq);
1694 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1695}
1696EXPORT_SYMBOL(hardpps);
1697#endif
1698
1699/**
1466 * xtime_update() - advances the timekeeping infrastructure 1700 * xtime_update() - advances the timekeeping infrastructure
1467 * @ticks: number of ticks, that have elapsed since the last call. 1701 * @ticks: number of ticks, that have elapsed since the last call.
1468 * 1702 *
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index af5a7e9f164b..3bdf28323012 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -20,6 +20,13 @@
20 20
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22 22
23
24struct timer_list_iter {
25 int cpu;
26 bool second_pass;
27 u64 now;
28};
29
23typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); 30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
24 31
25DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); 32DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
@@ -133,7 +140,6 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
133 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); 140 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
134 int i; 141 int i;
135 142
136 SEQ_printf(m, "\n");
137 SEQ_printf(m, "cpu: %d\n", cpu); 143 SEQ_printf(m, "cpu: %d\n", cpu);
138 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 144 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
139 SEQ_printf(m, " clock %d:\n", i); 145 SEQ_printf(m, " clock %d:\n", i);
@@ -187,6 +193,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
187 193
188#undef P 194#undef P
189#undef P_ns 195#undef P_ns
196 SEQ_printf(m, "\n");
190} 197}
191 198
192#ifdef CONFIG_GENERIC_CLOCKEVENTS 199#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -195,7 +202,6 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
195{ 202{
196 struct clock_event_device *dev = td->evtdev; 203 struct clock_event_device *dev = td->evtdev;
197 204
198 SEQ_printf(m, "\n");
199 SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); 205 SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
200 if (cpu < 0) 206 if (cpu < 0)
201 SEQ_printf(m, "Broadcast device\n"); 207 SEQ_printf(m, "Broadcast device\n");
@@ -230,12 +236,11 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
230 print_name_offset(m, dev->event_handler); 236 print_name_offset(m, dev->event_handler);
231 SEQ_printf(m, "\n"); 237 SEQ_printf(m, "\n");
232 SEQ_printf(m, " retries: %lu\n", dev->retries); 238 SEQ_printf(m, " retries: %lu\n", dev->retries);
239 SEQ_printf(m, "\n");
233} 240}
234 241
235static void timer_list_show_tickdevices(struct seq_file *m) 242static void timer_list_show_tickdevices_header(struct seq_file *m)
236{ 243{
237 int cpu;
238
239#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 244#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
240 print_tickdevice(m, tick_get_broadcast_device(), -1); 245 print_tickdevice(m, tick_get_broadcast_device(), -1);
241 SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 246 SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
@@ -246,47 +251,104 @@ static void timer_list_show_tickdevices(struct seq_file *m)
246#endif 251#endif
247 SEQ_printf(m, "\n"); 252 SEQ_printf(m, "\n");
248#endif 253#endif
249 for_each_online_cpu(cpu)
250 print_tickdevice(m, tick_get_device(cpu), cpu);
251 SEQ_printf(m, "\n");
252} 254}
253#else
254static void timer_list_show_tickdevices(struct seq_file *m) { }
255#endif 255#endif
256 256
257static inline void timer_list_header(struct seq_file *m, u64 now)
258{
259 SEQ_printf(m, "Timer List Version: v0.7\n");
260 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
261 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
262 SEQ_printf(m, "\n");
263}
264
257static int timer_list_show(struct seq_file *m, void *v) 265static int timer_list_show(struct seq_file *m, void *v)
258{ 266{
267 struct timer_list_iter *iter = v;
268 u64 now = ktime_to_ns(ktime_get());
269
270 if (iter->cpu == -1 && !iter->second_pass)
271 timer_list_header(m, now);
272 else if (!iter->second_pass)
273 print_cpu(m, iter->cpu, iter->now);
274#ifdef CONFIG_GENERIC_CLOCKEVENTS
275 else if (iter->cpu == -1 && iter->second_pass)
276 timer_list_show_tickdevices_header(m);
277 else
278 print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
279#endif
280 return 0;
281}
282
283void sysrq_timer_list_show(void)
284{
259 u64 now = ktime_to_ns(ktime_get()); 285 u64 now = ktime_to_ns(ktime_get());
260 int cpu; 286 int cpu;
261 287
262 SEQ_printf(m, "Timer List Version: v0.7\n"); 288 timer_list_header(NULL, now);
263 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
264 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
265 289
266 for_each_online_cpu(cpu) 290 for_each_online_cpu(cpu)
267 print_cpu(m, cpu, now); 291 print_cpu(NULL, cpu, now);
268 292
269 SEQ_printf(m, "\n"); 293#ifdef CONFIG_GENERIC_CLOCKEVENTS
270 timer_list_show_tickdevices(m); 294 timer_list_show_tickdevices_header(NULL);
295 for_each_online_cpu(cpu)
296 print_tickdevice(NULL, tick_get_device(cpu), cpu);
297#endif
298 return;
299}
271 300
272 return 0; 301static void *timer_list_start(struct seq_file *file, loff_t *offset)
302{
303 struct timer_list_iter *iter = file->private;
304
305 if (!*offset) {
306 iter->cpu = -1;
307 iter->now = ktime_to_ns(ktime_get());
308 } else if (iter->cpu >= nr_cpu_ids) {
309#ifdef CONFIG_GENERIC_CLOCKEVENTS
310 if (!iter->second_pass) {
311 iter->cpu = -1;
312 iter->second_pass = true;
313 } else
314 return NULL;
315#else
316 return NULL;
317#endif
318 }
319 return iter;
273} 320}
274 321
275void sysrq_timer_list_show(void) 322static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
323{
324 struct timer_list_iter *iter = file->private;
325 iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
326 ++*offset;
327 return timer_list_start(file, offset);
328}
329
330static void timer_list_stop(struct seq_file *seq, void *v)
276{ 331{
277 timer_list_show(NULL, NULL);
278} 332}
279 333
334static const struct seq_operations timer_list_sops = {
335 .start = timer_list_start,
336 .next = timer_list_next,
337 .stop = timer_list_stop,
338 .show = timer_list_show,
339};
340
280static int timer_list_open(struct inode *inode, struct file *filp) 341static int timer_list_open(struct inode *inode, struct file *filp)
281{ 342{
282 return single_open(filp, timer_list_show, NULL); 343 return seq_open_private(filp, &timer_list_sops,
344 sizeof(struct timer_list_iter));
283} 345}
284 346
285static const struct file_operations timer_list_fops = { 347static const struct file_operations timer_list_fops = {
286 .open = timer_list_open, 348 .open = timer_list_open,
287 .read = seq_read, 349 .read = seq_read,
288 .llseek = seq_lseek, 350 .llseek = seq_lseek,
289 .release = single_release, 351 .release = seq_release_private,
290}; 352};
291 353
292static int __init init_timer_list_procfs(void) 354static int __init init_timer_list_procfs(void)
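The conversion above is the standard seq_file iterator pattern: per-open state is allocated by seq_open_private(), and ->start/->next advance one record per ->show() call. A stripped-down, hedged sketch of the same plumbing over a static array; all demo_* names and the proc file name are illustrative:

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	struct demo_iter { int pos; };

	static const int demo_data[] = { 1, 1, 2, 3, 5, 8 };

	static void *demo_start(struct seq_file *m, loff_t *off)
	{
		struct demo_iter *iter = m->private;	/* allocated by seq_open_private() */

		iter->pos = *off;
		return iter->pos < ARRAY_SIZE(demo_data) ? iter : NULL;
	}

	static void *demo_next(struct seq_file *m, void *v, loff_t *off)
	{
		++*off;
		return demo_start(m, off);
	}

	static void demo_stop(struct seq_file *m, void *v) { }

	static int demo_show(struct seq_file *m, void *v)
	{
		struct demo_iter *iter = v;

		seq_printf(m, "%d\n", demo_data[iter->pos]);
		return 0;
	}

	static const struct seq_operations demo_sops = {
		.start = demo_start, .next = demo_next,
		.stop  = demo_stop,  .show = demo_show,
	};

	static int demo_open(struct inode *inode, struct file *filp)
	{
		return seq_open_private(filp, &demo_sops, sizeof(struct demo_iter));
	}

	static const struct file_operations demo_fops = {
		.open    = demo_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = seq_release_private,
	};

	static int __init demo_init(void)
	{
		proc_create("demo_seq", 0444, NULL, &demo_fops);
		return 0;
	}
	module_init(demo_init);
	MODULE_LICENSE("GPL");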
diff --git a/kernel/timer.c b/kernel/timer.c
index 1b7489fdea41..a860bba34412 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * linux/kernel/timer.c 2 * linux/kernel/timer.c
3 * 3 *
4 * Kernel internal timers, basic process system calls 4 * Kernel internal timers
5 * 5 *
6 * Copyright (C) 1991, 1992 Linus Torvalds 6 * Copyright (C) 1991, 1992 Linus Torvalds
7 * 7 *
@@ -41,6 +41,7 @@
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/sched/sysctl.h> 42#include <linux/sched/sysctl.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/compat.h>
44 45
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <asm/unistd.h> 47#include <asm/unistd.h>
@@ -1395,61 +1396,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1395 1396
1396#endif 1397#endif
1397 1398
1398/**
1399 * sys_getpid - return the thread group id of the current process
1400 *
1401 * Note, despite the name, this returns the tgid not the pid. The tgid and
1402 * the pid are identical unless CLONE_THREAD was specified on clone() in
1403 * which case the tgid is the same in all threads of the same group.
1404 *
1405 * This is SMP safe as current->tgid does not change.
1406 */
1407SYSCALL_DEFINE0(getpid)
1408{
1409 return task_tgid_vnr(current);
1410}
1411
1412/*
1413 * Accessing ->real_parent is not SMP-safe, it could
1414 * change from under us. However, we can use a stale
1415 * value of ->real_parent under rcu_read_lock(), see
1416 * release_task()->call_rcu(delayed_put_task_struct).
1417 */
1418SYSCALL_DEFINE0(getppid)
1419{
1420 int pid;
1421
1422 rcu_read_lock();
1423 pid = task_tgid_vnr(rcu_dereference(current->real_parent));
1424 rcu_read_unlock();
1425
1426 return pid;
1427}
1428
1429SYSCALL_DEFINE0(getuid)
1430{
1431 /* Only we change this so SMP safe */
1432 return from_kuid_munged(current_user_ns(), current_uid());
1433}
1434
1435SYSCALL_DEFINE0(geteuid)
1436{
1437 /* Only we change this so SMP safe */
1438 return from_kuid_munged(current_user_ns(), current_euid());
1439}
1440
1441SYSCALL_DEFINE0(getgid)
1442{
1443 /* Only we change this so SMP safe */
1444 return from_kgid_munged(current_user_ns(), current_gid());
1445}
1446
1447SYSCALL_DEFINE0(getegid)
1448{
1449 /* Only we change this so SMP safe */
1450 return from_kgid_munged(current_user_ns(), current_egid());
1451}
1452
1453static void process_timeout(unsigned long __data) 1399static void process_timeout(unsigned long __data)
1454{ 1400{
1455 wake_up_process((struct task_struct *)__data); 1401 wake_up_process((struct task_struct *)__data);
@@ -1557,91 +1503,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1557} 1503}
1558EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1504EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1559 1505
1560/* Thread ID - the internal kernel "pid" */
1561SYSCALL_DEFINE0(gettid)
1562{
1563 return task_pid_vnr(current);
1564}
1565
1566/**
1567 * do_sysinfo - fill in sysinfo struct
1568 * @info: pointer to buffer to fill
1569 */
1570int do_sysinfo(struct sysinfo *info)
1571{
1572 unsigned long mem_total, sav_total;
1573 unsigned int mem_unit, bitcount;
1574 struct timespec tp;
1575
1576 memset(info, 0, sizeof(struct sysinfo));
1577
1578 ktime_get_ts(&tp);
1579 monotonic_to_bootbased(&tp);
1580 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1581
1582 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
1583
1584 info->procs = nr_threads;
1585
1586 si_meminfo(info);
1587 si_swapinfo(info);
1588
1589 /*
1590 * If the sum of all the available memory (i.e. ram + swap)
1591 * is less than can be stored in a 32 bit unsigned long then
1592 * we can be binary compatible with 2.2.x kernels. If not,
1593 * well, in that case 2.2.x was broken anyways...
1594 *
1595 * -Erik Andersen <andersee@debian.org>
1596 */
1597
1598 mem_total = info->totalram + info->totalswap;
1599 if (mem_total < info->totalram || mem_total < info->totalswap)
1600 goto out;
1601 bitcount = 0;
1602 mem_unit = info->mem_unit;
1603 while (mem_unit > 1) {
1604 bitcount++;
1605 mem_unit >>= 1;
1606 sav_total = mem_total;
1607 mem_total <<= 1;
1608 if (mem_total < sav_total)
1609 goto out;
1610 }
1611
1612 /*
1613 * If mem_total did not overflow, multiply all memory values by
1614 * info->mem_unit and set it to 1. This leaves things compatible
1615 * with 2.2.x, and also retains compatibility with earlier 2.4.x
1616 * kernels...
1617 */
1618
1619 info->mem_unit = 1;
1620 info->totalram <<= bitcount;
1621 info->freeram <<= bitcount;
1622 info->sharedram <<= bitcount;
1623 info->bufferram <<= bitcount;
1624 info->totalswap <<= bitcount;
1625 info->freeswap <<= bitcount;
1626 info->totalhigh <<= bitcount;
1627 info->freehigh <<= bitcount;
1628
1629out:
1630 return 0;
1631}
1632
1633SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
1634{
1635 struct sysinfo val;
1636
1637 do_sysinfo(&val);
1638
1639 if (copy_to_user(info, &val, sizeof(struct sysinfo)))
1640 return -EFAULT;
1641
1642 return 0;
1643}
1644
1645static int __cpuinit init_timers_cpu(int cpu) 1506static int __cpuinit init_timers_cpu(int cpu)
1646{ 1507{
1647 int j; 1508 int j;
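The process-related syscalls and do_sysinfo() removed above appear to be relocated elsewhere in this series rather than dropped, so the userspace-visible behaviour of sysinfo(2) is unchanged. For reference, a minimal hedged example of the userspace view, including the mem_unit scaling the removed comment discusses:

	#include <stdio.h>
	#include <sys/sysinfo.h>

	int main(void)
	{
		struct sysinfo si;

		if (sysinfo(&si) != 0) {
			perror("sysinfo");
			return 1;
		}
		/* memory fields are reported in units of si.mem_unit bytes */
		printf("uptime %ld s, %u processes\n", si.uptime, (unsigned)si.procs);
		printf("total ram %llu bytes, free ram %llu bytes\n",
		       (unsigned long long)si.totalram * si.mem_unit,
		       (unsigned long long)si.freeram * si.mem_unit);
		return 0;
	}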
diff --git a/kernel/uid16.c b/kernel/uid16.c
index d7948eb10225..f6c83d7ef000 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -18,67 +18,43 @@
18 18
19SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) 19SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group)
20{ 20{
21 long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); 21 return sys_chown(filename, low2highuid(user), low2highgid(group));
22 /* avoid REGPARM breakage on x86: */
23 asmlinkage_protect(3, ret, filename, user, group);
24 return ret;
25} 22}
26 23
27SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) 24SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group)
28{ 25{
29 long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); 26 return sys_lchown(filename, low2highuid(user), low2highgid(group));
30 /* avoid REGPARM breakage on x86: */
31 asmlinkage_protect(3, ret, filename, user, group);
32 return ret;
33} 27}
34 28
35SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) 29SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group)
36{ 30{
37 long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); 31 return sys_fchown(fd, low2highuid(user), low2highgid(group));
38 /* avoid REGPARM breakage on x86: */
39 asmlinkage_protect(3, ret, fd, user, group);
40 return ret;
41} 32}
42 33
43SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid) 34SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid)
44{ 35{
45 long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); 36 return sys_setregid(low2highgid(rgid), low2highgid(egid));
46 /* avoid REGPARM breakage on x86: */
47 asmlinkage_protect(2, ret, rgid, egid);
48 return ret;
49} 37}
50 38
51SYSCALL_DEFINE1(setgid16, old_gid_t, gid) 39SYSCALL_DEFINE1(setgid16, old_gid_t, gid)
52{ 40{
53 long ret = sys_setgid(low2highgid(gid)); 41 return sys_setgid(low2highgid(gid));
54 /* avoid REGPARM breakage on x86: */
55 asmlinkage_protect(1, ret, gid);
56 return ret;
57} 42}
58 43
59SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid) 44SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid)
60{ 45{
61 long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); 46 return sys_setreuid(low2highuid(ruid), low2highuid(euid));
62 /* avoid REGPARM breakage on x86: */
63 asmlinkage_protect(2, ret, ruid, euid);
64 return ret;
65} 47}
66 48
67SYSCALL_DEFINE1(setuid16, old_uid_t, uid) 49SYSCALL_DEFINE1(setuid16, old_uid_t, uid)
68{ 50{
69 long ret = sys_setuid(low2highuid(uid)); 51 return sys_setuid(low2highuid(uid));
70 /* avoid REGPARM breakage on x86: */
71 asmlinkage_protect(1, ret, uid);
72 return ret;
73} 52}
74 53
75SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid) 54SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid)
76{ 55{
77 long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), 56 return sys_setresuid(low2highuid(ruid), low2highuid(euid),
78 low2highuid(suid)); 57 low2highuid(suid));
79 /* avoid REGPARM breakage on x86: */
80 asmlinkage_protect(3, ret, ruid, euid, suid);
81 return ret;
82} 58}
83 59
84SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruidp, old_uid_t __user *, euidp, old_uid_t __user *, suidp) 60SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruidp, old_uid_t __user *, euidp, old_uid_t __user *, suidp)
@@ -100,11 +76,8 @@ SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruidp, old_uid_t __user *, euid
100 76
101SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid) 77SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid)
102{ 78{
103 long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), 79 return sys_setresgid(low2highgid(rgid), low2highgid(egid),
104 low2highgid(sgid)); 80 low2highgid(sgid));
105 /* avoid REGPARM breakage on x86: */
106 asmlinkage_protect(3, ret, rgid, egid, sgid);
107 return ret;
108} 81}
109 82
110 83
@@ -127,18 +100,12 @@ SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgidp, old_gid_t __user *, egid
127 100
128SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid) 101SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid)
129{ 102{
130 long ret = sys_setfsuid(low2highuid(uid)); 103 return sys_setfsuid(low2highuid(uid));
131 /* avoid REGPARM breakage on x86: */
132 asmlinkage_protect(1, ret, uid);
133 return ret;
134} 104}
135 105
136SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid) 106SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid)
137{ 107{
138 long ret = sys_setfsgid(low2highgid(gid)); 108 return sys_setfsgid(low2highgid(gid));
139 /* avoid REGPARM breakage on x86: */
140 asmlinkage_protect(1, ret, gid);
141 return ret;
142} 109}
143 110
144static int groups16_to_user(old_gid_t __user *grouplist, 111static int groups16_to_user(old_gid_t __user *grouplist,
diff --git a/kernel/user.c b/kernel/user.c
index 8e635a18ab52..69b4c3d48cde 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,7 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/export.h> 17#include <linux/export.h>
18#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
19#include <linux/proc_fs.h> 19#include <linux/proc_ns.h>
20 20
21/* 21/*
22 * userns count is 1 for root user, 1 for init_uts_ns, 22 * userns count is 1 for root user, 1 for init_uts_ns,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e134d8f365dd..d8c30db06c5b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,7 +9,7 @@
9#include <linux/nsproxy.h> 9#include <linux/nsproxy.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/user_namespace.h> 11#include <linux/user_namespace.h>
12#include <linux/proc_fs.h> 12#include <linux/proc_ns.h>
13#include <linux/highuid.h> 13#include <linux/highuid.h>
14#include <linux/cred.h> 14#include <linux/cred.h>
15#include <linux/securebits.h> 15#include <linux/securebits.h>
diff --git a/kernel/utsname.c b/kernel/utsname.c
index a47fc5de3113..2fc8576efaa8 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,7 +15,7 @@
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_ns.h>
19 19
20static struct uts_namespace *create_uts_ns(void) 20static struct uts_namespace *create_uts_ns(void)
21{ 21{
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 154aa12af48e..4aa9f5bc6b2d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -46,6 +46,7 @@
46#include <linux/rculist.h> 46#include <linux/rculist.h>
47#include <linux/nodemask.h> 47#include <linux/nodemask.h>
48#include <linux/moduleparam.h> 48#include <linux/moduleparam.h>
49#include <linux/uaccess.h>
49 50
50#include "workqueue_internal.h" 51#include "workqueue_internal.h"
51 52
@@ -2197,6 +2198,7 @@ __acquires(&pool->lock)
2197 worker->current_work = NULL; 2198 worker->current_work = NULL;
2198 worker->current_func = NULL; 2199 worker->current_func = NULL;
2199 worker->current_pwq = NULL; 2200 worker->current_pwq = NULL;
2201 worker->desc_valid = false;
2200 pwq_dec_nr_in_flight(pwq, work_color); 2202 pwq_dec_nr_in_flight(pwq, work_color);
2201} 2203}
2202 2204
@@ -4365,6 +4367,83 @@ unsigned int work_busy(struct work_struct *work)
4365} 4367}
4366EXPORT_SYMBOL_GPL(work_busy); 4368EXPORT_SYMBOL_GPL(work_busy);
4367 4369
4370/**
4371 * set_worker_desc - set description for the current work item
4372 * @fmt: printf-style format string
4373 * @...: arguments for the format string
4374 *
4375 * This function can be called by a running work function to describe what
4376 * the work item is about. If the worker task gets dumped, this
4377 * information will be printed out together to help debugging. The
4378 * description can be at most WORKER_DESC_LEN including the trailing '\0'.
4379 */
4380void set_worker_desc(const char *fmt, ...)
4381{
4382 struct worker *worker = current_wq_worker();
4383 va_list args;
4384
4385 if (worker) {
4386 va_start(args, fmt);
4387 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4388 va_end(args);
4389 worker->desc_valid = true;
4390 }
4391}
4392
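set_worker_desc() is intended to be called from inside a running work function. A hedged sketch of a caller; the demo_inode structure and work item are made up, only the set_worker_desc() call itself comes from this patch:

	#include <linux/workqueue.h>

	struct demo_inode {
		unsigned long ino;
		struct work_struct flush_work;
	};

	static void demo_flush_work(struct work_struct *work)
	{
		struct demo_inode *di = container_of(work, struct demo_inode, flush_work);

		/* recorded in the worker task; shows up in print_worker_info() dumps */
		set_worker_desc("flushing inode %lu", di->ino);

		/* ... the actual writeback would happen here ... */
	}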
4393/**
4394 * print_worker_info - print out worker information and description
4395 * @log_lvl: the log level to use when printing
4396 * @task: target task
4397 *
4398 * If @task is a worker and currently executing a work item, print out the
4399 * name of the workqueue being serviced and worker description set with
4400 * set_worker_desc() by the currently executing work item.
4401 *
4402 * This function can be safely called on any task as long as the
4403 * task_struct itself is accessible. While safe, this function isn't
 4404 * synchronized and may print out mixups or garbage of limited length.
4405 */
4406void print_worker_info(const char *log_lvl, struct task_struct *task)
4407{
4408 work_func_t *fn = NULL;
4409 char name[WQ_NAME_LEN] = { };
4410 char desc[WORKER_DESC_LEN] = { };
4411 struct pool_workqueue *pwq = NULL;
4412 struct workqueue_struct *wq = NULL;
4413 bool desc_valid = false;
4414 struct worker *worker;
4415
4416 if (!(task->flags & PF_WQ_WORKER))
4417 return;
4418
4419 /*
4420 * This function is called without any synchronization and @task
4421 * could be in any state. Be careful with dereferences.
4422 */
4423 worker = probe_kthread_data(task);
4424
4425 /*
4426 * Carefully copy the associated workqueue's workfn and name. Keep
4427 * the original last '\0' in case the original contains garbage.
4428 */
4429 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4430 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4431 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4432 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4433
4434 /* copy worker description */
4435 probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
4436 if (desc_valid)
4437 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4438
4439 if (fn || name[0] || desc[0]) {
4440 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4441 if (desc[0])
4442 pr_cont(" (%s)", desc);
4443 pr_cont("\n");
4444 }
4445}
4446
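print_worker_info() is meant to be appended to existing task dumps. A hedged sketch of a typical caller; the watchdog-style wrapper is illustrative, while sched_show_task() is an existing stack-dump helper:

	#include <linux/sched.h>
	#include <linux/workqueue.h>

	static void demo_report_stuck_task(struct task_struct *p)
	{
		pr_info("task %s:%d appears stuck\n", p->comm, p->pid);
		sched_show_task(p);			/* stack trace of the task */
		print_worker_info(KERN_INFO, p);	/* adds "Workqueue: <name> <fn> (<desc>)" if it is a worker */
	}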
4368/* 4447/*
4369 * CPU hotplug. 4448 * CPU hotplug.
4370 * 4449 *
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 84ab6e1dc6fb..ad83c96b2ece 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -29,15 +29,25 @@ struct worker {
29 struct work_struct *current_work; /* L: work being processed */ 29 struct work_struct *current_work; /* L: work being processed */
30 work_func_t current_func; /* L: current_work's fn */ 30 work_func_t current_func; /* L: current_work's fn */
31 struct pool_workqueue *current_pwq; /* L: current_work's pwq */ 31 struct pool_workqueue *current_pwq; /* L: current_work's pwq */
32 bool desc_valid; /* ->desc is valid */
32 struct list_head scheduled; /* L: scheduled works */ 33 struct list_head scheduled; /* L: scheduled works */
34
35 /* 64 bytes boundary on 64bit, 32 on 32bit */
36
33 struct task_struct *task; /* I: worker task */ 37 struct task_struct *task; /* I: worker task */
34 struct worker_pool *pool; /* I: the associated pool */ 38 struct worker_pool *pool; /* I: the associated pool */
35 /* L: for rescuers */ 39 /* L: for rescuers */
36 /* 64 bytes boundary on 64bit, 32 on 32bit */ 40
37 unsigned long last_active; /* L: last active timestamp */ 41 unsigned long last_active; /* L: last active timestamp */
38 unsigned int flags; /* X: flags */ 42 unsigned int flags; /* X: flags */
39 int id; /* I: worker id */ 43 int id; /* I: worker id */
40 44
45 /*
 46 * Opaque string set with set_worker_desc(). Printed out with task
47 * dump for debugging - WARN, BUG, panic or sysrq.
48 */
49 char desc[WORKER_DESC_LEN];
50
41 /* used only by rescuers to point to the target workqueue */ 51 /* used only by rescuers to point to the target workqueue */
42 struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ 52 struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
43}; 53};