author		David S. Miller <davem@davemloft.net>	2009-12-11 20:12:17 -0500
committer	David S. Miller <davem@davemloft.net>	2009-12-11 20:12:17 -0500
commit		501706565b2d4d2d40d0d301d5411ede099b8a6f (patch)
tree		142a18bf1f1e74a09dbfa27540b893ade0fd797d /kernel
parent		e93737b0f0159a61772894943199fd3b6f315641 (diff)
parent		2fe77b81c77eed92c4c0439f74c8148a295b4a86 (diff)

Merge branch 'master' of /home/davem/src/GIT/linux-2.6/

Conflicts:
	include/net/tcp.h

Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cpu.c                  |   5
-rw-r--r--	kernel/exit.c                 |   2
-rw-r--r--	kernel/hrtimer.c              |   3
-rw-r--r--	kernel/irq/manage.c           |   2
-rw-r--r--	kernel/irq/spurious.c         |   2
-rw-r--r--	kernel/itimer.c               |   7
-rw-r--r--	kernel/kgdb.c                 |  56
-rw-r--r--	kernel/perf_event.c           |   4
-rw-r--r--	kernel/pm_qos_params.c        |  20
-rw-r--r--	kernel/posix-cpu-timers.c     |   5
-rw-r--r--	kernel/resource.c             |  26
-rw-r--r--	kernel/sys.c                  |  14
-rw-r--r--	kernel/time.c                 |   1
-rw-r--r--	kernel/time/clockevents.c     |  13
-rw-r--r--	kernel/time/clocksource.c     |  99
-rw-r--r--	kernel/time/tick-oneshot.c    |   4
-rw-r--r--	kernel/time/tick-sched.c      | 141
-rw-r--r--	kernel/time/timekeeping.c     | 125
-rw-r--r--	kernel/time/timer_list.c      |  10
-rw-r--r--	kernel/user-return-notifier.c |   6
-rw-r--r--	kernel/workqueue.c            | 131
21 files changed, 492 insertions, 184 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ba0f1ecb212..7c4e2713df0a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -392,10 +392,9 @@ int disable_nonboot_cpus(void) | |||
392 | if (cpu == first_cpu) | 392 | if (cpu == first_cpu) |
393 | continue; | 393 | continue; |
394 | error = _cpu_down(cpu, 1); | 394 | error = _cpu_down(cpu, 1); |
395 | if (!error) { | 395 | if (!error) |
396 | cpumask_set_cpu(cpu, frozen_cpus); | 396 | cpumask_set_cpu(cpu, frozen_cpus); |
397 | printk("CPU%d is down\n", cpu); | 397 | else { |
398 | } else { | ||
399 | printk(KERN_ERR "Error taking CPU%d down: %d\n", | 398 | printk(KERN_ERR "Error taking CPU%d down: %d\n", |
400 | cpu, error); | 399 | cpu, error); |
401 | break; | 400 | break; |
diff --git a/kernel/exit.c b/kernel/exit.c
index 1143012951e9..6f50ef55a6f3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -971,7 +971,7 @@ NORET_TYPE void do_exit(long code) | |||
971 | exit_thread(); | 971 | exit_thread(); |
972 | cgroup_exit(tsk, 1); | 972 | cgroup_exit(tsk, 1); |
973 | 973 | ||
974 | if (group_dead && tsk->signal->leader) | 974 | if (group_dead) |
975 | disassociate_ctty(1); | 975 | disassociate_ctty(1); |
976 | 976 | ||
977 | module_put(task_thread_info(tsk)->exec_domain->module); | 977 | module_put(task_thread_info(tsk)->exec_domain->module); |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3e1c36e7998f..ede527708123 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1238,7 +1238,8 @@ hrtimer_interrupt_hanging(struct clock_event_device *dev, | |||
1238 | force_clock_reprogram = 1; | 1238 | force_clock_reprogram = 1; |
1239 | dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; | 1239 | dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; |
1240 | printk(KERN_WARNING "hrtimer: interrupt too slow, " | 1240 | printk(KERN_WARNING "hrtimer: interrupt too slow, " |
1241 | "forcing clock min delta to %lu ns\n", dev->min_delta_ns); | 1241 | "forcing clock min delta to %llu ns\n", |
1242 | (unsigned long long) dev->min_delta_ns); | ||
1242 | } | 1243 | } |
1243 | /* | 1244 | /* |
1244 | * High resolution timer interrupt | 1245 | * High resolution timer interrupt |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bde4c667d24d..7305b297d1eb 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1067,7 +1067,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
1067 | kfree(action); | 1067 | kfree(action); |
1068 | 1068 | ||
1069 | #ifdef CONFIG_DEBUG_SHIRQ | 1069 | #ifdef CONFIG_DEBUG_SHIRQ |
1070 | if (irqflags & IRQF_SHARED) { | 1070 | if (!retval && (irqflags & IRQF_SHARED)) { |
1071 | /* | 1071 | /* |
1072 | * It's a shared IRQ -- the driver ought to be prepared for it | 1072 | * It's a shared IRQ -- the driver ought to be prepared for it |
1073 | * to happen immediately, so let's make sure.... | 1073 | * to happen immediately, so let's make sure.... |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 22b0a6eedf24..e49ea1c5232d 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -220,7 +220,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
220 | /* | 220 | /* |
221 | * If we are seeing only the odd spurious IRQ caused by | 221 | * If we are seeing only the odd spurious IRQ caused by |
222 | * bus asynchronicity then don't eventually trigger an error, | 222 | * bus asynchronicity then don't eventually trigger an error, |
223 | * otherwise the couter becomes a doomsday timer for otherwise | 223 | * otherwise the counter becomes a doomsday timer for otherwise |
224 | * working systems | 224 | * working systems |
225 | */ | 225 | */ |
226 | if (time_after(jiffies, desc->last_unhandled + HZ/10)) | 226 | if (time_after(jiffies, desc->last_unhandled + HZ/10)) |
diff --git a/kernel/itimer.c b/kernel/itimer.c
index b03451ede528..d802883153da 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -146,6 +146,7 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, | |||
146 | { | 146 | { |
147 | cputime_t cval, nval, cinterval, ninterval; | 147 | cputime_t cval, nval, cinterval, ninterval; |
148 | s64 ns_ninterval, ns_nval; | 148 | s64 ns_ninterval, ns_nval; |
149 | u32 error, incr_error; | ||
149 | struct cpu_itimer *it = &tsk->signal->it[clock_id]; | 150 | struct cpu_itimer *it = &tsk->signal->it[clock_id]; |
150 | 151 | ||
151 | nval = timeval_to_cputime(&value->it_value); | 152 | nval = timeval_to_cputime(&value->it_value); |
@@ -153,8 +154,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, | |||
153 | ninterval = timeval_to_cputime(&value->it_interval); | 154 | ninterval = timeval_to_cputime(&value->it_interval); |
154 | ns_ninterval = timeval_to_ns(&value->it_interval); | 155 | ns_ninterval = timeval_to_ns(&value->it_interval); |
155 | 156 | ||
156 | it->incr_error = cputime_sub_ns(ninterval, ns_ninterval); | 157 | error = cputime_sub_ns(nval, ns_nval); |
157 | it->error = cputime_sub_ns(nval, ns_nval); | 158 | incr_error = cputime_sub_ns(ninterval, ns_ninterval); |
158 | 159 | ||
159 | spin_lock_irq(&tsk->sighand->siglock); | 160 | spin_lock_irq(&tsk->sighand->siglock); |
160 | 161 | ||
@@ -168,6 +169,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, | |||
168 | } | 169 | } |
169 | it->expires = nval; | 170 | it->expires = nval; |
170 | it->incr = ninterval; | 171 | it->incr = ninterval; |
172 | it->error = error; | ||
173 | it->incr_error = incr_error; | ||
171 | trace_itimer_state(clock_id == CPUCLOCK_VIRT ? | 174 | trace_itimer_state(clock_id == CPUCLOCK_VIRT ? |
172 | ITIMER_VIRTUAL : ITIMER_PROF, value, nval); | 175 | ITIMER_VIRTUAL : ITIMER_PROF, value, nval); |
173 | 176 | ||
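The itimer change above moves the cputime_sub_ns() arithmetic out from under siglock and only publishes the results while the lock is held. A minimal userspace sketch of that pattern, with a pthread mutex standing in for siglock and a made-up rounding helper standing in for cputime_sub_ns():

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct itimer_state {
	pthread_mutex_t lock;
	uint64_t error;
	uint64_t incr_error;
};

/* Stand-in for cputime_sub_ns(): round-up error of ns against 10 ms ticks. */
static uint64_t rounding_error(uint64_t ns)
{
	uint64_t granularity = 10000000;	/* 10 ms, i.e. HZ=100 */
	uint64_t rounded_up = ((ns + granularity - 1) / granularity) * granularity;

	return rounded_up - ns;
}

static void set_timer(struct itimer_state *it, uint64_t value_ns,
		      uint64_t interval_ns)
{
	/* Do the arithmetic before taking the lock ... */
	uint64_t error = rounding_error(value_ns);
	uint64_t incr_error = rounding_error(interval_ns);

	/* ... and only publish the results inside the critical section. */
	pthread_mutex_lock(&it->lock);
	it->error = error;
	it->incr_error = incr_error;
	pthread_mutex_unlock(&it->lock);
}

int main(void)
{
	struct itimer_state it = { .lock = PTHREAD_MUTEX_INITIALIZER };

	set_timer(&it, 123456789, 250000000);
	printf("error=%llu incr_error=%llu\n",
	       (unsigned long long)it.error,
	       (unsigned long long)it.incr_error);
	return 0;
}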
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 7d7014634022..2eb517e23514 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -129,6 +129,7 @@ struct task_struct *kgdb_usethread; | |||
129 | struct task_struct *kgdb_contthread; | 129 | struct task_struct *kgdb_contthread; |
130 | 130 | ||
131 | int kgdb_single_step; | 131 | int kgdb_single_step; |
132 | pid_t kgdb_sstep_pid; | ||
132 | 133 | ||
133 | /* Our I/O buffers. */ | 134 | /* Our I/O buffers. */ |
134 | static char remcom_in_buffer[BUFMAX]; | 135 | static char remcom_in_buffer[BUFMAX]; |
@@ -541,12 +542,17 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid) | |||
541 | */ | 542 | */ |
542 | if (tid == 0 || tid == -1) | 543 | if (tid == 0 || tid == -1) |
543 | tid = -atomic_read(&kgdb_active) - 2; | 544 | tid = -atomic_read(&kgdb_active) - 2; |
544 | if (tid < 0) { | 545 | if (tid < -1 && tid > -NR_CPUS - 2) { |
545 | if (kgdb_info[-tid - 2].task) | 546 | if (kgdb_info[-tid - 2].task) |
546 | return kgdb_info[-tid - 2].task; | 547 | return kgdb_info[-tid - 2].task; |
547 | else | 548 | else |
548 | return idle_task(-tid - 2); | 549 | return idle_task(-tid - 2); |
549 | } | 550 | } |
551 | if (tid <= 0) { | ||
552 | printk(KERN_ERR "KGDB: Internal thread select error\n"); | ||
553 | dump_stack(); | ||
554 | return NULL; | ||
555 | } | ||
550 | 556 | ||
551 | /* | 557 | /* |
552 | * find_task_by_pid_ns() does not take the tasklist lock anymore | 558 | * find_task_by_pid_ns() does not take the tasklist lock anymore |
@@ -619,7 +625,8 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) | |||
619 | static int kgdb_activate_sw_breakpoints(void) | 625 | static int kgdb_activate_sw_breakpoints(void) |
620 | { | 626 | { |
621 | unsigned long addr; | 627 | unsigned long addr; |
622 | int error = 0; | 628 | int error; |
629 | int ret = 0; | ||
623 | int i; | 630 | int i; |
624 | 631 | ||
625 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { | 632 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { |
@@ -629,13 +636,16 @@ static int kgdb_activate_sw_breakpoints(void) | |||
629 | addr = kgdb_break[i].bpt_addr; | 636 | addr = kgdb_break[i].bpt_addr; |
630 | error = kgdb_arch_set_breakpoint(addr, | 637 | error = kgdb_arch_set_breakpoint(addr, |
631 | kgdb_break[i].saved_instr); | 638 | kgdb_break[i].saved_instr); |
632 | if (error) | 639 | if (error) { |
633 | return error; | 640 | ret = error; |
641 | printk(KERN_INFO "KGDB: BP install failed: %lx", addr); | ||
642 | continue; | ||
643 | } | ||
634 | 644 | ||
635 | kgdb_flush_swbreak_addr(addr); | 645 | kgdb_flush_swbreak_addr(addr); |
636 | kgdb_break[i].state = BP_ACTIVE; | 646 | kgdb_break[i].state = BP_ACTIVE; |
637 | } | 647 | } |
638 | return 0; | 648 | return ret; |
639 | } | 649 | } |
640 | 650 | ||
641 | static int kgdb_set_sw_break(unsigned long addr) | 651 | static int kgdb_set_sw_break(unsigned long addr) |
@@ -682,7 +692,8 @@ static int kgdb_set_sw_break(unsigned long addr) | |||
682 | static int kgdb_deactivate_sw_breakpoints(void) | 692 | static int kgdb_deactivate_sw_breakpoints(void) |
683 | { | 693 | { |
684 | unsigned long addr; | 694 | unsigned long addr; |
685 | int error = 0; | 695 | int error; |
696 | int ret = 0; | ||
686 | int i; | 697 | int i; |
687 | 698 | ||
688 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { | 699 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { |
@@ -691,13 +702,15 @@ static int kgdb_deactivate_sw_breakpoints(void) | |||
691 | addr = kgdb_break[i].bpt_addr; | 702 | addr = kgdb_break[i].bpt_addr; |
692 | error = kgdb_arch_remove_breakpoint(addr, | 703 | error = kgdb_arch_remove_breakpoint(addr, |
693 | kgdb_break[i].saved_instr); | 704 | kgdb_break[i].saved_instr); |
694 | if (error) | 705 | if (error) { |
695 | return error; | 706 | printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr); |
707 | ret = error; | ||
708 | } | ||
696 | 709 | ||
697 | kgdb_flush_swbreak_addr(addr); | 710 | kgdb_flush_swbreak_addr(addr); |
698 | kgdb_break[i].state = BP_SET; | 711 | kgdb_break[i].state = BP_SET; |
699 | } | 712 | } |
700 | return 0; | 713 | return ret; |
701 | } | 714 | } |
702 | 715 | ||
703 | static int kgdb_remove_sw_break(unsigned long addr) | 716 | static int kgdb_remove_sw_break(unsigned long addr) |
@@ -1204,8 +1217,10 @@ static int gdb_cmd_exception_pass(struct kgdb_state *ks) | |||
1204 | return 1; | 1217 | return 1; |
1205 | 1218 | ||
1206 | } else { | 1219 | } else { |
1207 | error_packet(remcom_out_buffer, -EINVAL); | 1220 | kgdb_msg_write("KGDB only knows signal 9 (pass)" |
1208 | return 0; | 1221 | " and 15 (pass and disconnect)\n" |
1222 | "Executing a continue without signal passing\n", 0); | ||
1223 | remcom_in_buffer[0] = 'c'; | ||
1209 | } | 1224 | } |
1210 | 1225 | ||
1211 | /* Indicate fall through */ | 1226 | /* Indicate fall through */ |
@@ -1395,6 +1410,7 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
1395 | struct kgdb_state kgdb_var; | 1410 | struct kgdb_state kgdb_var; |
1396 | struct kgdb_state *ks = &kgdb_var; | 1411 | struct kgdb_state *ks = &kgdb_var; |
1397 | unsigned long flags; | 1412 | unsigned long flags; |
1413 | int sstep_tries = 100; | ||
1398 | int error = 0; | 1414 | int error = 0; |
1399 | int i, cpu; | 1415 | int i, cpu; |
1400 | 1416 | ||
@@ -1425,13 +1441,14 @@ acquirelock: | |||
1425 | cpu_relax(); | 1441 | cpu_relax(); |
1426 | 1442 | ||
1427 | /* | 1443 | /* |
1428 | * Do not start the debugger connection on this CPU if the last | 1444 | * For single stepping, try to only enter on the processor |
1429 | * instance of the exception handler wanted to come into the | 1445 | * that was single stepping. To gaurd against a deadlock, the |
1430 | * debugger on a different CPU via a single step | 1446 | * kernel will only try for the value of sstep_tries before |
1447 | * giving up and continuing on. | ||
1431 | */ | 1448 | */ |
1432 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1 && | 1449 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1 && |
1433 | atomic_read(&kgdb_cpu_doing_single_step) != cpu) { | 1450 | (kgdb_info[cpu].task && |
1434 | 1451 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { | |
1435 | atomic_set(&kgdb_active, -1); | 1452 | atomic_set(&kgdb_active, -1); |
1436 | touch_softlockup_watchdog(); | 1453 | touch_softlockup_watchdog(); |
1437 | clocksource_touch_watchdog(); | 1454 | clocksource_touch_watchdog(); |
@@ -1524,6 +1541,13 @@ acquirelock: | |||
1524 | } | 1541 | } |
1525 | 1542 | ||
1526 | kgdb_restore: | 1543 | kgdb_restore: |
1544 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { | ||
1545 | int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step); | ||
1546 | if (kgdb_info[sstep_cpu].task) | ||
1547 | kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid; | ||
1548 | else | ||
1549 | kgdb_sstep_pid = 0; | ||
1550 | } | ||
1527 | /* Free kgdb_active */ | 1551 | /* Free kgdb_active */ |
1528 | atomic_set(&kgdb_active, -1); | 1552 | atomic_set(&kgdb_active, -1); |
1529 | touch_softlockup_watchdog(); | 1553 | touch_softlockup_watchdog(); |
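The reworked breakpoint loops above no longer abort on the first kgdb_arch_set_breakpoint()/kgdb_arch_remove_breakpoint() failure; they log the slot, remember the error, and keep going. A small standalone sketch of that loop shape (the helper and its failure mode are invented for illustration):

#include <stdio.h>

#define MAX_BREAKPOINTS	4

/* Hypothetical stand-in for kgdb_arch_set_breakpoint(): one slot fails. */
static int arch_set_breakpoint(int i)
{
	return i == 2 ? -1 : 0;
}

/*
 * Same shape as the reworked kgdb_activate_sw_breakpoints(): a failure on
 * one breakpoint is logged and remembered, and the remaining breakpoints
 * are still installed instead of being skipped.
 */
static int activate_breakpoints(void)
{
	int error, ret = 0, i;

	for (i = 0; i < MAX_BREAKPOINTS; i++) {
		error = arch_set_breakpoint(i);
		if (error) {
			ret = error;
			printf("BP install failed: %d\n", i);
			continue;
		}
		printf("BP %d active\n", i);
	}
	return ret;		/* last error seen, or 0 */
}

int main(void)
{
	printf("activate -> %d\n", activate_breakpoints());
	return 0;
}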
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6b7ddba1dd64..40a996ec39fa 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -476,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event) | |||
476 | if (!task) { | 476 | if (!task) { |
477 | /* | 477 | /* |
478 | * Per cpu events are removed via an smp call and | 478 | * Per cpu events are removed via an smp call and |
479 | * the removal is always sucessful. | 479 | * the removal is always successful. |
480 | */ | 480 | */ |
481 | smp_call_function_single(event->cpu, | 481 | smp_call_function_single(event->cpu, |
482 | __perf_event_remove_from_context, | 482 | __perf_event_remove_from_context, |
@@ -845,7 +845,7 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
845 | if (!task) { | 845 | if (!task) { |
846 | /* | 846 | /* |
847 | * Per cpu events are installed via an smp call and | 847 | * Per cpu events are installed via an smp call and |
848 | * the install is always sucessful. | 848 | * the install is always successful. |
849 | */ | 849 | */ |
850 | smp_call_function_single(cpu, __perf_install_in_context, | 850 | smp_call_function_single(cpu, __perf_install_in_context, |
851 | event, 1); | 851 | event, 1); |
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index dfdec524d1b7..3db49b9ca374 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -29,7 +29,6 @@ | |||
29 | 29 | ||
30 | #include <linux/pm_qos_params.h> | 30 | #include <linux/pm_qos_params.h> |
31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
32 | #include <linux/smp_lock.h> | ||
33 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
34 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
35 | #include <linux/time.h> | 34 | #include <linux/time.h> |
@@ -344,37 +343,33 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) | |||
344 | } | 343 | } |
345 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); | 344 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); |
346 | 345 | ||
347 | #define PID_NAME_LEN sizeof("process_1234567890") | 346 | #define PID_NAME_LEN 32 |
348 | static char name[PID_NAME_LEN]; | ||
349 | 347 | ||
350 | static int pm_qos_power_open(struct inode *inode, struct file *filp) | 348 | static int pm_qos_power_open(struct inode *inode, struct file *filp) |
351 | { | 349 | { |
352 | int ret; | 350 | int ret; |
353 | long pm_qos_class; | 351 | long pm_qos_class; |
352 | char name[PID_NAME_LEN]; | ||
354 | 353 | ||
355 | lock_kernel(); | ||
356 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); | 354 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); |
357 | if (pm_qos_class >= 0) { | 355 | if (pm_qos_class >= 0) { |
358 | filp->private_data = (void *)pm_qos_class; | 356 | filp->private_data = (void *)pm_qos_class; |
359 | sprintf(name, "process_%d", current->pid); | 357 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); |
360 | ret = pm_qos_add_requirement(pm_qos_class, name, | 358 | ret = pm_qos_add_requirement(pm_qos_class, name, |
361 | PM_QOS_DEFAULT_VALUE); | 359 | PM_QOS_DEFAULT_VALUE); |
362 | if (ret >= 0) { | 360 | if (ret >= 0) |
363 | unlock_kernel(); | ||
364 | return 0; | 361 | return 0; |
365 | } | ||
366 | } | 362 | } |
367 | unlock_kernel(); | ||
368 | |||
369 | return -EPERM; | 363 | return -EPERM; |
370 | } | 364 | } |
371 | 365 | ||
372 | static int pm_qos_power_release(struct inode *inode, struct file *filp) | 366 | static int pm_qos_power_release(struct inode *inode, struct file *filp) |
373 | { | 367 | { |
374 | int pm_qos_class; | 368 | int pm_qos_class; |
369 | char name[PID_NAME_LEN]; | ||
375 | 370 | ||
376 | pm_qos_class = (long)filp->private_data; | 371 | pm_qos_class = (long)filp->private_data; |
377 | sprintf(name, "process_%d", current->pid); | 372 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); |
378 | pm_qos_remove_requirement(pm_qos_class, name); | 373 | pm_qos_remove_requirement(pm_qos_class, name); |
379 | 374 | ||
380 | return 0; | 375 | return 0; |
@@ -385,13 +380,14 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
385 | { | 380 | { |
386 | s32 value; | 381 | s32 value; |
387 | int pm_qos_class; | 382 | int pm_qos_class; |
383 | char name[PID_NAME_LEN]; | ||
388 | 384 | ||
389 | pm_qos_class = (long)filp->private_data; | 385 | pm_qos_class = (long)filp->private_data; |
390 | if (count != sizeof(s32)) | 386 | if (count != sizeof(s32)) |
391 | return -EINVAL; | 387 | return -EINVAL; |
392 | if (copy_from_user(&value, buf, sizeof(s32))) | 388 | if (copy_from_user(&value, buf, sizeof(s32))) |
393 | return -EFAULT; | 389 | return -EFAULT; |
394 | sprintf(name, "process_%d", current->pid); | 390 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); |
395 | pm_qos_update_requirement(pm_qos_class, name, value); | 391 | pm_qos_update_requirement(pm_qos_class, name, value); |
396 | 392 | ||
397 | return sizeof(s32); | 393 | return sizeof(s32); |
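The pm_qos change replaces the shared static name[] buffer and lock_kernel() with a per-call stack buffer formatted by snprintf(). A tiny userspace sketch of the same idea, using getpid() in place of current->pid:

#include <stdio.h>
#include <unistd.h>

#define PID_NAME_LEN	32

/*
 * Per-call stack buffer plus snprintf(), as in the reworked pm_qos code:
 * no shared static buffer means no lock is needed just to build the name,
 * and the bounded format can never overrun the buffer.
 */
static void format_pid_name(char *name, size_t len)
{
	snprintf(name, len, "process_%d", (int)getpid());
}

int main(void)
{
	char name[PID_NAME_LEN];

	format_pid_name(name, sizeof(name));
	printf("%s\n", name);
	return 0;
}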
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 5c9dc228747b..438ff4523513 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -384,7 +384,8 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
384 | 384 | ||
385 | /* | 385 | /* |
386 | * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. | 386 | * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. |
387 | * This is called from sys_timer_create with the new timer already locked. | 387 | * This is called from sys_timer_create() and do_cpu_nanosleep() with the |
388 | * new timer already all-zeros initialized. | ||
388 | */ | 389 | */ |
389 | int posix_cpu_timer_create(struct k_itimer *new_timer) | 390 | int posix_cpu_timer_create(struct k_itimer *new_timer) |
390 | { | 391 | { |
@@ -396,8 +397,6 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
396 | return -EINVAL; | 397 | return -EINVAL; |
397 | 398 | ||
398 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); | 399 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); |
399 | new_timer->it.cpu.incr.sched = 0; | ||
400 | new_timer->it.cpu.expires.sched = 0; | ||
401 | 400 | ||
402 | read_lock(&tasklist_lock); | 401 | read_lock(&tasklist_lock); |
403 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { | 402 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { |
diff --git a/kernel/resource.c b/kernel/resource.c
index fb11a58b9594..dc15686b7a77 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -308,35 +308,37 @@ static int find_resource(struct resource *root, struct resource *new, | |||
308 | void *alignf_data) | 308 | void *alignf_data) |
309 | { | 309 | { |
310 | struct resource *this = root->child; | 310 | struct resource *this = root->child; |
311 | resource_size_t start, end; | ||
311 | 312 | ||
312 | new->start = root->start; | 313 | start = root->start; |
313 | /* | 314 | /* |
314 | * Skip past an allocated resource that starts at 0, since the assignment | 315 | * Skip past an allocated resource that starts at 0, since the assignment |
315 | * of this->start - 1 to new->end below would cause an underflow. | 316 | * of this->start - 1 to new->end below would cause an underflow. |
316 | */ | 317 | */ |
317 | if (this && this->start == 0) { | 318 | if (this && this->start == 0) { |
318 | new->start = this->end + 1; | 319 | start = this->end + 1; |
319 | this = this->sibling; | 320 | this = this->sibling; |
320 | } | 321 | } |
321 | for(;;) { | 322 | for(;;) { |
322 | if (this) | 323 | if (this) |
323 | new->end = this->start - 1; | 324 | end = this->start - 1; |
324 | else | 325 | else |
325 | new->end = root->end; | 326 | end = root->end; |
326 | if (new->start < min) | 327 | if (start < min) |
327 | new->start = min; | 328 | start = min; |
328 | if (new->end > max) | 329 | if (end > max) |
329 | new->end = max; | 330 | end = max; |
330 | new->start = ALIGN(new->start, align); | 331 | start = ALIGN(start, align); |
331 | if (alignf) | 332 | if (alignf) |
332 | alignf(alignf_data, new, size, align); | 333 | alignf(alignf_data, new, size, align); |
333 | if (new->start < new->end && new->end - new->start >= size - 1) { | 334 | if (start < end && end - start >= size - 1) { |
334 | new->end = new->start + size - 1; | 335 | new->start = start; |
336 | new->end = start + size - 1; | ||
335 | return 0; | 337 | return 0; |
336 | } | 338 | } |
337 | if (!this) | 339 | if (!this) |
338 | break; | 340 | break; |
339 | new->start = this->end + 1; | 341 | start = this->end + 1; |
340 | this = this->sibling; | 342 | this = this->sibling; |
341 | } | 343 | } |
342 | return -EBUSY; | 344 | return -EBUSY; |
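find_resource() now does its candidate arithmetic on local start/end variables and only writes into the caller's resource when a fit is found, so a failed allocation no longer corrupts *new. A userspace sketch of that shape over a made-up list of free gaps:

#include <stdio.h>
#include <stdint.h>

struct range { uint64_t start, end; };

/*
 * Shape of the reworked find_resource(): do all candidate arithmetic on
 * locals and only store into *out when a fit is found, so a failed search
 * leaves the caller's structure untouched. The gaps[] data is invented.
 */
static int find_fit(const struct range *gaps, int n, uint64_t size,
		    struct range *out)
{
	int i;

	for (i = 0; i < n; i++) {
		uint64_t start = gaps[i].start;
		uint64_t end = gaps[i].end;

		if (start < end && end - start >= size - 1) {
			out->start = start;
			out->end = start + size - 1;
			return 0;
		}
	}
	return -1;	/* *out not modified on failure */
}

int main(void)
{
	struct range gaps[] = { { 0x100, 0x1ff }, { 0x1000, 0x7fff } };
	struct range res = { 0, 0 };

	if (!find_fit(gaps, 2, 0x800, &res))
		printf("fit at [%#llx-%#llx]\n",
		       (unsigned long long)res.start,
		       (unsigned long long)res.end);
	return 0;
}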
diff --git a/kernel/sys.c b/kernel/sys.c
index 9968c5fb55b9..585d6cd10040 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/utsname.h> | 9 | #include <linux/utsname.h> |
10 | #include <linux/mman.h> | 10 | #include <linux/mman.h> |
11 | #include <linux/smp_lock.h> | ||
12 | #include <linux/notifier.h> | 11 | #include <linux/notifier.h> |
13 | #include <linux/reboot.h> | 12 | #include <linux/reboot.h> |
14 | #include <linux/prctl.h> | 13 | #include <linux/prctl.h> |
@@ -349,6 +348,9 @@ void kernel_power_off(void) | |||
349 | machine_power_off(); | 348 | machine_power_off(); |
350 | } | 349 | } |
351 | EXPORT_SYMBOL_GPL(kernel_power_off); | 350 | EXPORT_SYMBOL_GPL(kernel_power_off); |
351 | |||
352 | static DEFINE_MUTEX(reboot_mutex); | ||
353 | |||
352 | /* | 354 | /* |
353 | * Reboot system call: for obvious reasons only root may call it, | 355 | * Reboot system call: for obvious reasons only root may call it, |
354 | * and even root needs to set up some magic numbers in the registers | 356 | * and even root needs to set up some magic numbers in the registers |
@@ -381,7 +383,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
381 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) | 383 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) |
382 | cmd = LINUX_REBOOT_CMD_HALT; | 384 | cmd = LINUX_REBOOT_CMD_HALT; |
383 | 385 | ||
384 | lock_kernel(); | 386 | mutex_lock(&reboot_mutex); |
385 | switch (cmd) { | 387 | switch (cmd) { |
386 | case LINUX_REBOOT_CMD_RESTART: | 388 | case LINUX_REBOOT_CMD_RESTART: |
387 | kernel_restart(NULL); | 389 | kernel_restart(NULL); |
@@ -397,20 +399,18 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
397 | 399 | ||
398 | case LINUX_REBOOT_CMD_HALT: | 400 | case LINUX_REBOOT_CMD_HALT: |
399 | kernel_halt(); | 401 | kernel_halt(); |
400 | unlock_kernel(); | ||
401 | do_exit(0); | 402 | do_exit(0); |
402 | panic("cannot halt"); | 403 | panic("cannot halt"); |
403 | 404 | ||
404 | case LINUX_REBOOT_CMD_POWER_OFF: | 405 | case LINUX_REBOOT_CMD_POWER_OFF: |
405 | kernel_power_off(); | 406 | kernel_power_off(); |
406 | unlock_kernel(); | ||
407 | do_exit(0); | 407 | do_exit(0); |
408 | break; | 408 | break; |
409 | 409 | ||
410 | case LINUX_REBOOT_CMD_RESTART2: | 410 | case LINUX_REBOOT_CMD_RESTART2: |
411 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { | 411 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { |
412 | unlock_kernel(); | 412 | ret = -EFAULT; |
413 | return -EFAULT; | 413 | break; |
414 | } | 414 | } |
415 | buffer[sizeof(buffer) - 1] = '\0'; | 415 | buffer[sizeof(buffer) - 1] = '\0'; |
416 | 416 | ||
@@ -433,7 +433,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
433 | ret = -EINVAL; | 433 | ret = -EINVAL; |
434 | break; | 434 | break; |
435 | } | 435 | } |
436 | unlock_kernel(); | 436 | mutex_unlock(&reboot_mutex); |
437 | return ret; | 437 | return ret; |
438 | } | 438 | } |
439 | 439 | ||
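sys_reboot() drops the big kernel lock in favour of a dedicated reboot_mutex, and the error paths now set ret and break so every path funnels through a single unlock. A condensed userspace sketch of that control flow with a pthread mutex and invented command values:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reboot_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Same control flow as the reworked sys_reboot(): one dedicated mutex
 * instead of the big kernel lock, and error paths set 'ret' and break so
 * that a single unlock at the end covers every case.
 */
static int do_reboot(int cmd)
{
	int ret = 0;

	pthread_mutex_lock(&reboot_mutex);
	switch (cmd) {
	case 0:
		printf("restart\n");
		break;
	case 1:
		printf("halt\n");
		break;
	default:
		ret = -1;		/* stands in for -EINVAL */
		break;
	}
	pthread_mutex_unlock(&reboot_mutex);
	return ret;
}

int main(void)
{
	printf("%d %d\n", do_reboot(0), do_reboot(42));
	return 0;
}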
diff --git a/kernel/time.c b/kernel/time.c
index 804798005d19..c6324d96009e 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -136,7 +136,6 @@ static inline void warp_clock(void) | |||
136 | write_seqlock_irq(&xtime_lock); | 136 | write_seqlock_irq(&xtime_lock); |
137 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; | 137 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; |
138 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; | 138 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; |
139 | update_xtime_cache(0); | ||
140 | write_sequnlock_irq(&xtime_lock); | 139 | write_sequnlock_irq(&xtime_lock); |
141 | clock_was_set(); | 140 | clock_was_set(); |
142 | } | 141 | } |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 620b58abdc32..20a8920029ee 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
21 | #include <linux/tick.h> | 21 | #include <linux/tick.h> |
22 | 22 | ||
23 | #include "tick-internal.h" | ||
24 | |||
23 | /* The registered clock event devices */ | 25 | /* The registered clock event devices */ |
24 | static LIST_HEAD(clockevent_devices); | 26 | static LIST_HEAD(clockevent_devices); |
25 | static LIST_HEAD(clockevents_released); | 27 | static LIST_HEAD(clockevents_released); |
@@ -37,10 +39,9 @@ static DEFINE_SPINLOCK(clockevents_lock); | |||
37 | * | 39 | * |
38 | * Math helper, returns latch value converted to nanoseconds (bound checked) | 40 | * Math helper, returns latch value converted to nanoseconds (bound checked) |
39 | */ | 41 | */ |
40 | unsigned long clockevent_delta2ns(unsigned long latch, | 42 | u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) |
41 | struct clock_event_device *evt) | ||
42 | { | 43 | { |
43 | u64 clc = ((u64) latch << evt->shift); | 44 | u64 clc = (u64) latch << evt->shift; |
44 | 45 | ||
45 | if (unlikely(!evt->mult)) { | 46 | if (unlikely(!evt->mult)) { |
46 | evt->mult = 1; | 47 | evt->mult = 1; |
@@ -50,10 +51,10 @@ unsigned long clockevent_delta2ns(unsigned long latch, | |||
50 | do_div(clc, evt->mult); | 51 | do_div(clc, evt->mult); |
51 | if (clc < 1000) | 52 | if (clc < 1000) |
52 | clc = 1000; | 53 | clc = 1000; |
53 | if (clc > LONG_MAX) | 54 | if (clc > KTIME_MAX) |
54 | clc = LONG_MAX; | 55 | clc = KTIME_MAX; |
55 | 56 | ||
56 | return (unsigned long) clc; | 57 | return clc; |
57 | } | 58 | } |
58 | EXPORT_SYMBOL_GPL(clockevent_delta2ns); | 59 | EXPORT_SYMBOL_GPL(clockevent_delta2ns); |
59 | 60 | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4a310906b3e8..e85c23404d34 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -107,6 +107,59 @@ u64 timecounter_cyc2time(struct timecounter *tc, | |||
107 | } | 107 | } |
108 | EXPORT_SYMBOL_GPL(timecounter_cyc2time); | 108 | EXPORT_SYMBOL_GPL(timecounter_cyc2time); |
109 | 109 | ||
110 | /** | ||
111 | * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks | ||
112 | * @mult: pointer to mult variable | ||
113 | * @shift: pointer to shift variable | ||
114 | * @from: frequency to convert from | ||
115 | * @to: frequency to convert to | ||
116 | * @minsec: guaranteed runtime conversion range in seconds | ||
117 | * | ||
118 | * The function evaluates the shift/mult pair for the scaled math | ||
119 | * operations of clocksources and clockevents. | ||
120 | * | ||
121 | * @to and @from are frequency values in HZ. For clock sources @to is | ||
122 | * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock | ||
123 | * event @to is the counter frequency and @from is NSEC_PER_SEC. | ||
124 | * | ||
125 | * The @minsec conversion range argument controls the time frame in | ||
126 | * seconds which must be covered by the runtime conversion with the | ||
127 | * calculated mult and shift factors. This guarantees that no 64bit | ||
128 | * overflow happens when the input value of the conversion is | ||
129 | * multiplied with the calculated mult factor. Larger ranges may | ||
130 | * reduce the conversion accuracy by chosing smaller mult and shift | ||
131 | * factors. | ||
132 | */ | ||
133 | void | ||
134 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) | ||
135 | { | ||
136 | u64 tmp; | ||
137 | u32 sft, sftacc= 32; | ||
138 | |||
139 | /* | ||
140 | * Calculate the shift factor which is limiting the conversion | ||
141 | * range: | ||
142 | */ | ||
143 | tmp = ((u64)minsec * from) >> 32; | ||
144 | while (tmp) { | ||
145 | tmp >>=1; | ||
146 | sftacc--; | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Find the conversion shift/mult pair which has the best | ||
151 | * accuracy and fits the maxsec conversion range: | ||
152 | */ | ||
153 | for (sft = 32; sft > 0; sft--) { | ||
154 | tmp = (u64) to << sft; | ||
155 | do_div(tmp, from); | ||
156 | if ((tmp >> sftacc) == 0) | ||
157 | break; | ||
158 | } | ||
159 | *mult = tmp; | ||
160 | *shift = sft; | ||
161 | } | ||
162 | |||
110 | /*[Clocksource internal variables]--------- | 163 | /*[Clocksource internal variables]--------- |
111 | * curr_clocksource: | 164 | * curr_clocksource: |
112 | * currently selected clocksource. | 165 | * currently selected clocksource. |
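clocks_calc_mult_shift(), added above, picks the mult/shift pair for the scaled cycles-to-nanoseconds (or ns-to-cycles) math. The same arithmetic can be exercised in userspace; the 24 MHz example frequency below is made up:

#include <stdint.h>
#include <stdio.h>

/*
 * Userspace copy of the clocks_calc_mult_shift() math: pick a mult/shift
 * pair so that (value * mult) >> shift converts values at 'from' Hz into
 * units of 'to' Hz without overflowing 64 bits for at least 'minsec'
 * seconds worth of input.
 */
static void calc_mult_shift(uint32_t *mult, uint32_t *shift,
			    uint32_t from, uint32_t to, uint32_t minsec)
{
	uint64_t tmp;
	uint32_t sft, sftacc = 32;

	/* Shift budget left after covering minsec seconds of 'from'. */
	tmp = ((uint64_t)minsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/* Largest shift whose mult still fits the remaining budget. */
	for (sft = 32; sft > 0; sft--) {
		tmp = (uint64_t)to << sft;
		tmp /= from;
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = (uint32_t)tmp;
	*shift = sft;
}

int main(void)
{
	uint32_t mult, shift;

	/* Example: a 24 MHz counter converted to nanoseconds (1 GHz). */
	calc_mult_shift(&mult, &shift, 24000000, 1000000000, 600);

	/* 24e6 cycles (one second) should come out as roughly 1e9 ns. */
	uint64_t ns = ((uint64_t)24000000 * mult) >> shift;

	printf("mult=%u shift=%u  1s -> %llu ns\n",
	       (unsigned)mult, (unsigned)shift, (unsigned long long)ns);
	return 0;
}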
@@ -413,6 +466,47 @@ void clocksource_touch_watchdog(void) | |||
413 | clocksource_resume_watchdog(); | 466 | clocksource_resume_watchdog(); |
414 | } | 467 | } |
415 | 468 | ||
469 | /** | ||
470 | * clocksource_max_deferment - Returns max time the clocksource can be deferred | ||
471 | * @cs: Pointer to clocksource | ||
472 | * | ||
473 | */ | ||
474 | static u64 clocksource_max_deferment(struct clocksource *cs) | ||
475 | { | ||
476 | u64 max_nsecs, max_cycles; | ||
477 | |||
478 | /* | ||
479 | * Calculate the maximum number of cycles that we can pass to the | ||
480 | * cyc2ns function without overflowing a 64-bit signed result. The | ||
481 | * maximum number of cycles is equal to ULLONG_MAX/cs->mult which | ||
482 | * is equivalent to the below. | ||
483 | * max_cycles < (2^63)/cs->mult | ||
484 | * max_cycles < 2^(log2((2^63)/cs->mult)) | ||
485 | * max_cycles < 2^(log2(2^63) - log2(cs->mult)) | ||
486 | * max_cycles < 2^(63 - log2(cs->mult)) | ||
487 | * max_cycles < 1 << (63 - log2(cs->mult)) | ||
488 | * Please note that we add 1 to the result of the log2 to account for | ||
489 | * any rounding errors, ensure the above inequality is satisfied and | ||
490 | * no overflow will occur. | ||
491 | */ | ||
492 | max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1)); | ||
493 | |||
494 | /* | ||
495 | * The actual maximum number of cycles we can defer the clocksource is | ||
496 | * determined by the minimum of max_cycles and cs->mask. | ||
497 | */ | ||
498 | max_cycles = min_t(u64, max_cycles, (u64) cs->mask); | ||
499 | max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift); | ||
500 | |||
501 | /* | ||
502 | * To ensure that the clocksource does not wrap whilst we are idle, | ||
503 | * limit the time the clocksource can be deferred by 12.5%. Please | ||
504 | * note a margin of 12.5% is used because this can be computed with | ||
505 | * a shift, versus say 10% which would require division. | ||
506 | */ | ||
507 | return max_nsecs - (max_nsecs >> 5); | ||
508 | } | ||
509 | |||
416 | #ifdef CONFIG_GENERIC_TIME | 510 | #ifdef CONFIG_GENERIC_TIME |
417 | 511 | ||
418 | /** | 512 | /** |
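clocksource_max_deferment() bounds how long a NO_HZ CPU may sleep before the unaccumulated cycle count could overflow the cyc2ns multiplication or wrap the counter. A userspace sketch of the same calculation; the mult/shift/mask values in main() are illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Integer log2 for nonzero values (stand-in for the kernel's ilog2). */
static int ilog2_u64(uint64_t v)
{
	int l = -1;

	while (v) {
		v >>= 1;
		l++;
	}
	return l;
}

/*
 * Sketch of clocksource_max_deferment(): bound the cycle count so that
 * cycles * mult cannot overflow 63 bits, clamp to the counter mask,
 * convert to nanoseconds, then keep back a shift-computed safety margin
 * (max_nsecs >> 5, as in the patch) so the clocksource cannot wrap while
 * the CPU is idle.
 */
static uint64_t max_deferment_ns(uint32_t mult, uint32_t shift, uint64_t mask)
{
	uint64_t max_cycles, max_nsecs;

	max_cycles = 1ULL << (63 - (ilog2_u64(mult) + 1));
	if (max_cycles > mask)
		max_cycles = mask;

	max_nsecs = (max_cycles * mult) >> shift;	/* clocksource_cyc2ns() */

	return max_nsecs - (max_nsecs >> 5);
}

int main(void)
{
	/* e.g. a 32-bit counter at 24 MHz with mult=699050666, shift=24 */
	uint64_t ns = max_deferment_ns(699050666u, 24, 0xffffffffULL);

	printf("max idle: %llu ns (~%llu s)\n",
	       (unsigned long long)ns, (unsigned long long)(ns / 1000000000));
	return 0;
}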
@@ -511,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs) | |||
511 | */ | 605 | */ |
512 | int clocksource_register(struct clocksource *cs) | 606 | int clocksource_register(struct clocksource *cs) |
513 | { | 607 | { |
608 | /* calculate max idle time permitted for this clocksource */ | ||
609 | cs->max_idle_ns = clocksource_max_deferment(cs); | ||
610 | |||
514 | mutex_lock(&clocksource_mutex); | 611 | mutex_lock(&clocksource_mutex); |
515 | clocksource_enqueue(cs); | 612 | clocksource_enqueue(cs); |
516 | clocksource_select(); | 613 | clocksource_select(); |
@@ -580,7 +677,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, | |||
580 | * @count: length of buffer | 677 | * @count: length of buffer |
581 | * | 678 | * |
582 | * Takes input from sysfs interface for manually overriding the default | 679 | * Takes input from sysfs interface for manually overriding the default |
583 | * clocksource selction. | 680 | * clocksource selection. |
584 | */ | 681 | */ |
585 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | 682 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, |
586 | struct sysdev_attribute *attr, | 683 | struct sysdev_attribute *attr, |
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index a96c0e2b89cf..0a8a213016f0 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, | |||
50 | dev->min_delta_ns += dev->min_delta_ns >> 1; | 50 | dev->min_delta_ns += dev->min_delta_ns >> 1; |
51 | 51 | ||
52 | printk(KERN_WARNING | 52 | printk(KERN_WARNING |
53 | "CE: %s increasing min_delta_ns to %lu nsec\n", | 53 | "CE: %s increasing min_delta_ns to %llu nsec\n", |
54 | dev->name ? dev->name : "?", | 54 | dev->name ? dev->name : "?", |
55 | dev->min_delta_ns << 1); | 55 | (unsigned long long) dev->min_delta_ns << 1); |
56 | 56 | ||
57 | i = 0; | 57 | i = 0; |
58 | } | 58 | } |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89aed5933ed4..f992762d7f51 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz); | |||
134 | * value. We do this unconditionally on any cpu, as we don't know whether the | 134 | * value. We do this unconditionally on any cpu, as we don't know whether the |
135 | * cpu, which has the update task assigned is in a long sleep. | 135 | * cpu, which has the update task assigned is in a long sleep. |
136 | */ | 136 | */ |
137 | static void tick_nohz_update_jiffies(void) | 137 | static void tick_nohz_update_jiffies(ktime_t now) |
138 | { | 138 | { |
139 | int cpu = smp_processor_id(); | 139 | int cpu = smp_processor_id(); |
140 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 140 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
141 | unsigned long flags; | 141 | unsigned long flags; |
142 | ktime_t now; | ||
143 | |||
144 | if (!ts->tick_stopped) | ||
145 | return; | ||
146 | 142 | ||
147 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | 143 | cpumask_clear_cpu(cpu, nohz_cpu_mask); |
148 | now = ktime_get(); | ||
149 | ts->idle_waketime = now; | 144 | ts->idle_waketime = now; |
150 | 145 | ||
151 | local_irq_save(flags); | 146 | local_irq_save(flags); |
@@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void) | |||
155 | touch_softlockup_watchdog(); | 150 | touch_softlockup_watchdog(); |
156 | } | 151 | } |
157 | 152 | ||
158 | static void tick_nohz_stop_idle(int cpu) | 153 | static void tick_nohz_stop_idle(int cpu, ktime_t now) |
159 | { | 154 | { |
160 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 155 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
156 | ktime_t delta; | ||
161 | 157 | ||
162 | if (ts->idle_active) { | 158 | delta = ktime_sub(now, ts->idle_entrytime); |
163 | ktime_t now, delta; | 159 | ts->idle_lastupdate = now; |
164 | now = ktime_get(); | 160 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
165 | delta = ktime_sub(now, ts->idle_entrytime); | 161 | ts->idle_active = 0; |
166 | ts->idle_lastupdate = now; | ||
167 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
168 | ts->idle_active = 0; | ||
169 | 162 | ||
170 | sched_clock_idle_wakeup_event(0); | 163 | sched_clock_idle_wakeup_event(0); |
171 | } | ||
172 | } | 164 | } |
173 | 165 | ||
174 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) | 166 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) |
@@ -216,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
216 | struct tick_sched *ts; | 208 | struct tick_sched *ts; |
217 | ktime_t last_update, expires, now; | 209 | ktime_t last_update, expires, now; |
218 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 210 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
211 | u64 time_delta; | ||
219 | int cpu; | 212 | int cpu; |
220 | 213 | ||
221 | local_irq_save(flags); | 214 | local_irq_save(flags); |
@@ -263,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
263 | 256 | ||
264 | if (ratelimit < 10) { | 257 | if (ratelimit < 10) { |
265 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | 258 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", |
266 | local_softirq_pending()); | 259 | (unsigned int) local_softirq_pending()); |
267 | ratelimit++; | 260 | ratelimit++; |
268 | } | 261 | } |
269 | goto end; | 262 | goto end; |
@@ -275,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
275 | seq = read_seqbegin(&xtime_lock); | 268 | seq = read_seqbegin(&xtime_lock); |
276 | last_update = last_jiffies_update; | 269 | last_update = last_jiffies_update; |
277 | last_jiffies = jiffies; | 270 | last_jiffies = jiffies; |
271 | time_delta = timekeeping_max_deferment(); | ||
278 | } while (read_seqretry(&xtime_lock, seq)); | 272 | } while (read_seqretry(&xtime_lock, seq)); |
279 | 273 | ||
280 | /* Get the next timer wheel timer */ | 274 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
281 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 275 | arch_needs_cpu(cpu)) { |
282 | delta_jiffies = next_jiffies - last_jiffies; | 276 | next_jiffies = last_jiffies + 1; |
283 | |||
284 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) | ||
285 | delta_jiffies = 1; | 277 | delta_jiffies = 1; |
278 | } else { | ||
279 | /* Get the next timer wheel timer */ | ||
280 | next_jiffies = get_next_timer_interrupt(last_jiffies); | ||
281 | delta_jiffies = next_jiffies - last_jiffies; | ||
282 | } | ||
286 | /* | 283 | /* |
287 | * Do not stop the tick, if we are only one off | 284 | * Do not stop the tick, if we are only one off |
288 | * or if the cpu is required for rcu | 285 | * or if the cpu is required for rcu |
@@ -294,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
294 | if ((long)delta_jiffies >= 1) { | 291 | if ((long)delta_jiffies >= 1) { |
295 | 292 | ||
296 | /* | 293 | /* |
297 | * calculate the expiry time for the next timer wheel | ||
298 | * timer | ||
299 | */ | ||
300 | expires = ktime_add_ns(last_update, tick_period.tv64 * | ||
301 | delta_jiffies); | ||
302 | |||
303 | /* | ||
304 | * If this cpu is the one which updates jiffies, then | 294 | * If this cpu is the one which updates jiffies, then |
305 | * give up the assignment and let it be taken by the | 295 | * give up the assignment and let it be taken by the |
306 | * cpu which runs the tick timer next, which might be | 296 | * cpu which runs the tick timer next, which might be |
307 | * this cpu as well. If we don't drop this here the | 297 | * this cpu as well. If we don't drop this here the |
308 | * jiffies might be stale and do_timer() never | 298 | * jiffies might be stale and do_timer() never |
309 | * invoked. | 299 | * invoked. Keep track of the fact that it was the one |
300 | * which had the do_timer() duty last. If this cpu is | ||
301 | * the one which had the do_timer() duty last, we | ||
302 | * limit the sleep time to the timekeeping | ||
303 | * max_deferement value which we retrieved | ||
304 | * above. Otherwise we can sleep as long as we want. | ||
310 | */ | 305 | */ |
311 | if (cpu == tick_do_timer_cpu) | 306 | if (cpu == tick_do_timer_cpu) { |
312 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 307 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
308 | ts->do_timer_last = 1; | ||
309 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | ||
310 | time_delta = KTIME_MAX; | ||
311 | ts->do_timer_last = 0; | ||
312 | } else if (!ts->do_timer_last) { | ||
313 | time_delta = KTIME_MAX; | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * calculate the expiry time for the next timer wheel | ||
318 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals | ||
319 | * that there is no timer pending or at least extremely | ||
320 | * far into the future (12 days for HZ=1000). In this | ||
321 | * case we set the expiry to the end of time. | ||
322 | */ | ||
323 | if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { | ||
324 | /* | ||
325 | * Calculate the time delta for the next timer event. | ||
326 | * If the time delta exceeds the maximum time delta | ||
327 | * permitted by the current clocksource then adjust | ||
328 | * the time delta accordingly to ensure the | ||
329 | * clocksource does not wrap. | ||
330 | */ | ||
331 | time_delta = min_t(u64, time_delta, | ||
332 | tick_period.tv64 * delta_jiffies); | ||
333 | } | ||
334 | |||
335 | if (time_delta < KTIME_MAX) | ||
336 | expires = ktime_add_ns(last_update, time_delta); | ||
337 | else | ||
338 | expires.tv64 = KTIME_MAX; | ||
313 | 339 | ||
314 | if (delta_jiffies > 1) | 340 | if (delta_jiffies > 1) |
315 | cpumask_set_cpu(cpu, nohz_cpu_mask); | 341 | cpumask_set_cpu(cpu, nohz_cpu_mask); |
@@ -342,22 +368,19 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
342 | 368 | ||
343 | ts->idle_sleeps++; | 369 | ts->idle_sleeps++; |
344 | 370 | ||
371 | /* Mark expires */ | ||
372 | ts->idle_expires = expires; | ||
373 | |||
345 | /* | 374 | /* |
346 | * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that | 375 | * If the expiration time == KTIME_MAX, then |
347 | * there is no timer pending or at least extremly far | 376 | * in this case we simply stop the tick timer. |
348 | * into the future (12 days for HZ=1000). In this case | ||
349 | * we simply stop the tick timer: | ||
350 | */ | 377 | */ |
351 | if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { | 378 | if (unlikely(expires.tv64 == KTIME_MAX)) { |
352 | ts->idle_expires.tv64 = KTIME_MAX; | ||
353 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 379 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
354 | hrtimer_cancel(&ts->sched_timer); | 380 | hrtimer_cancel(&ts->sched_timer); |
355 | goto out; | 381 | goto out; |
356 | } | 382 | } |
357 | 383 | ||
358 | /* Mark expiries */ | ||
359 | ts->idle_expires = expires; | ||
360 | |||
361 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 384 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
362 | hrtimer_start(&ts->sched_timer, expires, | 385 | hrtimer_start(&ts->sched_timer, expires, |
363 | HRTIMER_MODE_ABS_PINNED); | 386 | HRTIMER_MODE_ABS_PINNED); |
@@ -436,7 +459,11 @@ void tick_nohz_restart_sched_tick(void) | |||
436 | ktime_t now; | 459 | ktime_t now; |
437 | 460 | ||
438 | local_irq_disable(); | 461 | local_irq_disable(); |
439 | tick_nohz_stop_idle(cpu); | 462 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) |
463 | now = ktime_get(); | ||
464 | |||
465 | if (ts->idle_active) | ||
466 | tick_nohz_stop_idle(cpu, now); | ||
440 | 467 | ||
441 | if (!ts->inidle || !ts->tick_stopped) { | 468 | if (!ts->inidle || !ts->tick_stopped) { |
442 | ts->inidle = 0; | 469 | ts->inidle = 0; |
@@ -450,7 +477,6 @@ void tick_nohz_restart_sched_tick(void) | |||
450 | 477 | ||
451 | /* Update jiffies first */ | 478 | /* Update jiffies first */ |
452 | select_nohz_load_balancer(0); | 479 | select_nohz_load_balancer(0); |
453 | now = ktime_get(); | ||
454 | tick_do_update_jiffies64(now); | 480 | tick_do_update_jiffies64(now); |
455 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | 481 | cpumask_clear_cpu(cpu, nohz_cpu_mask); |
456 | 482 | ||
@@ -584,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void) | |||
584 | * timer and do not touch the other magic bits which need to be done | 610 | * timer and do not touch the other magic bits which need to be done |
585 | * when idle is left. | 611 | * when idle is left. |
586 | */ | 612 | */ |
587 | static void tick_nohz_kick_tick(int cpu) | 613 | static void tick_nohz_kick_tick(int cpu, ktime_t now) |
588 | { | 614 | { |
589 | #if 0 | 615 | #if 0 |
590 | /* Switch back to 2.6.27 behaviour */ | 616 | /* Switch back to 2.6.27 behaviour */ |
591 | 617 | ||
592 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 618 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
593 | ktime_t delta, now; | 619 | ktime_t delta; |
594 | |||
595 | if (!ts->tick_stopped) | ||
596 | return; | ||
597 | 620 | ||
598 | /* | 621 | /* |
599 | * Do not touch the tick device, when the next expiry is either | 622 | * Do not touch the tick device, when the next expiry is either |
600 | * already reached or less/equal than the tick period. | 623 | * already reached or less/equal than the tick period. |
601 | */ | 624 | */ |
602 | now = ktime_get(); | ||
603 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); | 625 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); |
604 | if (delta.tv64 <= tick_period.tv64) | 626 | if (delta.tv64 <= tick_period.tv64) |
605 | return; | 627 | return; |
@@ -608,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu) | |||
608 | #endif | 630 | #endif |
609 | } | 631 | } |
610 | 632 | ||
633 | static inline void tick_check_nohz(int cpu) | ||
634 | { | ||
635 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
636 | ktime_t now; | ||
637 | |||
638 | if (!ts->idle_active && !ts->tick_stopped) | ||
639 | return; | ||
640 | now = ktime_get(); | ||
641 | if (ts->idle_active) | ||
642 | tick_nohz_stop_idle(cpu, now); | ||
643 | if (ts->tick_stopped) { | ||
644 | tick_nohz_update_jiffies(now); | ||
645 | tick_nohz_kick_tick(cpu, now); | ||
646 | } | ||
647 | } | ||
648 | |||
611 | #else | 649 | #else |
612 | 650 | ||
613 | static inline void tick_nohz_switch_to_nohz(void) { } | 651 | static inline void tick_nohz_switch_to_nohz(void) { } |
652 | static inline void tick_check_nohz(int cpu) { } | ||
614 | 653 | ||
615 | #endif /* NO_HZ */ | 654 | #endif /* NO_HZ */ |
616 | 655 | ||
@@ -620,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { } | |||
620 | void tick_check_idle(int cpu) | 659 | void tick_check_idle(int cpu) |
621 | { | 660 | { |
622 | tick_check_oneshot_broadcast(cpu); | 661 | tick_check_oneshot_broadcast(cpu); |
623 | #ifdef CONFIG_NO_HZ | 662 | tick_check_nohz(cpu); |
624 | tick_nohz_stop_idle(cpu); | ||
625 | tick_nohz_update_jiffies(); | ||
626 | tick_nohz_kick_tick(cpu); | ||
627 | #endif | ||
628 | } | 663 | } |
629 | 664 | ||
630 | /* | 665 | /* |
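The tick-sched rework lets every CPU except the one that holds, or last held, the do_timer() duty sleep without a timekeeping bound, while that one CPU limits its sleep to timekeeping_max_deferment(). A condensed model of just that decision, with invented per-CPU state and times:

#include <stdint.h>
#include <stdio.h>

#define KTIME_MAX		INT64_MAX
#define TICK_DO_TIMER_NONE	(-1)

/*
 * Condensed sketch of the new sleep-length decision in
 * tick_nohz_stop_sched_tick(): only the CPU that holds, or last held,
 * the do_timer() duty bounds its sleep by the clocksource's max_idle_ns;
 * every other CPU may sleep until its next timer.
 */
static int64_t pick_sleep_ns(int cpu, int *tick_do_timer_cpu,
			     int *do_timer_last, int64_t max_deferment_ns,
			     int64_t next_timer_ns)
{
	int64_t time_delta = max_deferment_ns;

	if (cpu == *tick_do_timer_cpu) {
		*tick_do_timer_cpu = TICK_DO_TIMER_NONE;
		*do_timer_last = 1;
	} else if (*tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
		time_delta = KTIME_MAX;
		*do_timer_last = 0;
	} else if (!*do_timer_last) {
		time_delta = KTIME_MAX;
	}

	/* Never sleep past the next pending timer either. */
	if (next_timer_ns < time_delta)
		time_delta = next_timer_ns;
	return time_delta;
}

int main(void)
{
	int do_timer_cpu = 0, last0 = 0, last1 = 0;

	/* CPU0 held the duty: its sleep is capped by max_deferment. */
	printf("cpu0: %lld\n", (long long)pick_sleep_ns(0, &do_timer_cpu,
			&last0, 170000000000LL, INT64_MAX));
	/* CPU1 never had the duty and no timer is due: it may sleep "forever". */
	printf("cpu1: %lld\n", (long long)pick_sleep_ns(1, &do_timer_cpu,
			&last1, 170000000000LL, INT64_MAX));
	return 0;
}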
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c3a4e2907eaa..af4135f05825 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,19 +165,12 @@ struct timespec raw_time; | |||
165 | /* flag for if timekeeping is suspended */ | 165 | /* flag for if timekeeping is suspended */ |
166 | int __read_mostly timekeeping_suspended; | 166 | int __read_mostly timekeeping_suspended; |
167 | 167 | ||
168 | static struct timespec xtime_cache __attribute__ ((aligned (16))); | ||
169 | void update_xtime_cache(u64 nsec) | ||
170 | { | ||
171 | xtime_cache = xtime; | ||
172 | timespec_add_ns(&xtime_cache, nsec); | ||
173 | } | ||
174 | |||
175 | /* must hold xtime_lock */ | 168 | /* must hold xtime_lock */ |
176 | void timekeeping_leap_insert(int leapsecond) | 169 | void timekeeping_leap_insert(int leapsecond) |
177 | { | 170 | { |
178 | xtime.tv_sec += leapsecond; | 171 | xtime.tv_sec += leapsecond; |
179 | wall_to_monotonic.tv_sec -= leapsecond; | 172 | wall_to_monotonic.tv_sec -= leapsecond; |
180 | update_vsyscall(&xtime, timekeeper.clock); | 173 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); |
181 | } | 174 | } |
182 | 175 | ||
183 | #ifdef CONFIG_GENERIC_TIME | 176 | #ifdef CONFIG_GENERIC_TIME |
@@ -332,12 +325,10 @@ int do_settimeofday(struct timespec *tv) | |||
332 | 325 | ||
333 | xtime = *tv; | 326 | xtime = *tv; |
334 | 327 | ||
335 | update_xtime_cache(0); | ||
336 | |||
337 | timekeeper.ntp_error = 0; | 328 | timekeeper.ntp_error = 0; |
338 | ntp_clear(); | 329 | ntp_clear(); |
339 | 330 | ||
340 | update_vsyscall(&xtime, timekeeper.clock); | 331 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); |
341 | 332 | ||
342 | write_sequnlock_irqrestore(&xtime_lock, flags); | 333 | write_sequnlock_irqrestore(&xtime_lock, flags); |
343 | 334 | ||
@@ -488,6 +479,17 @@ int timekeeping_valid_for_hres(void) | |||
488 | } | 479 | } |
489 | 480 | ||
490 | /** | 481 | /** |
482 | * timekeeping_max_deferment - Returns max time the clocksource can be deferred | ||
483 | * | ||
484 | * Caller must observe xtime_lock via read_seqbegin/read_seqretry to | ||
485 | * ensure that the clocksource does not change! | ||
486 | */ | ||
487 | u64 timekeeping_max_deferment(void) | ||
488 | { | ||
489 | return timekeeper.clock->max_idle_ns; | ||
490 | } | ||
491 | |||
492 | /** | ||
491 | * read_persistent_clock - Return time from the persistent clock. | 493 | * read_persistent_clock - Return time from the persistent clock. |
492 | * | 494 | * |
493 | * Weak dummy function for arches that do not yet support it. | 495 | * Weak dummy function for arches that do not yet support it. |
@@ -548,7 +550,6 @@ void __init timekeeping_init(void) | |||
548 | } | 550 | } |
549 | set_normalized_timespec(&wall_to_monotonic, | 551 | set_normalized_timespec(&wall_to_monotonic, |
550 | -boot.tv_sec, -boot.tv_nsec); | 552 | -boot.tv_sec, -boot.tv_nsec); |
551 | update_xtime_cache(0); | ||
552 | total_sleep_time.tv_sec = 0; | 553 | total_sleep_time.tv_sec = 0; |
553 | total_sleep_time.tv_nsec = 0; | 554 | total_sleep_time.tv_nsec = 0; |
554 | write_sequnlock_irqrestore(&xtime_lock, flags); | 555 | write_sequnlock_irqrestore(&xtime_lock, flags); |
@@ -582,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) | |||
582 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); | 583 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); |
583 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); | 584 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); |
584 | } | 585 | } |
585 | update_xtime_cache(0); | ||
586 | /* re-base the last cycle value */ | 586 | /* re-base the last cycle value */ |
587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
588 | timekeeper.ntp_error = 0; | 588 | timekeeper.ntp_error = 0; |
@@ -723,6 +723,49 @@ static void timekeeping_adjust(s64 offset) | |||
723 | } | 723 | } |
724 | 724 | ||
725 | /** | 725 | /** |
726 | * logarithmic_accumulation - shifted accumulation of cycles | ||
727 | * | ||
728 | * This functions accumulates a shifted interval of cycles into | ||
729 | * into a shifted interval nanoseconds. Allows for O(log) accumulation | ||
730 | * loop. | ||
731 | * | ||
732 | * Returns the unconsumed cycles. | ||
733 | */ | ||
734 | static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | ||
735 | { | ||
736 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; | ||
737 | |||
738 | /* If the offset is smaller then a shifted interval, do nothing */ | ||
739 | if (offset < timekeeper.cycle_interval<<shift) | ||
740 | return offset; | ||
741 | |||
742 | /* Accumulate one shifted interval */ | ||
743 | offset -= timekeeper.cycle_interval << shift; | ||
744 | timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; | ||
745 | |||
746 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; | ||
747 | while (timekeeper.xtime_nsec >= nsecps) { | ||
748 | timekeeper.xtime_nsec -= nsecps; | ||
749 | xtime.tv_sec++; | ||
750 | second_overflow(); | ||
751 | } | ||
752 | |||
753 | /* Accumulate into raw time */ | ||
754 | raw_time.tv_nsec += timekeeper.raw_interval << shift;; | ||
755 | while (raw_time.tv_nsec >= NSEC_PER_SEC) { | ||
756 | raw_time.tv_nsec -= NSEC_PER_SEC; | ||
757 | raw_time.tv_sec++; | ||
758 | } | ||
759 | |||
760 | /* Accumulate error between NTP and clock interval */ | ||
761 | timekeeper.ntp_error += tick_length << shift; | ||
762 | timekeeper.ntp_error -= timekeeper.xtime_interval << | ||
763 | (timekeeper.ntp_error_shift + shift); | ||
764 | |||
765 | return offset; | ||
766 | } | ||
767 | |||
768 | /** | ||
726 | * update_wall_time - Uses the current clocksource to increment the wall time | 769 | * update_wall_time - Uses the current clocksource to increment the wall time |
727 | * | 770 | * |
728 | * Called from the timer interrupt, must hold a write on xtime_lock. | 771 | * Called from the timer interrupt, must hold a write on xtime_lock. |
@@ -731,7 +774,7 @@ void update_wall_time(void) | |||
731 | { | 774 | { |
732 | struct clocksource *clock; | 775 | struct clocksource *clock; |
733 | cycle_t offset; | 776 | cycle_t offset; |
734 | u64 nsecs; | 777 | int shift = 0, maxshift; |
735 | 778 | ||
736 | /* Make sure we're fully resumed: */ | 779 | /* Make sure we're fully resumed: */ |
737 | if (unlikely(timekeeping_suspended)) | 780 | if (unlikely(timekeeping_suspended)) |
@@ -745,33 +788,22 @@ void update_wall_time(void) | |||
745 | #endif | 788 | #endif |
746 | timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; | 789 | timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; |
747 | 790 | ||
748 | /* normally this loop will run just once, however in the | 791 | /* |
749 | * case of lost or late ticks, it will accumulate correctly. | 792 | * With NO_HZ we may have to accumulate many cycle_intervals |
793 | * (think "ticks") worth of time at once. To do this efficiently, | ||
794 | * we calculate the largest doubling multiple of cycle_intervals | ||
795 | * that is smaller then the offset. We then accumulate that | ||
796 | * chunk in one go, and then try to consume the next smaller | ||
797 | * doubled multiple. | ||
750 | */ | 798 | */ |
799 | shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); | ||
800 | shift = max(0, shift); | ||
801 | /* Bound shift to one less then what overflows tick_length */ | ||
802 | maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1; | ||
803 | shift = min(shift, maxshift); | ||
751 | while (offset >= timekeeper.cycle_interval) { | 804 | while (offset >= timekeeper.cycle_interval) { |
752 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; | 805 | offset = logarithmic_accumulation(offset, shift); |
753 | 806 | shift--; | |
754 | /* accumulate one interval */ | ||
755 | offset -= timekeeper.cycle_interval; | ||
756 | clock->cycle_last += timekeeper.cycle_interval; | ||
757 | |||
758 | timekeeper.xtime_nsec += timekeeper.xtime_interval; | ||
759 | if (timekeeper.xtime_nsec >= nsecps) { | ||
760 | timekeeper.xtime_nsec -= nsecps; | ||
761 | xtime.tv_sec++; | ||
762 | second_overflow(); | ||
763 | } | ||
764 | |||
765 | raw_time.tv_nsec += timekeeper.raw_interval; | ||
766 | if (raw_time.tv_nsec >= NSEC_PER_SEC) { | ||
767 | raw_time.tv_nsec -= NSEC_PER_SEC; | ||
768 | raw_time.tv_sec++; | ||
769 | } | ||
770 | |||
771 | /* accumulate error between NTP and clock interval */ | ||
772 | timekeeper.ntp_error += tick_length; | ||
773 | timekeeper.ntp_error -= timekeeper.xtime_interval << | ||
774 | timekeeper.ntp_error_shift; | ||
775 | } | 807 | } |
776 | 808 | ||
777 | /* correct the clock when NTP error is too big */ | 809 | /* correct the clock when NTP error is too big */ |
@@ -807,11 +839,8 @@ void update_wall_time(void) | |||
807 | timekeeper.ntp_error += timekeeper.xtime_nsec << | 839 | timekeeper.ntp_error += timekeeper.xtime_nsec << |
808 | timekeeper.ntp_error_shift; | 840 | timekeeper.ntp_error_shift; |
809 | 841 | ||
810 | nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); | ||
811 | update_xtime_cache(nsecs); | ||
812 | |||
813 | /* check to see if there is a new clocksource to use */ | 842 | /* check to see if there is a new clocksource to use */ |
814 | update_vsyscall(&xtime, timekeeper.clock); | 843 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); |
815 | } | 844 | } |
816 | 845 | ||
817 | /** | 846 | /** |
@@ -846,13 +875,13 @@ void monotonic_to_bootbased(struct timespec *ts) | |||
846 | 875 | ||
847 | unsigned long get_seconds(void) | 876 | unsigned long get_seconds(void) |
848 | { | 877 | { |
849 | return xtime_cache.tv_sec; | 878 | return xtime.tv_sec; |
850 | } | 879 | } |
851 | EXPORT_SYMBOL(get_seconds); | 880 | EXPORT_SYMBOL(get_seconds); |
852 | 881 | ||
853 | struct timespec __current_kernel_time(void) | 882 | struct timespec __current_kernel_time(void) |
854 | { | 883 | { |
855 | return xtime_cache; | 884 | return xtime; |
856 | } | 885 | } |
857 | 886 | ||
858 | struct timespec current_kernel_time(void) | 887 | struct timespec current_kernel_time(void) |
@@ -862,8 +891,7 @@ struct timespec current_kernel_time(void) | |||
862 | 891 | ||
863 | do { | 892 | do { |
864 | seq = read_seqbegin(&xtime_lock); | 893 | seq = read_seqbegin(&xtime_lock); |
865 | 894 | now = xtime; | |
866 | now = xtime_cache; | ||
867 | } while (read_seqretry(&xtime_lock, seq)); | 895 | } while (read_seqretry(&xtime_lock, seq)); |
868 | 896 | ||
869 | return now; | 897 | return now; |
@@ -877,8 +905,7 @@ struct timespec get_monotonic_coarse(void) | |||
877 | 905 | ||
878 | do { | 906 | do { |
879 | seq = read_seqbegin(&xtime_lock); | 907 | seq = read_seqbegin(&xtime_lock); |
880 | 908 | now = xtime; | |
881 | now = xtime_cache; | ||
882 | mono = wall_to_monotonic; | 909 | mono = wall_to_monotonic; |
883 | } while (read_seqretry(&xtime_lock, seq)); | 910 | } while (read_seqretry(&xtime_lock, seq)); |
884 | 911 | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 1b5b7aa2fdfd..665c76edbf17 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -204,10 +204,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) | |||
204 | return; | 204 | return; |
205 | } | 205 | } |
206 | SEQ_printf(m, "%s\n", dev->name); | 206 | SEQ_printf(m, "%s\n", dev->name); |
207 | SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); | 207 | SEQ_printf(m, " max_delta_ns: %llu\n", |
208 | SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); | 208 | (unsigned long long) dev->max_delta_ns); |
209 | SEQ_printf(m, " mult: %lu\n", dev->mult); | 209 | SEQ_printf(m, " min_delta_ns: %llu\n", |
210 | SEQ_printf(m, " shift: %d\n", dev->shift); | 210 | (unsigned long long) dev->min_delta_ns); |
211 | SEQ_printf(m, " mult: %u\n", dev->mult); | ||
212 | SEQ_printf(m, " shift: %u\n", dev->shift); | ||
211 | SEQ_printf(m, " mode: %d\n", dev->mode); | 213 | SEQ_printf(m, " mode: %d\n", dev->mode); |
212 | SEQ_printf(m, " next_event: %Ld nsecs\n", | 214 | SEQ_printf(m, " next_event: %Ld nsecs\n", |
213 | (unsigned long long) ktime_to_ns(dev->next_event)); | 215 | (unsigned long long) ktime_to_ns(dev->next_event)); |
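The format changes go with min_delta_ns/max_delta_ns becoming u64: since u64 maps to unsigned long or unsigned long long depending on the architecture, the value is cast to unsigned long long and printed with %llu. A tiny stand-alone illustration of that pattern (the value is made up):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t min_delta_ns = 150000;         /* illustrative value only */

            /* cast explicitly so the format is correct on 32- and 64-bit builds */
            printf(" min_delta_ns: %llu\n", (unsigned long long)min_delta_ns);
            return 0;
    }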
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 03e2d6fd9b18..eb27fd3430a2 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c | |||
@@ -6,8 +6,6 @@ | |||
6 | 6 | ||
7 | static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); | 7 | static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); |
8 | 8 | ||
9 | #define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id()) | ||
10 | |||
11 | /* | 9 | /* |
12 | * Request a notification when the current cpu returns to userspace. Must be | 10 | * Request a notification when the current cpu returns to userspace. Must be |
13 | * called in atomic context. The notifier will also be called in atomic | 11 | * called in atomic context. The notifier will also be called in atomic |
@@ -16,7 +14,7 @@ static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); | |||
16 | void user_return_notifier_register(struct user_return_notifier *urn) | 14 | void user_return_notifier_register(struct user_return_notifier *urn) |
17 | { | 15 | { |
18 | set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); | 16 | set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); |
19 | hlist_add_head(&urn->link, &URN_LIST_HEAD); | 17 | hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list)); |
20 | } | 18 | } |
21 | EXPORT_SYMBOL_GPL(user_return_notifier_register); | 19 | EXPORT_SYMBOL_GPL(user_return_notifier_register); |
22 | 20 | ||
@@ -27,7 +25,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register); | |||
27 | void user_return_notifier_unregister(struct user_return_notifier *urn) | 25 | void user_return_notifier_unregister(struct user_return_notifier *urn) |
28 | { | 26 | { |
29 | hlist_del(&urn->link); | 27 | hlist_del(&urn->link); |
30 | if (hlist_empty(&URN_LIST_HEAD)) | 28 | if (hlist_empty(&__get_cpu_var(return_notifier_list))) |
31 | clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); | 29 | clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); |
32 | } | 30 | } |
33 | EXPORT_SYMBOL_GPL(user_return_notifier_unregister); | 31 | EXPORT_SYMBOL_GPL(user_return_notifier_unregister); |
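Since registration now hangs the notifier directly off this CPU's return_notifier_list, callers must stay pinned to a CPU (atomic context) while registering, as the comment above requires. A hedged usage sketch; the my_* names are invented for illustration, and the struct is assumed to carry an on_user_return callback plus the list link this file manipulates:

    #include <linux/percpu.h>
    #include <linux/user-return-notifier.h>

    static void my_on_user_return(struct user_return_notifier *urn)
    {
            /* runs in atomic context just before this CPU returns to user space */
    }

    static DEFINE_PER_CPU(struct user_return_notifier, my_urn) = {
            .on_user_return = my_on_user_return,
    };

    /* call with preemption disabled so we stay on this CPU */
    static void my_arm_notifier(void)
    {
            user_return_notifier_register(&__get_cpu_var(my_urn));
    }

    static void my_disarm_notifier(void)
    {
            user_return_notifier_unregister(&__get_cpu_var(my_urn));
    }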
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 67e526b6ae81..dee48658805c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -68,6 +68,116 @@ struct workqueue_struct { | |||
68 | #endif | 68 | #endif |
69 | }; | 69 | }; |
70 | 70 | ||
71 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | ||
72 | |||
73 | static struct debug_obj_descr work_debug_descr; | ||
74 | |||
75 | /* | ||
76 | * fixup_init is called when: | ||
77 | * - an active object is initialized | ||
78 | */ | ||
79 | static int work_fixup_init(void *addr, enum debug_obj_state state) | ||
80 | { | ||
81 | struct work_struct *work = addr; | ||
82 | |||
83 | switch (state) { | ||
84 | case ODEBUG_STATE_ACTIVE: | ||
85 | cancel_work_sync(work); | ||
86 | debug_object_init(work, &work_debug_descr); | ||
87 | return 1; | ||
88 | default: | ||
89 | return 0; | ||
90 | } | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * fixup_activate is called when: | ||
95 | * - an active object is activated | ||
96 | * - an unknown object is activated (might be a statically initialized object) | ||
97 | */ | ||
98 | static int work_fixup_activate(void *addr, enum debug_obj_state state) | ||
99 | { | ||
100 | struct work_struct *work = addr; | ||
101 | |||
102 | switch (state) { | ||
103 | |||
104 | case ODEBUG_STATE_NOTAVAILABLE: | ||
105 | /* | ||
106 | * This is not really a fixup. The work struct was | ||
107 | * statically initialized. We just make sure that it | ||
108 | * is tracked in the object tracker. | ||
109 | */ | ||
110 | if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) { | ||
111 | debug_object_init(work, &work_debug_descr); | ||
112 | debug_object_activate(work, &work_debug_descr); | ||
113 | return 0; | ||
114 | } | ||
115 | WARN_ON_ONCE(1); | ||
116 | return 0; | ||
117 | |||
118 | case ODEBUG_STATE_ACTIVE: | ||
119 | WARN_ON(1); | ||
120 | |||
121 | default: | ||
122 | return 0; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * fixup_free is called when: | ||
128 | * - an active object is freed | ||
129 | */ | ||
130 | static int work_fixup_free(void *addr, enum debug_obj_state state) | ||
131 | { | ||
132 | struct work_struct *work = addr; | ||
133 | |||
134 | switch (state) { | ||
135 | case ODEBUG_STATE_ACTIVE: | ||
136 | cancel_work_sync(work); | ||
137 | debug_object_free(work, &work_debug_descr); | ||
138 | return 1; | ||
139 | default: | ||
140 | return 0; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | static struct debug_obj_descr work_debug_descr = { | ||
145 | .name = "work_struct", | ||
146 | .fixup_init = work_fixup_init, | ||
147 | .fixup_activate = work_fixup_activate, | ||
148 | .fixup_free = work_fixup_free, | ||
149 | }; | ||
150 | |||
151 | static inline void debug_work_activate(struct work_struct *work) | ||
152 | { | ||
153 | debug_object_activate(work, &work_debug_descr); | ||
154 | } | ||
155 | |||
156 | static inline void debug_work_deactivate(struct work_struct *work) | ||
157 | { | ||
158 | debug_object_deactivate(work, &work_debug_descr); | ||
159 | } | ||
160 | |||
161 | void __init_work(struct work_struct *work, int onstack) | ||
162 | { | ||
163 | if (onstack) | ||
164 | debug_object_init_on_stack(work, &work_debug_descr); | ||
165 | else | ||
166 | debug_object_init(work, &work_debug_descr); | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(__init_work); | ||
169 | |||
170 | void destroy_work_on_stack(struct work_struct *work) | ||
171 | { | ||
172 | debug_object_free(work, &work_debug_descr); | ||
173 | } | ||
174 | EXPORT_SYMBOL_GPL(destroy_work_on_stack); | ||
175 | |||
176 | #else | ||
177 | static inline void debug_work_activate(struct work_struct *work) { } | ||
178 | static inline void debug_work_deactivate(struct work_struct *work) { } | ||
179 | #endif | ||
180 | |||
71 | /* Serializes the accesses to the list of workqueues. */ | 181 | /* Serializes the accesses to the list of workqueues. */ |
72 | static DEFINE_SPINLOCK(workqueue_lock); | 182 | static DEFINE_SPINLOCK(workqueue_lock); |
73 | static LIST_HEAD(workqueues); | 183 | static LIST_HEAD(workqueues); |
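work_fixup_activate() above treats an unknown object as legitimate only if its data word carries WORK_STRUCT_STATIC, i.e. it came from the static initializer; anything else must be initialized at run time before it is queued. A small sketch of the two initialization styles that keep debugobjects happy (the my_* names are illustrative, and the static-bit behaviour is inferred from the check above):

    static void my_handler(struct work_struct *work)
    {
            /* ... */
    }

    /* compile-time initialization: the static bit lets fixup_activate
     * start tracking the object the first time it is queued */
    static DECLARE_WORK(my_static_work, my_handler);

    /* run-time initialization of a work item embedded in another object */
    struct my_dev {
            struct work_struct work;
    };

    static void my_dev_setup(struct my_dev *dev)
    {
            INIT_WORK(&dev->work, my_handler);      /* registers with debugobjects */
    }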
@@ -145,6 +255,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, | |||
145 | { | 255 | { |
146 | unsigned long flags; | 256 | unsigned long flags; |
147 | 257 | ||
258 | debug_work_activate(work); | ||
148 | spin_lock_irqsave(&cwq->lock, flags); | 259 | spin_lock_irqsave(&cwq->lock, flags); |
149 | insert_work(cwq, work, &cwq->worklist); | 260 | insert_work(cwq, work, &cwq->worklist); |
150 | spin_unlock_irqrestore(&cwq->lock, flags); | 261 | spin_unlock_irqrestore(&cwq->lock, flags); |
@@ -280,6 +391,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) | |||
280 | struct lockdep_map lockdep_map = work->lockdep_map; | 391 | struct lockdep_map lockdep_map = work->lockdep_map; |
281 | #endif | 392 | #endif |
282 | trace_workqueue_execution(cwq->thread, work); | 393 | trace_workqueue_execution(cwq->thread, work); |
394 | debug_work_deactivate(work); | ||
283 | cwq->current_work = work; | 395 | cwq->current_work = work; |
284 | list_del_init(cwq->worklist.next); | 396 | list_del_init(cwq->worklist.next); |
285 | spin_unlock_irq(&cwq->lock); | 397 | spin_unlock_irq(&cwq->lock); |
@@ -350,11 +462,18 @@ static void wq_barrier_func(struct work_struct *work) | |||
350 | static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, | 462 | static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, |
351 | struct wq_barrier *barr, struct list_head *head) | 463 | struct wq_barrier *barr, struct list_head *head) |
352 | { | 464 | { |
353 | INIT_WORK(&barr->work, wq_barrier_func); | 465 | /* |
466 | * debugobject calls are safe here even with cwq->lock locked | ||
467 | * as we know for sure that this will not trigger any of the | ||
468 | * checks and call back into the fixup functions where we | ||
469 | * might deadlock. | ||
470 | */ | ||
471 | INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); | ||
354 | __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); | 472 | __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); |
355 | 473 | ||
356 | init_completion(&barr->done); | 474 | init_completion(&barr->done); |
357 | 475 | ||
476 | debug_work_activate(&barr->work); | ||
358 | insert_work(cwq, &barr->work, head); | 477 | insert_work(cwq, &barr->work, head); |
359 | } | 478 | } |
360 | 479 | ||
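The barrier work item lives on the caller's stack, so it is now set up with INIT_WORK_ON_STACK() and, in the later hunks, released with destroy_work_on_stack() once the completion has fired. The same pattern applies to any on-stack work item when CONFIG_DEBUG_OBJECTS_WORK is enabled; a hedged sketch, with my_func and run_sync_work as invented names:

    static void my_func(struct work_struct *work)
    {
            /* do the actual work */
    }

    static void run_sync_work(struct workqueue_struct *wq)
    {
            struct work_struct work;                /* lives on the stack */

            INIT_WORK_ON_STACK(&work, my_func);     /* registers with debugobjects */
            queue_work(wq, &work);
            flush_workqueue(wq);                    /* wait until my_func has run */
            destroy_work_on_stack(&work);           /* release the tracking object */
    }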
@@ -372,8 +491,10 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | |||
372 | } | 491 | } |
373 | spin_unlock_irq(&cwq->lock); | 492 | spin_unlock_irq(&cwq->lock); |
374 | 493 | ||
375 | if (active) | 494 | if (active) { |
376 | wait_for_completion(&barr.done); | 495 | wait_for_completion(&barr.done); |
496 | destroy_work_on_stack(&barr.work); | ||
497 | } | ||
377 | 498 | ||
378 | return active; | 499 | return active; |
379 | } | 500 | } |
@@ -451,6 +572,7 @@ out: | |||
451 | return 0; | 572 | return 0; |
452 | 573 | ||
453 | wait_for_completion(&barr.done); | 574 | wait_for_completion(&barr.done); |
575 | destroy_work_on_stack(&barr.work); | ||
454 | return 1; | 576 | return 1; |
455 | } | 577 | } |
456 | EXPORT_SYMBOL_GPL(flush_work); | 578 | EXPORT_SYMBOL_GPL(flush_work); |
@@ -485,6 +607,7 @@ static int try_to_grab_pending(struct work_struct *work) | |||
485 | */ | 607 | */ |
486 | smp_rmb(); | 608 | smp_rmb(); |
487 | if (cwq == get_wq_data(work)) { | 609 | if (cwq == get_wq_data(work)) { |
610 | debug_work_deactivate(work); | ||
488 | list_del_init(&work->entry); | 611 | list_del_init(&work->entry); |
489 | ret = 1; | 612 | ret = 1; |
490 | } | 613 | } |
@@ -507,8 +630,10 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, | |||
507 | } | 630 | } |
508 | spin_unlock_irq(&cwq->lock); | 631 | spin_unlock_irq(&cwq->lock); |
509 | 632 | ||
510 | if (unlikely(running)) | 633 | if (unlikely(running)) { |
511 | wait_for_completion(&barr.done); | 634 | wait_for_completion(&barr.done); |
635 | destroy_work_on_stack(&barr.work); | ||
636 | } | ||
512 | } | 637 | } |
513 | 638 | ||
514 | static void wait_on_work(struct work_struct *work) | 639 | static void wait_on_work(struct work_struct *work) |