diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Makefile | 2 | ||||
| -rw-r--r-- | kernel/cpu.c | 10 | ||||
| -rw-r--r-- | kernel/cpuset.c | 26 | ||||
| -rw-r--r-- | kernel/exit.c | 7 | ||||
| -rw-r--r-- | kernel/fork.c | 15 | ||||
| -rw-r--r-- | kernel/kprobes.c | 58 | ||||
| -rw-r--r-- | kernel/module.c | 16 | ||||
| -rw-r--r-- | kernel/mutex-debug.c | 12 | ||||
| -rw-r--r-- | kernel/mutex-debug.h | 25 | ||||
| -rw-r--r-- | kernel/mutex.c | 21 | ||||
| -rw-r--r-- | kernel/mutex.h | 6 | ||||
| -rw-r--r-- | kernel/ptrace.c | 23 | ||||
| -rw-r--r-- | kernel/sched.c | 11 | ||||
| -rw-r--r-- | kernel/signal.c | 35 | ||||
| -rw-r--r-- | kernel/sysctl.c | 11 | ||||
| -rw-r--r-- | kernel/time.c | 2 | ||||
| -rw-r--r-- | kernel/time/Makefile | 1 | ||||
| -rw-r--r-- | kernel/time/clocksource.c | 349 | ||||
| -rw-r--r-- | kernel/time/jiffies.c | 73 | ||||
| -rw-r--r-- | kernel/timer.c | 396 | ||||
| -rw-r--r-- | kernel/unwind.c | 918 |
21 files changed, 1890 insertions, 127 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index f6ef00f4f90f..752bd7d383af 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -10,6 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | |||
| 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
| 11 | hrtimer.o | 11 | hrtimer.o |
| 12 | 12 | ||
| 13 | obj-y += time/ | ||
| 13 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | 14 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o |
| 14 | obj-$(CONFIG_FUTEX) += futex.o | 15 | obj-$(CONFIG_FUTEX) += futex.o |
| 15 | ifeq ($(CONFIG_COMPAT),y) | 16 | ifeq ($(CONFIG_COMPAT),y) |
| @@ -21,6 +22,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |||
| 21 | obj-$(CONFIG_UID16) += uid16.o | 22 | obj-$(CONFIG_UID16) += uid16.o |
| 22 | obj-$(CONFIG_MODULES) += module.o | 23 | obj-$(CONFIG_MODULES) += module.o |
| 23 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 24 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
| 25 | obj-$(CONFIG_STACK_UNWIND) += unwind.o | ||
| 24 | obj-$(CONFIG_PM) += power/ | 26 | obj-$(CONFIG_PM) += power/ |
| 25 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o | 27 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o |
| 26 | obj-$(CONFIG_KEXEC) += kexec.o | 28 | obj-$(CONFIG_KEXEC) += kexec.o |
diff --git a/kernel/cpu.c b/kernel/cpu.c index fe2b8d0bfe4c..03dcd981846a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -13,10 +13,10 @@ | |||
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
| 15 | #include <linux/stop_machine.h> | 15 | #include <linux/stop_machine.h> |
| 16 | #include <asm/semaphore.h> | 16 | #include <linux/mutex.h> |
| 17 | 17 | ||
| 18 | /* This protects CPUs going up and down... */ | 18 | /* This protects CPUs going up and down... */ |
| 19 | static DECLARE_MUTEX(cpucontrol); | 19 | static DEFINE_MUTEX(cpucontrol); |
| 20 | 20 | ||
| 21 | static BLOCKING_NOTIFIER_HEAD(cpu_chain); | 21 | static BLOCKING_NOTIFIER_HEAD(cpu_chain); |
| 22 | 22 | ||
| @@ -30,9 +30,9 @@ static int __lock_cpu_hotplug(int interruptible) | |||
| 30 | 30 | ||
| 31 | if (lock_cpu_hotplug_owner != current) { | 31 | if (lock_cpu_hotplug_owner != current) { |
| 32 | if (interruptible) | 32 | if (interruptible) |
| 33 | ret = down_interruptible(&cpucontrol); | 33 | ret = mutex_lock_interruptible(&cpucontrol); |
| 34 | else | 34 | else |
| 35 | down(&cpucontrol); | 35 | mutex_lock(&cpucontrol); |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | /* | 38 | /* |
| @@ -56,7 +56,7 @@ void unlock_cpu_hotplug(void) | |||
| 56 | { | 56 | { |
| 57 | if (--lock_cpu_hotplug_depth == 0) { | 57 | if (--lock_cpu_hotplug_depth == 0) { |
| 58 | lock_cpu_hotplug_owner = NULL; | 58 | lock_cpu_hotplug_owner = NULL; |
| 59 | up(&cpucontrol); | 59 | mutex_unlock(&cpucontrol); |
| 60 | } | 60 | } |
| 61 | } | 61 | } |
| 62 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); | 62 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b602f73fb38d..1535af3a912d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -2442,31 +2442,43 @@ void __cpuset_memory_pressure_bump(void) | |||
| 2442 | */ | 2442 | */ |
| 2443 | static int proc_cpuset_show(struct seq_file *m, void *v) | 2443 | static int proc_cpuset_show(struct seq_file *m, void *v) |
| 2444 | { | 2444 | { |
| 2445 | struct pid *pid; | ||
| 2445 | struct task_struct *tsk; | 2446 | struct task_struct *tsk; |
| 2446 | char *buf; | 2447 | char *buf; |
| 2447 | int retval = 0; | 2448 | int retval; |
| 2448 | 2449 | ||
| 2450 | retval = -ENOMEM; | ||
| 2449 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 2451 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
| 2450 | if (!buf) | 2452 | if (!buf) |
| 2451 | return -ENOMEM; | 2453 | goto out; |
| 2454 | |||
| 2455 | retval = -ESRCH; | ||
| 2456 | pid = m->private; | ||
| 2457 | tsk = get_pid_task(pid, PIDTYPE_PID); | ||
| 2458 | if (!tsk) | ||
| 2459 | goto out_free; | ||
| 2452 | 2460 | ||
| 2453 | tsk = m->private; | 2461 | retval = -EINVAL; |
| 2454 | mutex_lock(&manage_mutex); | 2462 | mutex_lock(&manage_mutex); |
| 2463 | |||
| 2455 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); | 2464 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); |
| 2456 | if (retval < 0) | 2465 | if (retval < 0) |
| 2457 | goto out; | 2466 | goto out_unlock; |
| 2458 | seq_puts(m, buf); | 2467 | seq_puts(m, buf); |
| 2459 | seq_putc(m, '\n'); | 2468 | seq_putc(m, '\n'); |
| 2460 | out: | 2469 | out_unlock: |
| 2461 | mutex_unlock(&manage_mutex); | 2470 | mutex_unlock(&manage_mutex); |
| 2471 | put_task_struct(tsk); | ||
| 2472 | out_free: | ||
| 2462 | kfree(buf); | 2473 | kfree(buf); |
| 2474 | out: | ||
| 2463 | return retval; | 2475 | return retval; |
| 2464 | } | 2476 | } |
| 2465 | 2477 | ||
| 2466 | static int cpuset_open(struct inode *inode, struct file *file) | 2478 | static int cpuset_open(struct inode *inode, struct file *file) |
| 2467 | { | 2479 | { |
| 2468 | struct task_struct *tsk = PROC_I(inode)->task; | 2480 | struct pid *pid = PROC_I(inode)->pid; |
| 2469 | return single_open(file, proc_cpuset_show, tsk); | 2481 | return single_open(file, proc_cpuset_show, pid); |
| 2470 | } | 2482 | } |
| 2471 | 2483 | ||
| 2472 | struct file_operations proc_cpuset_operations = { | 2484 | struct file_operations proc_cpuset_operations = { |
diff --git a/kernel/exit.c b/kernel/exit.c index e76bd02e930e..304ef637be6c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -137,12 +137,8 @@ void release_task(struct task_struct * p) | |||
| 137 | { | 137 | { |
| 138 | int zap_leader; | 138 | int zap_leader; |
| 139 | task_t *leader; | 139 | task_t *leader; |
| 140 | struct dentry *proc_dentry; | ||
| 141 | |||
| 142 | repeat: | 140 | repeat: |
| 143 | atomic_dec(&p->user->processes); | 141 | atomic_dec(&p->user->processes); |
| 144 | spin_lock(&p->proc_lock); | ||
| 145 | proc_dentry = proc_pid_unhash(p); | ||
| 146 | write_lock_irq(&tasklist_lock); | 142 | write_lock_irq(&tasklist_lock); |
| 147 | ptrace_unlink(p); | 143 | ptrace_unlink(p); |
| 148 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 144 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); |
| @@ -171,8 +167,7 @@ repeat: | |||
| 171 | 167 | ||
| 172 | sched_exit(p); | 168 | sched_exit(p); |
| 173 | write_unlock_irq(&tasklist_lock); | 169 | write_unlock_irq(&tasklist_lock); |
| 174 | spin_unlock(&p->proc_lock); | 170 | proc_flush_task(p); |
| 175 | proc_pid_flush(proc_dentry); | ||
| 176 | release_thread(p); | 171 | release_thread(p); |
| 177 | call_rcu(&p->rcu, delayed_put_task_struct); | 172 | call_rcu(&p->rcu, delayed_put_task_struct); |
| 178 | 173 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index dfd10cb370c3..9b4e54ef0225 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags, | |||
| 993 | if (put_user(p->pid, parent_tidptr)) | 993 | if (put_user(p->pid, parent_tidptr)) |
| 994 | goto bad_fork_cleanup; | 994 | goto bad_fork_cleanup; |
| 995 | 995 | ||
| 996 | p->proc_dentry = NULL; | ||
| 997 | |||
| 998 | INIT_LIST_HEAD(&p->children); | 996 | INIT_LIST_HEAD(&p->children); |
| 999 | INIT_LIST_HEAD(&p->sibling); | 997 | INIT_LIST_HEAD(&p->sibling); |
| 1000 | p->vfork_done = NULL; | 998 | p->vfork_done = NULL; |
| 1001 | spin_lock_init(&p->alloc_lock); | 999 | spin_lock_init(&p->alloc_lock); |
| 1002 | spin_lock_init(&p->proc_lock); | ||
| 1003 | 1000 | ||
| 1004 | clear_tsk_thread_flag(p, TIF_SIGPENDING); | 1001 | clear_tsk_thread_flag(p, TIF_SIGPENDING); |
| 1005 | init_sigpending(&p->pending); | 1002 | init_sigpending(&p->pending); |
| @@ -1159,18 +1156,6 @@ static task_t *copy_process(unsigned long clone_flags, | |||
| 1159 | } | 1156 | } |
| 1160 | 1157 | ||
| 1161 | if (clone_flags & CLONE_THREAD) { | 1158 | if (clone_flags & CLONE_THREAD) { |
| 1162 | /* | ||
| 1163 | * Important: if an exit-all has been started then | ||
| 1164 | * do not create this new thread - the whole thread | ||
| 1165 | * group is supposed to exit anyway. | ||
| 1166 | */ | ||
| 1167 | if (current->signal->flags & SIGNAL_GROUP_EXIT) { | ||
| 1168 | spin_unlock(¤t->sighand->siglock); | ||
| 1169 | write_unlock_irq(&tasklist_lock); | ||
| 1170 | retval = -EAGAIN; | ||
| 1171 | goto bad_fork_cleanup_namespace; | ||
| 1172 | } | ||
| 1173 | |||
| 1174 | p->group_leader = current->group_leader; | 1159 | p->group_leader = current->group_leader; |
| 1175 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); | 1160 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); |
| 1176 | 1161 | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1fbf466a29aa..64aab081153b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -47,11 +47,17 @@ | |||
| 47 | 47 | ||
| 48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | 48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; |
| 49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | 49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; |
| 50 | static atomic_t kprobe_count; | ||
| 50 | 51 | ||
| 51 | DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 52 | DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ |
| 52 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ | 53 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ |
| 53 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 54 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
| 54 | 55 | ||
| 56 | static struct notifier_block kprobe_page_fault_nb = { | ||
| 57 | .notifier_call = kprobe_exceptions_notify, | ||
| 58 | .priority = 0x7fffffff /* we need to notified first */ | ||
| 59 | }; | ||
| 60 | |||
| 55 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT | 61 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT |
| 56 | /* | 62 | /* |
| 57 | * kprobe->ainsn.insn points to the copy of the instruction to be | 63 | * kprobe->ainsn.insn points to the copy of the instruction to be |
| @@ -368,16 +374,15 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
| 368 | */ | 374 | */ |
| 369 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | 375 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) |
| 370 | { | 376 | { |
| 371 | struct kprobe *kp; | ||
| 372 | |||
| 373 | if (p->break_handler) { | 377 | if (p->break_handler) { |
| 374 | list_for_each_entry_rcu(kp, &old_p->list, list) { | 378 | if (old_p->break_handler) |
| 375 | if (kp->break_handler) | 379 | return -EEXIST; |
| 376 | return -EEXIST; | ||
| 377 | } | ||
| 378 | list_add_tail_rcu(&p->list, &old_p->list); | 380 | list_add_tail_rcu(&p->list, &old_p->list); |
| 381 | old_p->break_handler = aggr_break_handler; | ||
| 379 | } else | 382 | } else |
| 380 | list_add_rcu(&p->list, &old_p->list); | 383 | list_add_rcu(&p->list, &old_p->list); |
| 384 | if (p->post_handler && !old_p->post_handler) | ||
| 385 | old_p->post_handler = aggr_post_handler; | ||
| 381 | return 0; | 386 | return 0; |
| 382 | } | 387 | } |
| 383 | 388 | ||
| @@ -390,9 +395,11 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
| 390 | copy_kprobe(p, ap); | 395 | copy_kprobe(p, ap); |
| 391 | ap->addr = p->addr; | 396 | ap->addr = p->addr; |
| 392 | ap->pre_handler = aggr_pre_handler; | 397 | ap->pre_handler = aggr_pre_handler; |
| 393 | ap->post_handler = aggr_post_handler; | ||
| 394 | ap->fault_handler = aggr_fault_handler; | 398 | ap->fault_handler = aggr_fault_handler; |
| 395 | ap->break_handler = aggr_break_handler; | 399 | if (p->post_handler) |
| 400 | ap->post_handler = aggr_post_handler; | ||
| 401 | if (p->break_handler) | ||
| 402 | ap->break_handler = aggr_break_handler; | ||
| 396 | 403 | ||
| 397 | INIT_LIST_HEAD(&ap->list); | 404 | INIT_LIST_HEAD(&ap->list); |
| 398 | list_add_rcu(&p->list, &ap->list); | 405 | list_add_rcu(&p->list, &ap->list); |
| @@ -464,6 +471,8 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
| 464 | old_p = get_kprobe(p->addr); | 471 | old_p = get_kprobe(p->addr); |
| 465 | if (old_p) { | 472 | if (old_p) { |
| 466 | ret = register_aggr_kprobe(old_p, p); | 473 | ret = register_aggr_kprobe(old_p, p); |
| 474 | if (!ret) | ||
| 475 | atomic_inc(&kprobe_count); | ||
| 467 | goto out; | 476 | goto out; |
| 468 | } | 477 | } |
| 469 | 478 | ||
| @@ -474,6 +483,10 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
| 474 | hlist_add_head_rcu(&p->hlist, | 483 | hlist_add_head_rcu(&p->hlist, |
| 475 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 484 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
| 476 | 485 | ||
| 486 | if (atomic_add_return(1, &kprobe_count) == \ | ||
| 487 | (ARCH_INACTIVE_KPROBE_COUNT + 1)) | ||
| 488 | register_page_fault_notifier(&kprobe_page_fault_nb); | ||
| 489 | |||
| 477 | arch_arm_kprobe(p); | 490 | arch_arm_kprobe(p); |
| 478 | 491 | ||
| 479 | out: | 492 | out: |
| @@ -536,14 +549,40 @@ valid_p: | |||
| 536 | kfree(old_p); | 549 | kfree(old_p); |
| 537 | } | 550 | } |
| 538 | arch_remove_kprobe(p); | 551 | arch_remove_kprobe(p); |
| 552 | } else { | ||
| 553 | mutex_lock(&kprobe_mutex); | ||
| 554 | if (p->break_handler) | ||
| 555 | old_p->break_handler = NULL; | ||
| 556 | if (p->post_handler){ | ||
| 557 | list_for_each_entry_rcu(list_p, &old_p->list, list){ | ||
| 558 | if (list_p->post_handler){ | ||
| 559 | cleanup_p = 2; | ||
| 560 | break; | ||
| 561 | } | ||
| 562 | } | ||
| 563 | if (cleanup_p == 0) | ||
| 564 | old_p->post_handler = NULL; | ||
| 565 | } | ||
| 566 | mutex_unlock(&kprobe_mutex); | ||
| 539 | } | 567 | } |
| 568 | |||
| 569 | /* Call unregister_page_fault_notifier() | ||
| 570 | * if no probes are active | ||
| 571 | */ | ||
| 572 | mutex_lock(&kprobe_mutex); | ||
| 573 | if (atomic_add_return(-1, &kprobe_count) == \ | ||
| 574 | ARCH_INACTIVE_KPROBE_COUNT) | ||
| 575 | unregister_page_fault_notifier(&kprobe_page_fault_nb); | ||
| 576 | mutex_unlock(&kprobe_mutex); | ||
| 577 | return; | ||
| 540 | } | 578 | } |
| 541 | 579 | ||
| 542 | static struct notifier_block kprobe_exceptions_nb = { | 580 | static struct notifier_block kprobe_exceptions_nb = { |
| 543 | .notifier_call = kprobe_exceptions_notify, | 581 | .notifier_call = kprobe_exceptions_notify, |
| 544 | .priority = 0x7fffffff /* we need to notified first */ | 582 | .priority = 0x7fffffff /* we need to be notified first */ |
| 545 | }; | 583 | }; |
| 546 | 584 | ||
| 585 | |||
| 547 | int __kprobes register_jprobe(struct jprobe *jp) | 586 | int __kprobes register_jprobe(struct jprobe *jp) |
| 548 | { | 587 | { |
| 549 | /* Todo: Verify probepoint is a function entry point */ | 588 | /* Todo: Verify probepoint is a function entry point */ |
| @@ -652,6 +691,7 @@ static int __init init_kprobes(void) | |||
| 652 | INIT_HLIST_HEAD(&kprobe_table[i]); | 691 | INIT_HLIST_HEAD(&kprobe_table[i]); |
| 653 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); | 692 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); |
| 654 | } | 693 | } |
| 694 | atomic_set(&kprobe_count, 0); | ||
| 655 | 695 | ||
| 656 | err = arch_init_kprobes(); | 696 | err = arch_init_kprobes(); |
| 657 | if (!err) | 697 | if (!err) |
diff --git a/kernel/module.c b/kernel/module.c index d75275de1c28..08811e26ac9d 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/string.h> | 40 | #include <linux/string.h> |
| 41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
| 42 | #include <linux/mutex.h> | 42 | #include <linux/mutex.h> |
| 43 | #include <linux/unwind.h> | ||
| 43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
| 44 | #include <asm/semaphore.h> | 45 | #include <asm/semaphore.h> |
| 45 | #include <asm/cacheflush.h> | 46 | #include <asm/cacheflush.h> |
| @@ -1051,6 +1052,8 @@ static void free_module(struct module *mod) | |||
| 1051 | remove_sect_attrs(mod); | 1052 | remove_sect_attrs(mod); |
| 1052 | mod_kobject_remove(mod); | 1053 | mod_kobject_remove(mod); |
| 1053 | 1054 | ||
| 1055 | unwind_remove_table(mod->unwind_info, 0); | ||
| 1056 | |||
| 1054 | /* Arch-specific cleanup. */ | 1057 | /* Arch-specific cleanup. */ |
| 1055 | module_arch_cleanup(mod); | 1058 | module_arch_cleanup(mod); |
| 1056 | 1059 | ||
| @@ -1412,7 +1415,7 @@ static struct module *load_module(void __user *umod, | |||
| 1412 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, | 1415 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, |
| 1413 | exportindex, modindex, obsparmindex, infoindex, gplindex, | 1416 | exportindex, modindex, obsparmindex, infoindex, gplindex, |
| 1414 | crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, | 1417 | crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, |
| 1415 | gplfuturecrcindex; | 1418 | gplfuturecrcindex, unwindex = 0; |
| 1416 | struct module *mod; | 1419 | struct module *mod; |
| 1417 | long err = 0; | 1420 | long err = 0; |
| 1418 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 1421 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ |
| @@ -1502,6 +1505,9 @@ static struct module *load_module(void __user *umod, | |||
| 1502 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); | 1505 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); |
| 1503 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); | 1506 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); |
| 1504 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); | 1507 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); |
| 1508 | #ifdef ARCH_UNWIND_SECTION_NAME | ||
| 1509 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); | ||
| 1510 | #endif | ||
| 1505 | 1511 | ||
| 1506 | /* Don't keep modinfo section */ | 1512 | /* Don't keep modinfo section */ |
| 1507 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1513 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
| @@ -1510,6 +1516,8 @@ static struct module *load_module(void __user *umod, | |||
| 1510 | sechdrs[symindex].sh_flags |= SHF_ALLOC; | 1516 | sechdrs[symindex].sh_flags |= SHF_ALLOC; |
| 1511 | sechdrs[strindex].sh_flags |= SHF_ALLOC; | 1517 | sechdrs[strindex].sh_flags |= SHF_ALLOC; |
| 1512 | #endif | 1518 | #endif |
| 1519 | if (unwindex) | ||
| 1520 | sechdrs[unwindex].sh_flags |= SHF_ALLOC; | ||
| 1513 | 1521 | ||
| 1514 | /* Check module struct version now, before we try to use module. */ | 1522 | /* Check module struct version now, before we try to use module. */ |
| 1515 | if (!check_modstruct_version(sechdrs, versindex, mod)) { | 1523 | if (!check_modstruct_version(sechdrs, versindex, mod)) { |
| @@ -1738,6 +1746,11 @@ static struct module *load_module(void __user *umod, | |||
| 1738 | goto arch_cleanup; | 1746 | goto arch_cleanup; |
| 1739 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 1747 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
| 1740 | 1748 | ||
| 1749 | /* Size of section 0 is 0, so this works well if no unwind info. */ | ||
| 1750 | mod->unwind_info = unwind_add_table(mod, | ||
| 1751 | (void *)sechdrs[unwindex].sh_addr, | ||
| 1752 | sechdrs[unwindex].sh_size); | ||
| 1753 | |||
| 1741 | /* Get rid of temporary copy */ | 1754 | /* Get rid of temporary copy */ |
| 1742 | vfree(hdr); | 1755 | vfree(hdr); |
| 1743 | 1756 | ||
| @@ -1836,6 +1849,7 @@ sys_init_module(void __user *umod, | |||
| 1836 | mod->state = MODULE_STATE_LIVE; | 1849 | mod->state = MODULE_STATE_LIVE; |
| 1837 | /* Drop initial reference. */ | 1850 | /* Drop initial reference. */ |
| 1838 | module_put(mod); | 1851 | module_put(mod); |
| 1852 | unwind_remove_table(mod->unwind_info, 1); | ||
| 1839 | module_free(mod, mod->module_init); | 1853 | module_free(mod, mod->module_init); |
| 1840 | mod->module_init = NULL; | 1854 | mod->module_init = NULL; |
| 1841 | mod->init_size = 0; | 1855 | mod->init_size = 0; |
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index f4913c376950..036b6285b15c 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c | |||
| @@ -153,13 +153,13 @@ next: | |||
| 153 | continue; | 153 | continue; |
| 154 | count++; | 154 | count++; |
| 155 | cursor = curr->next; | 155 | cursor = curr->next; |
| 156 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 156 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
| 157 | 157 | ||
| 158 | printk("\n#%03d: ", count); | 158 | printk("\n#%03d: ", count); |
| 159 | printk_lock(lock, filter ? 0 : 1); | 159 | printk_lock(lock, filter ? 0 : 1); |
| 160 | goto next; | 160 | goto next; |
| 161 | } | 161 | } |
| 162 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 162 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
| 163 | printk("\n"); | 163 | printk("\n"); |
| 164 | } | 164 | } |
| 165 | 165 | ||
| @@ -316,7 +316,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task) | |||
| 316 | continue; | 316 | continue; |
| 317 | list_del_init(curr); | 317 | list_del_init(curr); |
| 318 | DEBUG_OFF(); | 318 | DEBUG_OFF(); |
| 319 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 319 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
| 320 | 320 | ||
| 321 | printk("BUG: %s/%d, lock held at task exit time!\n", | 321 | printk("BUG: %s/%d, lock held at task exit time!\n", |
| 322 | task->comm, task->pid); | 322 | task->comm, task->pid); |
| @@ -325,7 +325,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task) | |||
| 325 | printk("exiting task is not even the owner??\n"); | 325 | printk("exiting task is not even the owner??\n"); |
| 326 | return; | 326 | return; |
| 327 | } | 327 | } |
| 328 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 328 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
| 329 | } | 329 | } |
| 330 | 330 | ||
| 331 | /* | 331 | /* |
| @@ -352,7 +352,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) | |||
| 352 | continue; | 352 | continue; |
| 353 | list_del_init(curr); | 353 | list_del_init(curr); |
| 354 | DEBUG_OFF(); | 354 | DEBUG_OFF(); |
| 355 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 355 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
| 356 | 356 | ||
| 357 | printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", | 357 | printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", |
| 358 | current->comm, current->pid, lock, from, to); | 358 | current->comm, current->pid, lock, from, to); |
| @@ -362,7 +362,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) | |||
| 362 | printk("freeing task is not even the owner??\n"); | 362 | printk("freeing task is not even the owner??\n"); |
| 363 | return; | 363 | return; |
| 364 | } | 364 | } |
| 365 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 365 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
| 366 | } | 366 | } |
| 367 | 367 | ||
| 368 | /* | 368 | /* |
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index fd384050acb1..a5196c36a5fd 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h | |||
| @@ -46,21 +46,6 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |||
| 46 | extern void debug_mutex_unlock(struct mutex *lock); | 46 | extern void debug_mutex_unlock(struct mutex *lock); |
| 47 | extern void debug_mutex_init(struct mutex *lock, const char *name); | 47 | extern void debug_mutex_init(struct mutex *lock, const char *name); |
| 48 | 48 | ||
| 49 | #define debug_spin_lock(lock) \ | ||
| 50 | do { \ | ||
| 51 | local_irq_disable(); \ | ||
| 52 | if (debug_mutex_on) \ | ||
| 53 | spin_lock(lock); \ | ||
| 54 | } while (0) | ||
| 55 | |||
| 56 | #define debug_spin_unlock(lock) \ | ||
| 57 | do { \ | ||
| 58 | if (debug_mutex_on) \ | ||
| 59 | spin_unlock(lock); \ | ||
| 60 | local_irq_enable(); \ | ||
| 61 | preempt_check_resched(); \ | ||
| 62 | } while (0) | ||
| 63 | |||
| 64 | #define debug_spin_lock_save(lock, flags) \ | 49 | #define debug_spin_lock_save(lock, flags) \ |
| 65 | do { \ | 50 | do { \ |
| 66 | local_irq_save(flags); \ | 51 | local_irq_save(flags); \ |
| @@ -68,7 +53,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name); | |||
| 68 | spin_lock(lock); \ | 53 | spin_lock(lock); \ |
| 69 | } while (0) | 54 | } while (0) |
| 70 | 55 | ||
| 71 | #define debug_spin_lock_restore(lock, flags) \ | 56 | #define debug_spin_unlock_restore(lock, flags) \ |
| 72 | do { \ | 57 | do { \ |
| 73 | if (debug_mutex_on) \ | 58 | if (debug_mutex_on) \ |
| 74 | spin_unlock(lock); \ | 59 | spin_unlock(lock); \ |
| @@ -76,20 +61,20 @@ extern void debug_mutex_init(struct mutex *lock, const char *name); | |||
| 76 | preempt_check_resched(); \ | 61 | preempt_check_resched(); \ |
| 77 | } while (0) | 62 | } while (0) |
| 78 | 63 | ||
| 79 | #define spin_lock_mutex(lock) \ | 64 | #define spin_lock_mutex(lock, flags) \ |
| 80 | do { \ | 65 | do { \ |
| 81 | struct mutex *l = container_of(lock, struct mutex, wait_lock); \ | 66 | struct mutex *l = container_of(lock, struct mutex, wait_lock); \ |
| 82 | \ | 67 | \ |
| 83 | DEBUG_WARN_ON(in_interrupt()); \ | 68 | DEBUG_WARN_ON(in_interrupt()); \ |
| 84 | debug_spin_lock(&debug_mutex_lock); \ | 69 | debug_spin_lock_save(&debug_mutex_lock, flags); \ |
| 85 | spin_lock(lock); \ | 70 | spin_lock(lock); \ |
| 86 | DEBUG_WARN_ON(l->magic != l); \ | 71 | DEBUG_WARN_ON(l->magic != l); \ |
| 87 | } while (0) | 72 | } while (0) |
| 88 | 73 | ||
| 89 | #define spin_unlock_mutex(lock) \ | 74 | #define spin_unlock_mutex(lock, flags) \ |
| 90 | do { \ | 75 | do { \ |
| 91 | spin_unlock(lock); \ | 76 | spin_unlock(lock); \ |
| 92 | debug_spin_unlock(&debug_mutex_lock); \ | 77 | debug_spin_unlock_restore(&debug_mutex_lock, flags); \ |
| 93 | } while (0) | 78 | } while (0) |
| 94 | 79 | ||
| 95 | #define DEBUG_OFF() \ | 80 | #define DEBUG_OFF() \ |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 5449b210d9ed..7043db21bbce 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
| @@ -125,10 +125,11 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
| 125 | struct task_struct *task = current; | 125 | struct task_struct *task = current; |
| 126 | struct mutex_waiter waiter; | 126 | struct mutex_waiter waiter; |
| 127 | unsigned int old_val; | 127 | unsigned int old_val; |
| 128 | unsigned long flags; | ||
| 128 | 129 | ||
| 129 | debug_mutex_init_waiter(&waiter); | 130 | debug_mutex_init_waiter(&waiter); |
| 130 | 131 | ||
| 131 | spin_lock_mutex(&lock->wait_lock); | 132 | spin_lock_mutex(&lock->wait_lock, flags); |
| 132 | 133 | ||
| 133 | debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); | 134 | debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); |
| 134 | 135 | ||
| @@ -157,7 +158,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
| 157 | if (unlikely(state == TASK_INTERRUPTIBLE && | 158 | if (unlikely(state == TASK_INTERRUPTIBLE && |
| 158 | signal_pending(task))) { | 159 | signal_pending(task))) { |
| 159 | mutex_remove_waiter(lock, &waiter, task->thread_info); | 160 | mutex_remove_waiter(lock, &waiter, task->thread_info); |
| 160 | spin_unlock_mutex(&lock->wait_lock); | 161 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 161 | 162 | ||
| 162 | debug_mutex_free_waiter(&waiter); | 163 | debug_mutex_free_waiter(&waiter); |
| 163 | return -EINTR; | 164 | return -EINTR; |
| @@ -165,9 +166,9 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
| 165 | __set_task_state(task, state); | 166 | __set_task_state(task, state); |
| 166 | 167 | ||
| 167 | /* didnt get the lock, go to sleep: */ | 168 | /* didnt get the lock, go to sleep: */ |
| 168 | spin_unlock_mutex(&lock->wait_lock); | 169 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 169 | schedule(); | 170 | schedule(); |
| 170 | spin_lock_mutex(&lock->wait_lock); | 171 | spin_lock_mutex(&lock->wait_lock, flags); |
| 171 | } | 172 | } |
| 172 | 173 | ||
| 173 | /* got the lock - rejoice! */ | 174 | /* got the lock - rejoice! */ |
| @@ -178,7 +179,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
| 178 | if (likely(list_empty(&lock->wait_list))) | 179 | if (likely(list_empty(&lock->wait_list))) |
| 179 | atomic_set(&lock->count, 0); | 180 | atomic_set(&lock->count, 0); |
| 180 | 181 | ||
| 181 | spin_unlock_mutex(&lock->wait_lock); | 182 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 182 | 183 | ||
| 183 | debug_mutex_free_waiter(&waiter); | 184 | debug_mutex_free_waiter(&waiter); |
| 184 | 185 | ||
| @@ -203,10 +204,11 @@ static fastcall noinline void | |||
| 203 | __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) | 204 | __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) |
| 204 | { | 205 | { |
| 205 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 206 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
| 207 | unsigned long flags; | ||
| 206 | 208 | ||
| 207 | DEBUG_WARN_ON(lock->owner != current_thread_info()); | 209 | DEBUG_WARN_ON(lock->owner != current_thread_info()); |
| 208 | 210 | ||
| 209 | spin_lock_mutex(&lock->wait_lock); | 211 | spin_lock_mutex(&lock->wait_lock, flags); |
| 210 | 212 | ||
| 211 | /* | 213 | /* |
| 212 | * some architectures leave the lock unlocked in the fastpath failure | 214 | * some architectures leave the lock unlocked in the fastpath failure |
| @@ -231,7 +233,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) | |||
| 231 | 233 | ||
| 232 | debug_mutex_clear_owner(lock); | 234 | debug_mutex_clear_owner(lock); |
| 233 | 235 | ||
| 234 | spin_unlock_mutex(&lock->wait_lock); | 236 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 235 | } | 237 | } |
| 236 | 238 | ||
| 237 | /* | 239 | /* |
| @@ -276,9 +278,10 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) | |||
| 276 | static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | 278 | static inline int __mutex_trylock_slowpath(atomic_t *lock_count) |
| 277 | { | 279 | { |
| 278 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 280 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
| 281 | unsigned long flags; | ||
| 279 | int prev; | 282 | int prev; |
| 280 | 283 | ||
| 281 | spin_lock_mutex(&lock->wait_lock); | 284 | spin_lock_mutex(&lock->wait_lock, flags); |
| 282 | 285 | ||
| 283 | prev = atomic_xchg(&lock->count, -1); | 286 | prev = atomic_xchg(&lock->count, -1); |
| 284 | if (likely(prev == 1)) | 287 | if (likely(prev == 1)) |
| @@ -287,7 +290,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | |||
| 287 | if (likely(list_empty(&lock->wait_list))) | 290 | if (likely(list_empty(&lock->wait_list))) |
| 288 | atomic_set(&lock->count, 0); | 291 | atomic_set(&lock->count, 0); |
| 289 | 292 | ||
| 290 | spin_unlock_mutex(&lock->wait_lock); | 293 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 291 | 294 | ||
| 292 | return prev == 1; | 295 | return prev == 1; |
| 293 | } | 296 | } |
diff --git a/kernel/mutex.h b/kernel/mutex.h index 00fe84e7b672..069189947257 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h | |||
| @@ -9,8 +9,10 @@ | |||
| 9 | * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: | 9 | * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: |
| 10 | */ | 10 | */ |
| 11 | 11 | ||
| 12 | #define spin_lock_mutex(lock) spin_lock(lock) | 12 | #define spin_lock_mutex(lock, flags) \ |
| 13 | #define spin_unlock_mutex(lock) spin_unlock(lock) | 13 | do { spin_lock(lock); (void)(flags); } while (0) |
| 14 | #define spin_unlock_mutex(lock, flags) \ | ||
| 15 | do { spin_unlock(lock); (void)(flags); } while (0) | ||
| 14 | #define mutex_remove_waiter(lock, waiter, ti) \ | 16 | #define mutex_remove_waiter(lock, waiter, ti) \ |
| 15 | __list_del((waiter)->list.prev, (waiter)->list.next) | 17 | __list_del((waiter)->list.prev, (waiter)->list.next) |
| 16 | 18 | ||
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 921c22ad16e4..335c5b932e14 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -120,8 +120,18 @@ int ptrace_check_attach(struct task_struct *child, int kill) | |||
| 120 | 120 | ||
| 121 | static int may_attach(struct task_struct *task) | 121 | static int may_attach(struct task_struct *task) |
| 122 | { | 122 | { |
| 123 | if (!task->mm) | 123 | /* May we inspect the given task? |
| 124 | return -EPERM; | 124 | * This check is used both for attaching with ptrace |
| 125 | * and for allowing access to sensitive information in /proc. | ||
| 126 | * | ||
| 127 | * ptrace_attach denies several cases that /proc allows | ||
| 128 | * because setting up the necessary parent/child relationship | ||
| 129 | * or halting the specified task is impossible. | ||
| 130 | */ | ||
| 131 | int dumpable = 0; | ||
| 132 | /* Don't let security modules deny introspection */ | ||
| 133 | if (task == current) | ||
| 134 | return 0; | ||
| 125 | if (((current->uid != task->euid) || | 135 | if (((current->uid != task->euid) || |
| 126 | (current->uid != task->suid) || | 136 | (current->uid != task->suid) || |
| 127 | (current->uid != task->uid) || | 137 | (current->uid != task->uid) || |
| @@ -130,7 +140,9 @@ static int may_attach(struct task_struct *task) | |||
| 130 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) | 140 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) |
| 131 | return -EPERM; | 141 | return -EPERM; |
| 132 | smp_rmb(); | 142 | smp_rmb(); |
| 133 | if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) | 143 | if (task->mm) |
| 144 | dumpable = task->mm->dumpable; | ||
| 145 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | ||
| 134 | return -EPERM; | 146 | return -EPERM; |
| 135 | 147 | ||
| 136 | return security_ptrace(current, task); | 148 | return security_ptrace(current, task); |
| @@ -176,6 +188,8 @@ repeat: | |||
| 176 | goto repeat; | 188 | goto repeat; |
| 177 | } | 189 | } |
| 178 | 190 | ||
| 191 | if (!task->mm) | ||
| 192 | goto bad; | ||
| 179 | /* the same process cannot be attached many times */ | 193 | /* the same process cannot be attached many times */ |
| 180 | if (task->ptrace & PT_PTRACED) | 194 | if (task->ptrace & PT_PTRACED) |
| 181 | goto bad; | 195 | goto bad; |
| @@ -200,7 +214,7 @@ out: | |||
| 200 | return retval; | 214 | return retval; |
| 201 | } | 215 | } |
| 202 | 216 | ||
| 203 | void __ptrace_detach(struct task_struct *child, unsigned int data) | 217 | static inline void __ptrace_detach(struct task_struct *child, unsigned int data) |
| 204 | { | 218 | { |
| 205 | child->exit_code = data; | 219 | child->exit_code = data; |
| 206 | /* .. re-parent .. */ | 220 | /* .. re-parent .. */ |
| @@ -219,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
| 219 | ptrace_disable(child); | 233 | ptrace_disable(child); |
| 220 | 234 | ||
| 221 | write_lock_irq(&tasklist_lock); | 235 | write_lock_irq(&tasklist_lock); |
| 236 | /* protect against de_thread()->release_task() */ | ||
| 222 | if (child->ptrace) | 237 | if (child->ptrace) |
| 223 | __ptrace_detach(child, data); | 238 | __ptrace_detach(child, data); |
| 224 | write_unlock_irq(&tasklist_lock); | 239 | write_unlock_irq(&tasklist_lock); |
diff --git a/kernel/sched.c b/kernel/sched.c index f06d059edef5..a856040c200a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -818,6 +818,11 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) | |||
| 818 | * the target CPU. | 818 | * the target CPU. |
| 819 | */ | 819 | */ |
| 820 | #ifdef CONFIG_SMP | 820 | #ifdef CONFIG_SMP |
| 821 | |||
| 822 | #ifndef tsk_is_polling | ||
| 823 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | ||
| 824 | #endif | ||
| 825 | |||
| 821 | static void resched_task(task_t *p) | 826 | static void resched_task(task_t *p) |
| 822 | { | 827 | { |
| 823 | int cpu; | 828 | int cpu; |
| @@ -833,9 +838,9 @@ static void resched_task(task_t *p) | |||
| 833 | if (cpu == smp_processor_id()) | 838 | if (cpu == smp_processor_id()) |
| 834 | return; | 839 | return; |
| 835 | 840 | ||
| 836 | /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ | 841 | /* NEED_RESCHED must be visible before we test polling */ |
| 837 | smp_mb(); | 842 | smp_mb(); |
| 838 | if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) | 843 | if (!tsk_is_polling(p)) |
| 839 | smp_send_reschedule(cpu); | 844 | smp_send_reschedule(cpu); |
| 840 | } | 845 | } |
| 841 | #else | 846 | #else |
| @@ -4247,7 +4252,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | |||
| 4247 | if (retval) | 4252 | if (retval) |
| 4248 | goto out_unlock; | 4253 | goto out_unlock; |
| 4249 | 4254 | ||
| 4250 | jiffies_to_timespec(p->policy & SCHED_FIFO ? | 4255 | jiffies_to_timespec(p->policy == SCHED_FIFO ? |
| 4251 | 0 : task_timeslice(p), &t); | 4256 | 0 : task_timeslice(p), &t); |
| 4252 | read_unlock(&tasklist_lock); | 4257 | read_unlock(&tasklist_lock); |
| 4253 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 4258 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
diff --git a/kernel/signal.c b/kernel/signal.c index 1b3c921737e2..52adf53929f6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -1531,6 +1531,35 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
| 1531 | spin_unlock_irqrestore(&sighand->siglock, flags); | 1531 | spin_unlock_irqrestore(&sighand->siglock, flags); |
| 1532 | } | 1532 | } |
| 1533 | 1533 | ||
| 1534 | static inline int may_ptrace_stop(void) | ||
| 1535 | { | ||
| 1536 | if (!likely(current->ptrace & PT_PTRACED)) | ||
| 1537 | return 0; | ||
| 1538 | |||
| 1539 | if (unlikely(current->parent == current->real_parent && | ||
| 1540 | (current->ptrace & PT_ATTACHED))) | ||
| 1541 | return 0; | ||
| 1542 | |||
| 1543 | if (unlikely(current->signal == current->parent->signal) && | ||
| 1544 | unlikely(current->signal->flags & SIGNAL_GROUP_EXIT)) | ||
| 1545 | return 0; | ||
| 1546 | |||
| 1547 | /* | ||
| 1548 | * Are we in the middle of do_coredump? | ||
| 1549 | * If so and our tracer is also part of the coredump stopping | ||
| 1550 | * is a deadlock situation, and pointless because our tracer | ||
| 1551 | * is dead so don't allow us to stop. | ||
| 1552 | * If SIGKILL was already sent before the caller unlocked | ||
| 1553 | * ->siglock we must see ->core_waiters != 0. Otherwise it | ||
| 1554 | * is safe to enter schedule(). | ||
| 1555 | */ | ||
| 1556 | if (unlikely(current->mm->core_waiters) && | ||
| 1557 | unlikely(current->mm == current->parent->mm)) | ||
| 1558 | return 0; | ||
| 1559 | |||
| 1560 | return 1; | ||
| 1561 | } | ||
| 1562 | |||
| 1534 | /* | 1563 | /* |
| 1535 | * This must be called with current->sighand->siglock held. | 1564 | * This must be called with current->sighand->siglock held. |
| 1536 | * | 1565 | * |
| @@ -1559,11 +1588,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | |||
| 1559 | spin_unlock_irq(¤t->sighand->siglock); | 1588 | spin_unlock_irq(¤t->sighand->siglock); |
| 1560 | try_to_freeze(); | 1589 | try_to_freeze(); |
| 1561 | read_lock(&tasklist_lock); | 1590 | read_lock(&tasklist_lock); |
| 1562 | if (likely(current->ptrace & PT_PTRACED) && | 1591 | if (may_ptrace_stop()) { |
| 1563 | likely(current->parent != current->real_parent || | ||
| 1564 | !(current->ptrace & PT_ATTACHED)) && | ||
| 1565 | (likely(current->parent->signal != current->signal) || | ||
| 1566 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { | ||
| 1567 | do_notify_parent_cldstop(current, CLD_TRAPPED); | 1592 | do_notify_parent_cldstop(current, CLD_TRAPPED); |
| 1568 | read_unlock(&tasklist_lock); | 1593 | read_unlock(&tasklist_lock); |
| 1569 | schedule(); | 1594 | schedule(); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2c0e65819448..f1a4eb1a655e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -73,6 +73,7 @@ extern int printk_ratelimit_burst; | |||
| 73 | extern int pid_max_min, pid_max_max; | 73 | extern int pid_max_min, pid_max_max; |
| 74 | extern int sysctl_drop_caches; | 74 | extern int sysctl_drop_caches; |
| 75 | extern int percpu_pagelist_fraction; | 75 | extern int percpu_pagelist_fraction; |
| 76 | extern int compat_log; | ||
| 76 | 77 | ||
| 77 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | 78 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
| 78 | int unknown_nmi_panic; | 79 | int unknown_nmi_panic; |
| @@ -677,6 +678,16 @@ static ctl_table kern_table[] = { | |||
| 677 | .proc_handler = &proc_dointvec, | 678 | .proc_handler = &proc_dointvec, |
| 678 | }, | 679 | }, |
| 679 | #endif | 680 | #endif |
| 681 | #ifdef CONFIG_COMPAT | ||
| 682 | { | ||
| 683 | .ctl_name = KERN_COMPAT_LOG, | ||
| 684 | .procname = "compat-log", | ||
| 685 | .data = &compat_log, | ||
| 686 | .maxlen = sizeof (int), | ||
| 687 | .mode = 0644, | ||
| 688 | .proc_handler = &proc_dointvec, | ||
| 689 | }, | ||
| 690 | #endif | ||
| 680 | { .ctl_name = 0 } | 691 | { .ctl_name = 0 } |
| 681 | }; | 692 | }; |
| 682 | 693 | ||
diff --git a/kernel/time.c b/kernel/time.c index b00ddc71cedb..5bd489747643 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -523,6 +523,7 @@ EXPORT_SYMBOL(do_gettimeofday); | |||
| 523 | 523 | ||
| 524 | 524 | ||
| 525 | #else | 525 | #else |
| 526 | #ifndef CONFIG_GENERIC_TIME | ||
| 526 | /* | 527 | /* |
| 527 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval | 528 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval |
| 528 | * and therefore only yields usec accuracy | 529 | * and therefore only yields usec accuracy |
| @@ -537,6 +538,7 @@ void getnstimeofday(struct timespec *tv) | |||
| 537 | } | 538 | } |
| 538 | EXPORT_SYMBOL_GPL(getnstimeofday); | 539 | EXPORT_SYMBOL_GPL(getnstimeofday); |
| 539 | #endif | 540 | #endif |
| 541 | #endif | ||
| 540 | 542 | ||
| 541 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. | 543 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. |
| 542 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 | 544 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile new file mode 100644 index 000000000000..e1dfd8e86cce --- /dev/null +++ b/kernel/time/Makefile | |||
| @@ -0,0 +1 @@ | |||
| obj-y += clocksource.o jiffies.o | |||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c new file mode 100644 index 000000000000..74eca5939bd9 --- /dev/null +++ b/kernel/time/clocksource.c | |||
| @@ -0,0 +1,349 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/time/clocksource.c | ||
| 3 | * | ||
| 4 | * This file contains the functions which manage clocksource drivers. | ||
| 5 | * | ||
| 6 | * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | * GNU General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public License | ||
| 19 | * along with this program; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 21 | * | ||
| 22 | * TODO WishList: | ||
| 23 | * o Allow clocksource drivers to be unregistered | ||
| 24 | * o get rid of clocksource_jiffies extern | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/clocksource.h> | ||
| 28 | #include <linux/sysdev.h> | ||
| 29 | #include <linux/init.h> | ||
| 30 | #include <linux/module.h> | ||
| 31 | |||
| 32 | /* XXX - Would like a better way for initializing curr_clocksource */ | ||
| 33 | extern struct clocksource clocksource_jiffies; | ||
| 34 | |||
| 35 | /*[Clocksource internal variables]--------- | ||
| 36 | * curr_clocksource: | ||
| 37 | * currently selected clocksource. Initialized to clocksource_jiffies. | ||
| 38 | * next_clocksource: | ||
| 39 | * pending next selected clocksource. | ||
| 40 | * clocksource_list: | ||
| 41 | * linked list with the registered clocksources | ||
| 42 | * clocksource_lock: | ||
| 43 | * protects manipulations to curr_clocksource and next_clocksource | ||
| 44 | * and the clocksource_list | ||
| 45 | * override_name: | ||
| 46 | * Name of the user-specified clocksource. | ||
| 47 | */ | ||
| 48 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | ||
| 49 | static struct clocksource *next_clocksource; | ||
| 50 | static LIST_HEAD(clocksource_list); | ||
| 51 | static DEFINE_SPINLOCK(clocksource_lock); | ||
| 52 | static char override_name[32]; | ||
| 53 | static int finished_booting; | ||
| 54 | |||
| 55 | /* clocksource_done_booting - Called near the end of bootup | ||
| 56 | * | ||
| 57 | * Hack to avoid lots of clocksource churn at boot time | ||
| 58 | */ | ||
| 59 | static int __init clocksource_done_booting(void) | ||
| 60 | { | ||
| 61 | finished_booting = 1; | ||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | late_initcall(clocksource_done_booting); | ||
| 66 | |||
| 67 | /** | ||
| 68 | * clocksource_get_next - Returns the selected clocksource | ||
| 69 | * | ||
| 70 | */ | ||
| 71 | struct clocksource *clocksource_get_next(void) | ||
| 72 | { | ||
| 73 | unsigned long flags; | ||
| 74 | |||
| 75 | spin_lock_irqsave(&clocksource_lock, flags); | ||
| 76 | if (next_clocksource && finished_booting) { | ||
| 77 | curr_clocksource = next_clocksource; | ||
| 78 | next_clocksource = NULL; | ||
| 79 | } | ||
| 80 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 81 | |||
| 82 | return curr_clocksource; | ||
| 83 | } | ||
| 84 | |||
| 85 | /** | ||
| 86 | * select_clocksource - Finds the best registered clocksource. | ||
| 87 | * | ||
| 88 | * Private function. Must hold clocksource_lock when called. | ||
| 89 | * | ||
| 90 | * Looks through the list of registered clocksources, returning | ||
| 91 | * the one with the highest rating value. If there is a clocksource | ||
| 92 | * name that matches the override string, it returns that clocksource. | ||
| 93 | */ | ||
| 94 | static struct clocksource *select_clocksource(void) | ||
| 95 | { | ||
| 96 | struct clocksource *best = NULL; | ||
| 97 | struct list_head *tmp; | ||
| 98 | |||
| 99 | list_for_each(tmp, &clocksource_list) { | ||
| 100 | struct clocksource *src; | ||
| 101 | |||
| 102 | src = list_entry(tmp, struct clocksource, list); | ||
| 103 | if (!best) | ||
| 104 | best = src; | ||
| 105 | |||
| 106 | /* check for override: */ | ||
| 107 | if (strlen(src->name) == strlen(override_name) && | ||
| 108 | !strcmp(src->name, override_name)) { | ||
| 109 | best = src; | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | /* pick the highest rating: */ | ||
| 113 | if (src->rating > best->rating) | ||
| 114 | best = src; | ||
| 115 | } | ||
| 116 | |||
| 117 | return best; | ||
| 118 | } | ||
| 119 | |||
| 120 | /** | ||
| 121 | * is_registered_source - Checks if clocksource is registered | ||
| 122 | * @c: pointer to a clocksource | ||
| 123 | * | ||
| 124 | * Private helper function. Must hold clocksource_lock when called. | ||
| 125 | * | ||
| 126 | * Returns one if the clocksource is already registered, zero otherwise. | ||
| 127 | */ | ||
| 128 | static int is_registered_source(struct clocksource *c) | ||
| 129 | { | ||
| 130 | int len = strlen(c->name); | ||
| 131 | struct list_head *tmp; | ||
| 132 | |||
| 133 | list_for_each(tmp, &clocksource_list) { | ||
| 134 | struct clocksource *src; | ||
| 135 | |||
| 136 | src = list_entry(tmp, struct clocksource, list); | ||
| 137 | if (strlen(src->name) == len && !strcmp(src->name, c->name)) | ||
| 138 | return 1; | ||
| 139 | } | ||
| 140 | |||
| 141 | return 0; | ||
| 142 | } | ||
| 143 | |||
| 144 | /** | ||
| 145 | * clocksource_register - Used to install new clocksources | ||
| 146 | * @t: clocksource to be registered | ||
| 147 | * | ||
| 148 | * Returns -EBUSY if registration fails, zero otherwise. | ||
| 149 | */ | ||
| 150 | int clocksource_register(struct clocksource *c) | ||
| 151 | { | ||
| 152 | int ret = 0; | ||
| 153 | unsigned long flags; | ||
| 154 | |||
| 155 | spin_lock_irqsave(&clocksource_lock, flags); | ||
| 156 | /* check if clocksource is already registered */ | ||
| 157 | if (is_registered_source(c)) { | ||
| 158 | printk("register_clocksource: Cannot register %s. " | ||
| 159 | "Already registered!", c->name); | ||
| 160 | ret = -EBUSY; | ||
| 161 | } else { | ||
| 162 | /* register it */ | ||
| 163 | list_add(&c->list, &clocksource_list); | ||
| 164 | /* scan the registered clocksources, and pick the best one */ | ||
| 165 | next_clocksource = select_clocksource(); | ||
| 166 | } | ||
| 167 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 168 | return ret; | ||
| 169 | } | ||
| 170 | EXPORT_SYMBOL(clocksource_register); | ||
| 171 | |||
| 172 | /** | ||
| 173 | * clocksource_reselect - Rescan list for next clocksource | ||
| 174 | * | ||
| 175 | * A quick helper function to be used if a clocksource changes its | ||
| 176 | * rating. Forces the clocksource list to be re-scanned for the best | ||
| 177 | * clocksource. | ||
| 178 | */ | ||
| 179 | void clocksource_reselect(void) | ||
| 180 | { | ||
| 181 | unsigned long flags; | ||
| 182 | |||
| 183 | spin_lock_irqsave(&clocksource_lock, flags); | ||
| 184 | next_clocksource = select_clocksource(); | ||
| 185 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 186 | } | ||
| 187 | EXPORT_SYMBOL(clocksource_reselect); | ||
| 188 | |||
| 189 | /** | ||
| 190 | * sysfs_show_current_clocksources - sysfs interface for current clocksource | ||
| 191 | * @dev: unused | ||
| 192 | * @buf: char buffer to be filled with clocksource list | ||
| 193 | * | ||
| 194 | * Provides sysfs interface for listing current clocksource. | ||
| 195 | */ | ||
| 196 | static ssize_t | ||
| 197 | sysfs_show_current_clocksources(struct sys_device *dev, char *buf) | ||
| 198 | { | ||
| 199 | char *curr = buf; | ||
| 200 | |||
| 201 | spin_lock_irq(&clocksource_lock); | ||
| 202 | curr += sprintf(curr, "%s ", curr_clocksource->name); | ||
| 203 | spin_unlock_irq(&clocksource_lock); | ||
| 204 | |||
| 205 | curr += sprintf(curr, "\n"); | ||
| 206 | |||
| 207 | return curr - buf; | ||
| 208 | } | ||
| 209 | |||
| 210 | /** | ||
| 211 | * sysfs_override_clocksource - interface for manually overriding clocksource | ||
| 212 | * @dev: unused | ||
| 213 | * @buf: name of override clocksource | ||
| 214 | * @count: length of buffer | ||
| 215 | * | ||
| 216 | * Takes input from sysfs interface for manually overriding the default | ||
| 217 | * clocksource selction. | ||
| 218 | */ | ||
| 219 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | ||
| 220 | const char *buf, size_t count) | ||
| 221 | { | ||
| 222 | size_t ret = count; | ||
| 223 | /* strings from sysfs write are not 0 terminated! */ | ||
| 224 | if (count >= sizeof(override_name)) | ||
| 225 | return -EINVAL; | ||
| 226 | |||
| 227 | /* strip of \n: */ | ||
| 228 | if (buf[count-1] == '\n') | ||
| 229 | count--; | ||
| 230 | if (count < 1) | ||
| 231 | return -EINVAL; | ||
| 232 | |||
| 233 | spin_lock_irq(&clocksource_lock); | ||
| 234 | |||
| 235 | /* copy the name given: */ | ||
| 236 | memcpy(override_name, buf, count); | ||
| 237 | override_name[count] = 0; | ||
| 238 | |||
| 239 | /* try to select it: */ | ||
| 240 | next_clocksource = select_clocksource(); | ||
| 241 | |||
| 242 | spin_unlock_irq(&clocksource_lock); | ||
| 243 | |||
| 244 | return ret; | ||
| 245 | } | ||
| 246 | |||
| 247 | /** | ||
| 248 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource | ||
| 249 | * @dev: unused | ||
| 250 | * @buf: char buffer to be filled with clocksource list | ||
| 251 | * | ||
| 252 | * Provides sysfs interface for listing registered clocksources | ||
| 253 | */ | ||
| 254 | static ssize_t | ||
| 255 | sysfs_show_available_clocksources(struct sys_device *dev, char *buf) | ||
| 256 | { | ||
| 257 | struct list_head *tmp; | ||
| 258 | char *curr = buf; | ||
| 259 | |||
| 260 | spin_lock_irq(&clocksource_lock); | ||
| 261 | list_for_each(tmp, &clocksource_list) { | ||
| 262 | struct clocksource *src; | ||
| 263 | |||
| 264 | src = list_entry(tmp, struct clocksource, list); | ||
| 265 | curr += sprintf(curr, "%s ", src->name); | ||
| 266 | } | ||
| 267 | spin_unlock_irq(&clocksource_lock); | ||
| 268 | |||
| 269 | curr += sprintf(curr, "\n"); | ||
| 270 | |||
| 271 | return curr - buf; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * Sysfs setup bits: | ||
| 276 | */ | ||
| 277 | static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, | ||
| 278 | sysfs_override_clocksource); | ||
| 279 | |||
| 280 | static SYSDEV_ATTR(available_clocksource, 0600, | ||
| 281 | sysfs_show_available_clocksources, NULL); | ||
| 282 | |||
| 283 | static struct sysdev_class clocksource_sysclass = { | ||
| 284 | set_kset_name("clocksource"), | ||
| 285 | }; | ||
| 286 | |||
| 287 | static struct sys_device device_clocksource = { | ||
| 288 | .id = 0, | ||
| 289 | .cls = &clocksource_sysclass, | ||
| 290 | }; | ||
| 291 | |||
| 292 | static int __init init_clocksource_sysfs(void) | ||
| 293 | { | ||
| 294 | int error = sysdev_class_register(&clocksource_sysclass); | ||
| 295 | |||
| 296 | if (!error) | ||
| 297 | error = sysdev_register(&device_clocksource); | ||
| 298 | if (!error) | ||
| 299 | error = sysdev_create_file( | ||
| 300 | &device_clocksource, | ||
| 301 | &attr_current_clocksource); | ||
| 302 | if (!error) | ||
| 303 | error = sysdev_create_file( | ||
| 304 | &device_clocksource, | ||
| 305 | &attr_available_clocksource); | ||
| 306 | return error; | ||
| 307 | } | ||
| 308 | |||
| 309 | device_initcall(init_clocksource_sysfs); | ||
| 310 | |||
| 311 | /** | ||
| 312 | * boot_override_clocksource - boot clock override | ||
| 313 | * @str: override name | ||
| 314 | * | ||
| 315 | * Takes a clocksource= boot argument and uses it | ||
| 316 | * as the clocksource override name. | ||
| 317 | */ | ||
| 318 | static int __init boot_override_clocksource(char* str) | ||
| 319 | { | ||
| 320 | unsigned long flags; | ||
| 321 | spin_lock_irqsave(&clocksource_lock, flags); | ||
| 322 | if (str) | ||
| 323 | strlcpy(override_name, str, sizeof(override_name)); | ||
| 324 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 325 | return 1; | ||
| 326 | } | ||
| 327 | |||
| 328 | __setup("clocksource=", boot_override_clocksource); | ||
| 329 | |||
| 330 | /** | ||
| 331 | * boot_override_clock - Compatibility layer for deprecated boot option | ||
| 332 | * @str: override name | ||
| 333 | * | ||
| 334 | * DEPRECATED! Takes a clock= boot argument and uses it | ||
| 335 | * as the clocksource override name | ||
| 336 | */ | ||
| 337 | static int __init boot_override_clock(char* str) | ||
| 338 | { | ||
| 339 | if (!strcmp(str, "pmtmr")) { | ||
| 340 | printk("Warning: clock=pmtmr is deprecated. " | ||
| 341 | "Use clocksource=acpi_pm.\n"); | ||
| 342 | return boot_override_clocksource("acpi_pm"); | ||
| 343 | } | ||
| 344 | printk("Warning! clock= boot option is deprecated. " | ||
| 345 | "Use clocksource=xyz\n"); | ||
| 346 | return boot_override_clocksource(str); | ||
| 347 | } | ||
| 348 | |||
| 349 | __setup("clock=", boot_override_clock); | ||
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c new file mode 100644 index 000000000000..126bb30c4afe --- /dev/null +++ b/kernel/time/jiffies.c | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | /*********************************************************************** | ||
| 2 | * linux/kernel/time/jiffies.c | ||
| 3 | * | ||
| 4 | * This file contains the jiffies based clocksource. | ||
| 5 | * | ||
| 6 | * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | * GNU General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public License | ||
| 19 | * along with this program; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 21 | * | ||
| 22 | ************************************************************************/ | ||
| 23 | #include <linux/clocksource.h> | ||
| 24 | #include <linux/jiffies.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | |||
| 27 | /* The Jiffies based clocksource is the lowest common | ||
| 28 | * denominator clock source which should function on | ||
| 29 | * all systems. It has the same coarse resolution as | ||
| 30 | * the timer interrupt frequency HZ and it suffers | ||
| 31 | * inaccuracies caused by missed or lost timer | ||
| 32 | * interrupts and the inability for the timer | ||
| 33 | * interrupt hardware to accuratly tick at the | ||
| 34 | * requested HZ value. It is also not reccomended | ||
| 35 | * for "tick-less" systems. | ||
| 36 | */ | ||
| 37 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) | ||
| 38 | |||
| 39 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier | ||
| 40 | * conversion, the .shift value could be zero. However | ||
| 41 | * this would make NTP adjustments impossible as they are | ||
| 42 | * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to | ||
| 43 | * shift both the nominator and denominator the same | ||
| 44 | * amount, and give ntp adjustments in units of 1/2^8 | ||
| 45 | * | ||
| 46 | * The value 8 is somewhat carefully chosen, as anything | ||
| 47 | * larger can result in overflows. NSEC_PER_JIFFY grows as | ||
| 48 | * HZ shrinks, so values greater then 8 overflow 32bits when | ||
| 49 | * HZ=100. | ||
| 50 | */ | ||
| 51 | #define JIFFIES_SHIFT 8 | ||
| 52 | |||
| 53 | static cycle_t jiffies_read(void) | ||
| 54 | { | ||
| 55 | return (cycle_t) jiffies; | ||
| 56 | } | ||
| 57 | |||
| 58 | struct clocksource clocksource_jiffies = { | ||
| 59 | .name = "jiffies", | ||
| 60 | .rating = 0, /* lowest rating*/ | ||
| 61 | .read = jiffies_read, | ||
| 62 | .mask = 0xffffffff, /*32bits*/ | ||
| 63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | ||
| 64 | .shift = JIFFIES_SHIFT, | ||
| 65 | .is_continuous = 0, /* tick based, not free running */ | ||
| 66 | }; | ||
| 67 | |||
| 68 | static int __init init_jiffies_clocksource(void) | ||
| 69 | { | ||
| 70 | return clocksource_register(&clocksource_jiffies); | ||
| 71 | } | ||
| 72 | |||
| 73 | module_init(init_jiffies_clocksource); | ||
diff --git a/kernel/timer.c b/kernel/timer.c index eb97371b87d8..5bb6b7976eec 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -597,7 +597,6 @@ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ | |||
| 597 | long time_precision = 1; /* clock precision (us) */ | 597 | long time_precision = 1; /* clock precision (us) */ |
| 598 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | 598 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ |
| 599 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | 599 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ |
| 600 | static long time_phase; /* phase offset (scaled us) */ | ||
| 601 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; | 600 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; |
| 602 | /* frequency offset (scaled ppm)*/ | 601 | /* frequency offset (scaled ppm)*/ |
| 603 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ | 602 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ |
| @@ -747,27 +746,14 @@ static long adjtime_adjustment(void) | |||
| 747 | } | 746 | } |
| 748 | 747 | ||
| 749 | /* in the NTP reference this is called "hardclock()" */ | 748 | /* in the NTP reference this is called "hardclock()" */ |
| 750 | static void update_wall_time_one_tick(void) | 749 | static void update_ntp_one_tick(void) |
| 751 | { | 750 | { |
| 752 | long time_adjust_step, delta_nsec; | 751 | long time_adjust_step; |
| 753 | 752 | ||
| 754 | time_adjust_step = adjtime_adjustment(); | 753 | time_adjust_step = adjtime_adjustment(); |
| 755 | if (time_adjust_step) | 754 | if (time_adjust_step) |
| 756 | /* Reduce by this step the amount of time left */ | 755 | /* Reduce by this step the amount of time left */ |
| 757 | time_adjust -= time_adjust_step; | 756 | time_adjust -= time_adjust_step; |
| 758 | delta_nsec = tick_nsec + time_adjust_step * 1000; | ||
| 759 | /* | ||
| 760 | * Advance the phase, once it gets to one microsecond, then | ||
| 761 | * advance the tick more. | ||
| 762 | */ | ||
| 763 | time_phase += time_adj; | ||
| 764 | if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) { | ||
| 765 | long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10)); | ||
| 766 | time_phase -= ltemp << (SHIFT_SCALE - 10); | ||
| 767 | delta_nsec += ltemp; | ||
| 768 | } | ||
| 769 | xtime.tv_nsec += delta_nsec; | ||
| 770 | time_interpolator_update(delta_nsec); | ||
| 771 | 757 | ||
| 772 | /* Changes by adjtime() do not take effect till next tick. */ | 758 | /* Changes by adjtime() do not take effect till next tick. */ |
| 773 | if (time_next_adjust != 0) { | 759 | if (time_next_adjust != 0) { |
| @@ -780,36 +766,378 @@ static void update_wall_time_one_tick(void) | |||
| 780 | * Return how long ticks are at the moment, that is, how much time | 766 | * Return how long ticks are at the moment, that is, how much time |
| 781 | * update_wall_time_one_tick will add to xtime next time we call it | 767 | * update_wall_time_one_tick will add to xtime next time we call it |
| 782 | * (assuming no calls to do_adjtimex in the meantime). | 768 | * (assuming no calls to do_adjtimex in the meantime). |
| 783 | * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 | 769 | * The return value is in fixed-point nanoseconds shifted by the |
| 784 | * bits to the right of the binary point. | 770 | * specified number of bits to the right of the binary point. |
| 785 | * This function has no side-effects. | 771 | * This function has no side-effects. |
| 786 | */ | 772 | */ |
| 787 | u64 current_tick_length(void) | 773 | u64 current_tick_length(void) |
| 788 | { | 774 | { |
| 789 | long delta_nsec; | 775 | long delta_nsec; |
| 776 | u64 ret; | ||
| 790 | 777 | ||
| 778 | /* calculate the finest interval NTP will allow. | ||
| 779 | * ie: nanosecond value shifted by (SHIFT_SCALE - 10) | ||
| 780 | */ | ||
| 791 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; | 781 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; |
| 792 | return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; | 782 | ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; |
| 783 | ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); | ||
| 784 | |||
| 785 | return ret; | ||
| 793 | } | 786 | } |
| 794 | 787 | ||
| 795 | /* | 788 | /* XXX - all of this timekeeping code should be later moved to time.c */ |
| 796 | * Using a loop looks inefficient, but "ticks" is | 789 | #include <linux/clocksource.h> |
| 797 | * usually just one (we shouldn't be losing ticks, | 790 | static struct clocksource *clock; /* pointer to current clocksource */ |
| 798 | * we're doing this this way mainly for interrupt | 791 | |
| 799 | * latency reasons, not because we think we'll | 792 | #ifdef CONFIG_GENERIC_TIME |
| 800 | * have lots of lost timer ticks | 793 | /** |
| 794 | * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook | ||
| 795 | * | ||
| 796 | * private function, must hold xtime_lock lock when being | ||
| 797 | * called. Returns the number of nanoseconds since the | ||
| 798 | * last call to update_wall_time() (adjusted by NTP scaling) | ||
| 799 | */ | ||
| 800 | static inline s64 __get_nsec_offset(void) | ||
| 801 | { | ||
| 802 | cycle_t cycle_now, cycle_delta; | ||
| 803 | s64 ns_offset; | ||
| 804 | |||
| 805 | /* read clocksource: */ | ||
| 806 | cycle_now = clocksource_read(clock); | ||
| 807 | |||
| 808 | /* calculate the delta since the last update_wall_time: */ | ||
| 809 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
| 810 | |||
| 811 | /* convert to nanoseconds: */ | ||
| 812 | ns_offset = cyc2ns(clock, cycle_delta); | ||
| 813 | |||
| 814 | return ns_offset; | ||
| 815 | } | ||
| 816 | |||
| 817 | /** | ||
| 818 | * __get_realtime_clock_ts - Returns the time of day in a timespec | ||
| 819 | * @ts: pointer to the timespec to be set | ||
| 820 | * | ||
| 821 | * Returns the time of day in a timespec. Used by | ||
| 822 | * do_gettimeofday() and get_realtime_clock_ts(). | ||
| 801 | */ | 823 | */ |
| 802 | static void update_wall_time(unsigned long ticks) | 824 | static inline void __get_realtime_clock_ts(struct timespec *ts) |
| 803 | { | 825 | { |
| 826 | unsigned long seq; | ||
| 827 | s64 nsecs; | ||
| 828 | |||
| 829 | do { | ||
| 830 | seq = read_seqbegin(&xtime_lock); | ||
| 831 | |||
| 832 | *ts = xtime; | ||
| 833 | nsecs = __get_nsec_offset(); | ||
| 834 | |||
| 835 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 836 | |||
| 837 | timespec_add_ns(ts, nsecs); | ||
| 838 | } | ||
| 839 | |||
| 840 | /** | ||
| 841 | * getnstimeofday - Returns the time of day in a timespec | ||
| 842 | * @ts: pointer to the timespec to be set | ||
| 843 | * | ||
| 844 | * Returns the time of day in a timespec. | ||
| 845 | */ | ||
| 846 | void getnstimeofday(struct timespec *ts) | ||
| 847 | { | ||
| 848 | __get_realtime_clock_ts(ts); | ||
| 849 | } | ||
| 850 | |||
| 851 | EXPORT_SYMBOL(getnstimeofday); | ||
| 852 | |||
| 853 | /** | ||
| 854 | * do_gettimeofday - Returns the time of day in a timeval | ||
| 855 | * @tv: pointer to the timeval to be set | ||
| 856 | * | ||
| 857 | * NOTE: Users should be converted to using get_realtime_clock_ts() | ||
| 858 | */ | ||
| 859 | void do_gettimeofday(struct timeval *tv) | ||
| 860 | { | ||
| 861 | struct timespec now; | ||
| 862 | |||
| 863 | __get_realtime_clock_ts(&now); | ||
| 864 | tv->tv_sec = now.tv_sec; | ||
| 865 | tv->tv_usec = now.tv_nsec/1000; | ||
| 866 | } | ||
| 867 | |||
| 868 | EXPORT_SYMBOL(do_gettimeofday); | ||
| 869 | /** | ||
| 870 | * do_settimeofday - Sets the time of day | ||
| 871 | * @tv: pointer to the timespec variable containing the new time | ||
| 872 | * | ||
| 873 | * Sets the time of day to the new time and update NTP and notify hrtimers | ||
| 874 | */ | ||
| 875 | int do_settimeofday(struct timespec *tv) | ||
| 876 | { | ||
| 877 | unsigned long flags; | ||
| 878 | time_t wtm_sec, sec = tv->tv_sec; | ||
| 879 | long wtm_nsec, nsec = tv->tv_nsec; | ||
| 880 | |||
| 881 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
| 882 | return -EINVAL; | ||
| 883 | |||
| 884 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 885 | |||
| 886 | nsec -= __get_nsec_offset(); | ||
| 887 | |||
| 888 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
| 889 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
| 890 | |||
| 891 | set_normalized_timespec(&xtime, sec, nsec); | ||
| 892 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
| 893 | |||
| 894 | ntp_clear(); | ||
| 895 | |||
| 896 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 897 | |||
| 898 | /* signal hrtimers about time change */ | ||
| 899 | clock_was_set(); | ||
| 900 | |||
| 901 | return 0; | ||
| 902 | } | ||
| 903 | |||
| 904 | EXPORT_SYMBOL(do_settimeofday); | ||
| 905 | |||
| 906 | /** | ||
| 907 | * change_clocksource - Swaps clocksources if a new one is available | ||
| 908 | * | ||
| 909 | * Accumulates current time interval and initializes new clocksource | ||
| 910 | */ | ||
| 911 | static int change_clocksource(void) | ||
| 912 | { | ||
| 913 | struct clocksource *new; | ||
| 914 | cycle_t now; | ||
| 915 | u64 nsec; | ||
| 916 | new = clocksource_get_next(); | ||
| 917 | if (clock != new) { | ||
| 918 | now = clocksource_read(new); | ||
| 919 | nsec = __get_nsec_offset(); | ||
| 920 | timespec_add_ns(&xtime, nsec); | ||
| 921 | |||
| 922 | clock = new; | ||
| 923 | clock->cycle_last = now; | ||
| 924 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | ||
| 925 | clock->name); | ||
| 926 | return 1; | ||
| 927 | } else if (clock->update_callback) { | ||
| 928 | return clock->update_callback(); | ||
| 929 | } | ||
| 930 | return 0; | ||
| 931 | } | ||
| 932 | #else | ||
| 933 | #define change_clocksource() (0) | ||
| 934 | #endif | ||
| 935 | |||
| 936 | /** | ||
| 937 | * timeofday_is_continuous - check to see if timekeeping is free running | ||
| 938 | */ | ||
| 939 | int timekeeping_is_continuous(void) | ||
| 940 | { | ||
| 941 | unsigned long seq; | ||
| 942 | int ret; | ||
| 943 | |||
| 804 | do { | 944 | do { |
| 805 | ticks--; | 945 | seq = read_seqbegin(&xtime_lock); |
| 806 | update_wall_time_one_tick(); | 946 | |
| 807 | if (xtime.tv_nsec >= 1000000000) { | 947 | ret = clock->is_continuous; |
| 808 | xtime.tv_nsec -= 1000000000; | 948 | |
| 949 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 950 | |||
| 951 | return ret; | ||
| 952 | } | ||
| 953 | |||
| 954 | /* | ||
| 955 | * timekeeping_init - Initializes the clocksource and common timekeeping values | ||
| 956 | */ | ||
| 957 | void __init timekeeping_init(void) | ||
| 958 | { | ||
| 959 | unsigned long flags; | ||
| 960 | |||
| 961 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 962 | clock = clocksource_get_next(); | ||
| 963 | clocksource_calculate_interval(clock, tick_nsec); | ||
| 964 | clock->cycle_last = clocksource_read(clock); | ||
| 965 | ntp_clear(); | ||
| 966 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 967 | } | ||
| 968 | |||
| 969 | |||
| 970 | /* | ||
| 971 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | ||
| 972 | * @dev: unused | ||
| 973 | * | ||
| 974 | * This is for the generic clocksource timekeeping. | ||
| 975 | * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are | ||
| 976 | * still managed by arch specific suspend/resume code. | ||
| 977 | */ | ||
| 978 | static int timekeeping_resume(struct sys_device *dev) | ||
| 979 | { | ||
| 980 | unsigned long flags; | ||
| 981 | |||
| 982 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 983 | /* restart the last cycle value */ | ||
| 984 | clock->cycle_last = clocksource_read(clock); | ||
| 985 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 986 | return 0; | ||
| 987 | } | ||
| 988 | |||
| 989 | /* sysfs resume/suspend bits for timekeeping */ | ||
| 990 | static struct sysdev_class timekeeping_sysclass = { | ||
| 991 | .resume = timekeeping_resume, | ||
| 992 | set_kset_name("timekeeping"), | ||
| 993 | }; | ||
| 994 | |||
| 995 | static struct sys_device device_timer = { | ||
| 996 | .id = 0, | ||
| 997 | .cls = &timekeeping_sysclass, | ||
| 998 | }; | ||
| 999 | |||
| 1000 | static int __init timekeeping_init_device(void) | ||
| 1001 | { | ||
| 1002 | int error = sysdev_class_register(&timekeeping_sysclass); | ||
| 1003 | if (!error) | ||
| 1004 | error = sysdev_register(&device_timer); | ||
| 1005 | return error; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | device_initcall(timekeeping_init_device); | ||
| 1009 | |||
| 1010 | /* | ||
| 1011 | * If the error is already larger, we look ahead another tick, | ||
| 1012 | * to compensate for late or lost adjustments. | ||
| 1013 | */ | ||
| 1014 | static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset) | ||
| 1015 | { | ||
| 1016 | int adj; | ||
| 1017 | |||
| 1018 | /* | ||
| 1019 | * As soon as the machine is synchronized to the external time | ||
| 1020 | * source this should be the common case. | ||
| 1021 | */ | ||
| 1022 | error >>= 2; | ||
| 1023 | if (likely(sign > 0 ? error <= *interval : error >= *interval)) | ||
| 1024 | return sign; | ||
| 1025 | |||
| 1026 | /* | ||
| 1027 | * An extra look ahead dampens the effect of the current error, | ||
| 1028 | * which can grow quite large with continously late updates, as | ||
| 1029 | * it would dominate the adjustment value and can lead to | ||
| 1030 | * oscillation. | ||
| 1031 | */ | ||
| 1032 | error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); | ||
| 1033 | error -= clock->xtime_interval >> 1; | ||
| 1034 | |||
| 1035 | adj = 0; | ||
| 1036 | while (1) { | ||
| 1037 | error >>= 1; | ||
| 1038 | if (sign > 0 ? error <= *interval : error >= *interval) | ||
| 1039 | break; | ||
| 1040 | adj++; | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | /* | ||
| 1044 | * Add the current adjustments to the error and take the offset | ||
| 1045 | * into account, the latter can cause the error to be hardly | ||
| 1046 | * reduced at the next tick. Check the error again if there's | ||
| 1047 | * room for another adjustment, thus further reducing the error | ||
| 1048 | * which otherwise had to be corrected at the next update. | ||
| 1049 | */ | ||
| 1050 | error = (error << 1) - *interval + *offset; | ||
| 1051 | if (sign > 0 ? error > *interval : error < *interval) | ||
| 1052 | adj++; | ||
| 1053 | |||
| 1054 | *interval <<= adj; | ||
| 1055 | *offset <<= adj; | ||
| 1056 | return sign << adj; | ||
| 1057 | } | ||
| 1058 | |||
| 1059 | /* | ||
| 1060 | * Adjust the multiplier to reduce the error value, | ||
| 1061 | * this is optimized for the most common adjustments of -1,0,1, | ||
| 1062 | * for other values we can do a bit more work. | ||
| 1063 | */ | ||
| 1064 | static void clocksource_adjust(struct clocksource *clock, s64 offset) | ||
| 1065 | { | ||
| 1066 | s64 error, interval = clock->cycle_interval; | ||
| 1067 | int adj; | ||
| 1068 | |||
| 1069 | error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); | ||
| 1070 | if (error > interval) { | ||
| 1071 | adj = clocksource_bigadjust(1, error, &interval, &offset); | ||
| 1072 | } else if (error < -interval) { | ||
| 1073 | interval = -interval; | ||
| 1074 | offset = -offset; | ||
| 1075 | adj = clocksource_bigadjust(-1, error, &interval, &offset); | ||
| 1076 | } else | ||
| 1077 | return; | ||
| 1078 | |||
| 1079 | clock->mult += adj; | ||
| 1080 | clock->xtime_interval += interval; | ||
| 1081 | clock->xtime_nsec -= offset; | ||
| 1082 | clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift); | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | /* | ||
| 1086 | * update_wall_time - Uses the current clocksource to increment the wall time | ||
| 1087 | * | ||
| 1088 | * Called from the timer interrupt, must hold a write on xtime_lock. | ||
| 1089 | */ | ||
| 1090 | static void update_wall_time(void) | ||
| 1091 | { | ||
| 1092 | cycle_t offset; | ||
| 1093 | |||
| 1094 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | ||
| 1095 | |||
| 1096 | #ifdef CONFIG_GENERIC_TIME | ||
| 1097 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; | ||
| 1098 | #else | ||
| 1099 | offset = clock->cycle_interval; | ||
| 1100 | #endif | ||
| 1101 | |||
| 1102 | /* normally this loop will run just once, however in the | ||
| 1103 | * case of lost or late ticks, it will accumulate correctly. | ||
| 1104 | */ | ||
| 1105 | while (offset >= clock->cycle_interval) { | ||
| 1106 | /* accumulate one interval */ | ||
| 1107 | clock->xtime_nsec += clock->xtime_interval; | ||
| 1108 | clock->cycle_last += clock->cycle_interval; | ||
| 1109 | offset -= clock->cycle_interval; | ||
| 1110 | |||
| 1111 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { | ||
| 1112 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; | ||
| 809 | xtime.tv_sec++; | 1113 | xtime.tv_sec++; |
| 810 | second_overflow(); | 1114 | second_overflow(); |
| 811 | } | 1115 | } |
| 812 | } while (ticks); | 1116 | |
| 1117 | /* interpolator bits */ | ||
| 1118 | time_interpolator_update(clock->xtime_interval | ||
| 1119 | >> clock->shift); | ||
| 1120 | /* increment the NTP state machine */ | ||
| 1121 | update_ntp_one_tick(); | ||
| 1122 | |||
| 1123 | /* accumulate error between NTP and clock interval */ | ||
| 1124 | clock->error += current_tick_length(); | ||
| 1125 | clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); | ||
| 1126 | } | ||
| 1127 | |||
| 1128 | /* correct the clock when NTP error is too big */ | ||
| 1129 | clocksource_adjust(clock, offset); | ||
| 1130 | |||
| 1131 | /* store full nanoseconds into xtime */ | ||
| 1132 | xtime.tv_nsec = clock->xtime_nsec >> clock->shift; | ||
| 1133 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | ||
| 1134 | |||
| 1135 | /* check to see if there is a new clocksource to use */ | ||
| 1136 | if (change_clocksource()) { | ||
| 1137 | clock->error = 0; | ||
| 1138 | clock->xtime_nsec = 0; | ||
| 1139 | clocksource_calculate_interval(clock, tick_nsec); | ||
| 1140 | } | ||
| 813 | } | 1141 | } |
| 814 | 1142 | ||
| 815 | /* | 1143 | /* |
| @@ -915,10 +1243,8 @@ static inline void update_times(void) | |||
| 915 | unsigned long ticks; | 1243 | unsigned long ticks; |
| 916 | 1244 | ||
| 917 | ticks = jiffies - wall_jiffies; | 1245 | ticks = jiffies - wall_jiffies; |
| 918 | if (ticks) { | 1246 | wall_jiffies += ticks; |
| 919 | wall_jiffies += ticks; | 1247 | update_wall_time(); |
| 920 | update_wall_time(ticks); | ||
| 921 | } | ||
| 922 | calc_load(ticks); | 1248 | calc_load(ticks); |
| 923 | } | 1249 | } |
| 924 | 1250 | ||
diff --git a/kernel/unwind.c b/kernel/unwind.c new file mode 100644 index 000000000000..f69c804c8e62 --- /dev/null +++ b/kernel/unwind.c | |||
| @@ -0,0 +1,918 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2002-2006 Novell, Inc. | ||
| 3 | * Jan Beulich <jbeulich@novell.com> | ||
| 4 | * This code is released under version 2 of the GNU GPL. | ||
| 5 | * | ||
| 6 | * A simple API for unwinding kernel stacks. This is used for | ||
| 7 | * debugging and error reporting purposes. The kernel doesn't need | ||
| 8 | * full-blown stack unwinding with all the bells and whistles, so there | ||
| 9 | * is not much point in implementing the full Dwarf2 unwind API. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/unwind.h> | ||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/delay.h> | ||
| 15 | #include <linux/stop_machine.h> | ||
| 16 | #include <asm/sections.h> | ||
| 17 | #include <asm/uaccess.h> | ||
| 18 | #include <asm/unaligned.h> | ||
| 19 | |||
| 20 | extern char __start_unwind[], __end_unwind[]; | ||
| 21 | |||
| 22 | #define MAX_STACK_DEPTH 8 | ||
| 23 | |||
| 24 | #define EXTRA_INFO(f) { \ | ||
| 25 | BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ | ||
| 26 | % FIELD_SIZEOF(struct unwind_frame_info, f)) \ | ||
| 27 | + offsetof(struct unwind_frame_info, f) \ | ||
| 28 | / FIELD_SIZEOF(struct unwind_frame_info, f), \ | ||
| 29 | FIELD_SIZEOF(struct unwind_frame_info, f) \ | ||
| 30 | } | ||
| 31 | #define PTREGS_INFO(f) EXTRA_INFO(regs.f) | ||
| 32 | |||
| 33 | static const struct { | ||
| 34 | unsigned offs:BITS_PER_LONG / 2; | ||
| 35 | unsigned width:BITS_PER_LONG / 2; | ||
| 36 | } reg_info[] = { | ||
| 37 | UNW_REGISTER_INFO | ||
| 38 | }; | ||
| 39 | |||
| 40 | #undef PTREGS_INFO | ||
| 41 | #undef EXTRA_INFO | ||
| 42 | |||
| 43 | #ifndef REG_INVALID | ||
| 44 | #define REG_INVALID(r) (reg_info[r].width == 0) | ||
| 45 | #endif | ||
| 46 | |||
| 47 | #define DW_CFA_nop 0x00 | ||
| 48 | #define DW_CFA_set_loc 0x01 | ||
| 49 | #define DW_CFA_advance_loc1 0x02 | ||
| 50 | #define DW_CFA_advance_loc2 0x03 | ||
| 51 | #define DW_CFA_advance_loc4 0x04 | ||
| 52 | #define DW_CFA_offset_extended 0x05 | ||
| 53 | #define DW_CFA_restore_extended 0x06 | ||
| 54 | #define DW_CFA_undefined 0x07 | ||
| 55 | #define DW_CFA_same_value 0x08 | ||
| 56 | #define DW_CFA_register 0x09 | ||
| 57 | #define DW_CFA_remember_state 0x0a | ||
| 58 | #define DW_CFA_restore_state 0x0b | ||
| 59 | #define DW_CFA_def_cfa 0x0c | ||
| 60 | #define DW_CFA_def_cfa_register 0x0d | ||
| 61 | #define DW_CFA_def_cfa_offset 0x0e | ||
| 62 | #define DW_CFA_def_cfa_expression 0x0f | ||
| 63 | #define DW_CFA_expression 0x10 | ||
| 64 | #define DW_CFA_offset_extended_sf 0x11 | ||
| 65 | #define DW_CFA_def_cfa_sf 0x12 | ||
| 66 | #define DW_CFA_def_cfa_offset_sf 0x13 | ||
| 67 | #define DW_CFA_val_offset 0x14 | ||
| 68 | #define DW_CFA_val_offset_sf 0x15 | ||
| 69 | #define DW_CFA_val_expression 0x16 | ||
| 70 | #define DW_CFA_lo_user 0x1c | ||
| 71 | #define DW_CFA_GNU_window_save 0x2d | ||
| 72 | #define DW_CFA_GNU_args_size 0x2e | ||
| 73 | #define DW_CFA_GNU_negative_offset_extended 0x2f | ||
| 74 | #define DW_CFA_hi_user 0x3f | ||
| 75 | |||
| 76 | #define DW_EH_PE_FORM 0x07 | ||
| 77 | #define DW_EH_PE_native 0x00 | ||
| 78 | #define DW_EH_PE_leb128 0x01 | ||
| 79 | #define DW_EH_PE_data2 0x02 | ||
| 80 | #define DW_EH_PE_data4 0x03 | ||
| 81 | #define DW_EH_PE_data8 0x04 | ||
| 82 | #define DW_EH_PE_signed 0x08 | ||
| 83 | #define DW_EH_PE_ADJUST 0x70 | ||
| 84 | #define DW_EH_PE_abs 0x00 | ||
| 85 | #define DW_EH_PE_pcrel 0x10 | ||
| 86 | #define DW_EH_PE_textrel 0x20 | ||
| 87 | #define DW_EH_PE_datarel 0x30 | ||
| 88 | #define DW_EH_PE_funcrel 0x40 | ||
| 89 | #define DW_EH_PE_aligned 0x50 | ||
| 90 | #define DW_EH_PE_indirect 0x80 | ||
| 91 | #define DW_EH_PE_omit 0xff | ||
| 92 | |||
| 93 | typedef unsigned long uleb128_t; | ||
| 94 | typedef signed long sleb128_t; | ||
| 95 | |||
| 96 | static struct unwind_table { | ||
| 97 | struct { | ||
| 98 | unsigned long pc; | ||
| 99 | unsigned long range; | ||
| 100 | } core, init; | ||
| 101 | const void *address; | ||
| 102 | unsigned long size; | ||
| 103 | struct unwind_table *link; | ||
| 104 | const char *name; | ||
| 105 | } root_table, *last_table; | ||
| 106 | |||
| 107 | struct unwind_item { | ||
| 108 | enum item_location { | ||
| 109 | Nowhere, | ||
| 110 | Memory, | ||
| 111 | Register, | ||
| 112 | Value | ||
| 113 | } where; | ||
| 114 | uleb128_t value; | ||
| 115 | }; | ||
| 116 | |||
| 117 | struct unwind_state { | ||
| 118 | uleb128_t loc, org; | ||
| 119 | const u8 *cieStart, *cieEnd; | ||
| 120 | uleb128_t codeAlign; | ||
| 121 | sleb128_t dataAlign; | ||
| 122 | struct cfa { | ||
| 123 | uleb128_t reg, offs; | ||
| 124 | } cfa; | ||
| 125 | struct unwind_item regs[ARRAY_SIZE(reg_info)]; | ||
| 126 | unsigned stackDepth:8; | ||
| 127 | unsigned version:8; | ||
| 128 | const u8 *label; | ||
| 129 | const u8 *stack[MAX_STACK_DEPTH]; | ||
| 130 | }; | ||
| 131 | |||
| 132 | static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; | ||
| 133 | |||
| 134 | static struct unwind_table *find_table(unsigned long pc) | ||
| 135 | { | ||
| 136 | struct unwind_table *table; | ||
| 137 | |||
| 138 | for (table = &root_table; table; table = table->link) | ||
| 139 | if ((pc >= table->core.pc | ||
| 140 | && pc < table->core.pc + table->core.range) | ||
| 141 | || (pc >= table->init.pc | ||
| 142 | && pc < table->init.pc + table->init.range)) | ||
| 143 | break; | ||
| 144 | |||
| 145 | return table; | ||
| 146 | } | ||
| 147 | |||
| 148 | static void init_unwind_table(struct unwind_table *table, | ||
| 149 | const char *name, | ||
| 150 | const void *core_start, | ||
| 151 | unsigned long core_size, | ||
| 152 | const void *init_start, | ||
| 153 | unsigned long init_size, | ||
| 154 | const void *table_start, | ||
| 155 | unsigned long table_size) | ||
| 156 | { | ||
| 157 | table->core.pc = (unsigned long)core_start; | ||
| 158 | table->core.range = core_size; | ||
| 159 | table->init.pc = (unsigned long)init_start; | ||
| 160 | table->init.range = init_size; | ||
| 161 | table->address = table_start; | ||
| 162 | table->size = table_size; | ||
| 163 | table->link = NULL; | ||
| 164 | table->name = name; | ||
| 165 | } | ||
| 166 | |||
| 167 | void __init unwind_init(void) | ||
| 168 | { | ||
| 169 | init_unwind_table(&root_table, "kernel", | ||
| 170 | _text, _end - _text, | ||
| 171 | NULL, 0, | ||
| 172 | __start_unwind, __end_unwind - __start_unwind); | ||
| 173 | } | ||
| 174 | |||
| 175 | #ifdef CONFIG_MODULES | ||
| 176 | |||
| 177 | /* Must be called with module_mutex held. */ | ||
| 178 | void *unwind_add_table(struct module *module, | ||
| 179 | const void *table_start, | ||
| 180 | unsigned long table_size) | ||
| 181 | { | ||
| 182 | struct unwind_table *table; | ||
| 183 | |||
| 184 | if (table_size <= 0) | ||
| 185 | return NULL; | ||
| 186 | |||
| 187 | table = kmalloc(sizeof(*table), GFP_KERNEL); | ||
| 188 | if (!table) | ||
| 189 | return NULL; | ||
| 190 | |||
| 191 | init_unwind_table(table, module->name, | ||
| 192 | module->module_core, module->core_size, | ||
| 193 | module->module_init, module->init_size, | ||
| 194 | table_start, table_size); | ||
| 195 | |||
| 196 | if (last_table) | ||
| 197 | last_table->link = table; | ||
| 198 | else | ||
| 199 | root_table.link = table; | ||
| 200 | last_table = table; | ||
| 201 | |||
| 202 | return table; | ||
| 203 | } | ||
| 204 | |||
| 205 | struct unlink_table_info | ||
| 206 | { | ||
| 207 | struct unwind_table *table; | ||
| 208 | int init_only; | ||
| 209 | }; | ||
| 210 | |||
| 211 | static int unlink_table(void *arg) | ||
| 212 | { | ||
| 213 | struct unlink_table_info *info = arg; | ||
| 214 | struct unwind_table *table = info->table, *prev; | ||
| 215 | |||
| 216 | for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) | ||
| 217 | ; | ||
| 218 | |||
| 219 | if (prev->link) { | ||
| 220 | if (info->init_only) { | ||
| 221 | table->init.pc = 0; | ||
| 222 | table->init.range = 0; | ||
| 223 | info->table = NULL; | ||
| 224 | } else { | ||
| 225 | prev->link = table->link; | ||
| 226 | if (!prev->link) | ||
| 227 | last_table = prev; | ||
| 228 | } | ||
| 229 | } else | ||
| 230 | info->table = NULL; | ||
| 231 | |||
| 232 | return 0; | ||
| 233 | } | ||
| 234 | |||
| 235 | /* Must be called with module_mutex held. */ | ||
| 236 | void unwind_remove_table(void *handle, int init_only) | ||
| 237 | { | ||
| 238 | struct unwind_table *table = handle; | ||
| 239 | struct unlink_table_info info; | ||
| 240 | |||
| 241 | if (!table || table == &root_table) | ||
| 242 | return; | ||
| 243 | |||
| 244 | if (init_only && table == last_table) { | ||
| 245 | table->init.pc = 0; | ||
| 246 | table->init.range = 0; | ||
| 247 | return; | ||
| 248 | } | ||
| 249 | |||
| 250 | info.table = table; | ||
| 251 | info.init_only = init_only; | ||
| 252 | stop_machine_run(unlink_table, &info, NR_CPUS); | ||
| 253 | |||
| 254 | if (info.table) | ||
| 255 | kfree(table); | ||
| 256 | } | ||
| 257 | |||
| 258 | #endif /* CONFIG_MODULES */ | ||
| 259 | |||
| 260 | static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) | ||
| 261 | { | ||
| 262 | const u8 *cur = *pcur; | ||
| 263 | uleb128_t value; | ||
| 264 | unsigned shift; | ||
| 265 | |||
| 266 | for (shift = 0, value = 0; cur < end; shift += 7) { | ||
| 267 | if (shift + 7 > 8 * sizeof(value) | ||
| 268 | && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { | ||
| 269 | cur = end + 1; | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | value |= (uleb128_t)(*cur & 0x7f) << shift; | ||
| 273 | if (!(*cur++ & 0x80)) | ||
| 274 | break; | ||
| 275 | } | ||
| 276 | *pcur = cur; | ||
| 277 | |||
| 278 | return value; | ||
| 279 | } | ||
| 280 | |||
| 281 | static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) | ||
| 282 | { | ||
| 283 | const u8 *cur = *pcur; | ||
| 284 | sleb128_t value; | ||
| 285 | unsigned shift; | ||
| 286 | |||
| 287 | for (shift = 0, value = 0; cur < end; shift += 7) { | ||
| 288 | if (shift + 7 > 8 * sizeof(value) | ||
| 289 | && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { | ||
| 290 | cur = end + 1; | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | value |= (sleb128_t)(*cur & 0x7f) << shift; | ||
| 294 | if (!(*cur & 0x80)) { | ||
| 295 | value |= -(*cur++ & 0x40) << shift; | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | } | ||
| 299 | *pcur = cur; | ||
| 300 | |||
| 301 | return value; | ||
| 302 | } | ||
| 303 | |||
| 304 | static unsigned long read_pointer(const u8 **pLoc, | ||
| 305 | const void *end, | ||
| 306 | signed ptrType) | ||
| 307 | { | ||
| 308 | unsigned long value = 0; | ||
| 309 | union { | ||
| 310 | const u8 *p8; | ||
| 311 | const u16 *p16u; | ||
| 312 | const s16 *p16s; | ||
| 313 | const u32 *p32u; | ||
| 314 | const s32 *p32s; | ||
| 315 | const unsigned long *pul; | ||
| 316 | } ptr; | ||
| 317 | |||
| 318 | if (ptrType < 0 || ptrType == DW_EH_PE_omit) | ||
| 319 | return 0; | ||
| 320 | ptr.p8 = *pLoc; | ||
| 321 | switch(ptrType & DW_EH_PE_FORM) { | ||
| 322 | case DW_EH_PE_data2: | ||
| 323 | if (end < (const void *)(ptr.p16u + 1)) | ||
| 324 | return 0; | ||
| 325 | if(ptrType & DW_EH_PE_signed) | ||
| 326 | value = get_unaligned(ptr.p16s++); | ||
| 327 | else | ||
| 328 | value = get_unaligned(ptr.p16u++); | ||
| 329 | break; | ||
| 330 | case DW_EH_PE_data4: | ||
| 331 | #ifdef CONFIG_64BIT | ||
| 332 | if (end < (const void *)(ptr.p32u + 1)) | ||
| 333 | return 0; | ||
| 334 | if(ptrType & DW_EH_PE_signed) | ||
| 335 | value = get_unaligned(ptr.p32s++); | ||
| 336 | else | ||
| 337 | value = get_unaligned(ptr.p32u++); | ||
| 338 | break; | ||
| 339 | case DW_EH_PE_data8: | ||
| 340 | BUILD_BUG_ON(sizeof(u64) != sizeof(value)); | ||
| 341 | #else | ||
| 342 | BUILD_BUG_ON(sizeof(u32) != sizeof(value)); | ||
| 343 | #endif | ||
| 344 | case DW_EH_PE_native: | ||
| 345 | if (end < (const void *)(ptr.pul + 1)) | ||
| 346 | return 0; | ||
| 347 | value = get_unaligned(ptr.pul++); | ||
| 348 | break; | ||
| 349 | case DW_EH_PE_leb128: | ||
| 350 | BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); | ||
| 351 | value = ptrType & DW_EH_PE_signed | ||
| 352 | ? get_sleb128(&ptr.p8, end) | ||
| 353 | : get_uleb128(&ptr.p8, end); | ||
| 354 | if ((const void *)ptr.p8 > end) | ||
| 355 | return 0; | ||
| 356 | break; | ||
| 357 | default: | ||
| 358 | return 0; | ||
| 359 | } | ||
| 360 | switch(ptrType & DW_EH_PE_ADJUST) { | ||
| 361 | case DW_EH_PE_abs: | ||
| 362 | break; | ||
| 363 | case DW_EH_PE_pcrel: | ||
| 364 | value += (unsigned long)*pLoc; | ||
| 365 | break; | ||
| 366 | default: | ||
| 367 | return 0; | ||
| 368 | } | ||
| 369 | if ((ptrType & DW_EH_PE_indirect) | ||
| 370 | && __get_user(value, (unsigned long *)value)) | ||
| 371 | return 0; | ||
| 372 | *pLoc = ptr.p8; | ||
| 373 | |||
| 374 | return value; | ||
| 375 | } | ||
| 376 | |||
| 377 | static signed fde_pointer_type(const u32 *cie) | ||
| 378 | { | ||
| 379 | const u8 *ptr = (const u8 *)(cie + 2); | ||
| 380 | unsigned version = *ptr; | ||
| 381 | |||
| 382 | if (version != 1) | ||
| 383 | return -1; /* unsupported */ | ||
| 384 | if (*++ptr) { | ||
| 385 | const char *aug; | ||
| 386 | const u8 *end = (const u8 *)(cie + 1) + *cie; | ||
| 387 | uleb128_t len; | ||
| 388 | |||
| 389 | /* check if augmentation size is first (and thus present) */ | ||
| 390 | if (*ptr != 'z') | ||
| 391 | return -1; | ||
| 392 | /* check if augmentation string is nul-terminated */ | ||
| 393 | if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) | ||
| 394 | return -1; | ||
| 395 | ++ptr; /* skip terminator */ | ||
| 396 | get_uleb128(&ptr, end); /* skip code alignment */ | ||
| 397 | get_sleb128(&ptr, end); /* skip data alignment */ | ||
| 398 | /* skip return address column */ | ||
| 399 | version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end); | ||
| 400 | len = get_uleb128(&ptr, end); /* augmentation length */ | ||
| 401 | if (ptr + len < ptr || ptr + len > end) | ||
| 402 | return -1; | ||
| 403 | end = ptr + len; | ||
| 404 | while (*++aug) { | ||
| 405 | if (ptr >= end) | ||
| 406 | return -1; | ||
| 407 | switch(*aug) { | ||
| 408 | case 'L': | ||
| 409 | ++ptr; | ||
| 410 | break; | ||
| 411 | case 'P': { | ||
| 412 | signed ptrType = *ptr++; | ||
| 413 | |||
| 414 | if (!read_pointer(&ptr, end, ptrType) || ptr > end) | ||
| 415 | return -1; | ||
| 416 | } | ||
| 417 | break; | ||
| 418 | case 'R': | ||
| 419 | return *ptr; | ||
| 420 | default: | ||
| 421 | return -1; | ||
| 422 | } | ||
| 423 | } | ||
| 424 | } | ||
| 425 | return DW_EH_PE_native|DW_EH_PE_abs; | ||
| 426 | } | ||
| 427 | |||
| 428 | static int advance_loc(unsigned long delta, struct unwind_state *state) | ||
| 429 | { | ||
| 430 | state->loc += delta * state->codeAlign; | ||
| 431 | |||
| 432 | return delta > 0; | ||
| 433 | } | ||
| 434 | |||
| 435 | static void set_rule(uleb128_t reg, | ||
| 436 | enum item_location where, | ||
| 437 | uleb128_t value, | ||
| 438 | struct unwind_state *state) | ||
| 439 | { | ||
| 440 | if (reg < ARRAY_SIZE(state->regs)) { | ||
| 441 | state->regs[reg].where = where; | ||
| 442 | state->regs[reg].value = value; | ||
| 443 | } | ||
| 444 | } | ||
| 445 | |||
| 446 | static int processCFI(const u8 *start, | ||
| 447 | const u8 *end, | ||
| 448 | unsigned long targetLoc, | ||
| 449 | signed ptrType, | ||
| 450 | struct unwind_state *state) | ||
| 451 | { | ||
| 452 | union { | ||
| 453 | const u8 *p8; | ||
| 454 | const u16 *p16; | ||
| 455 | const u32 *p32; | ||
| 456 | } ptr; | ||
| 457 | int result = 1; | ||
| 458 | |||
| 459 | if (start != state->cieStart) { | ||
| 460 | state->loc = state->org; | ||
| 461 | result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); | ||
| 462 | if (targetLoc == 0 && state->label == NULL) | ||
| 463 | return result; | ||
| 464 | } | ||
| 465 | for (ptr.p8 = start; result && ptr.p8 < end; ) { | ||
| 466 | switch(*ptr.p8 >> 6) { | ||
| 467 | uleb128_t value; | ||
| 468 | |||
| 469 | case 0: | ||
| 470 | switch(*ptr.p8++) { | ||
| 471 | case DW_CFA_nop: | ||
| 472 | break; | ||
| 473 | case DW_CFA_set_loc: | ||
| 474 | if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0) | ||
| 475 | result = 0; | ||
| 476 | break; | ||
| 477 | case DW_CFA_advance_loc1: | ||
| 478 | result = ptr.p8 < end && advance_loc(*ptr.p8++, state); | ||
| 479 | break; | ||
| 480 | case DW_CFA_advance_loc2: | ||
| 481 | result = ptr.p8 <= end + 2 | ||
| 482 | && advance_loc(*ptr.p16++, state); | ||
| 483 | break; | ||
| 484 | case DW_CFA_advance_loc4: | ||
| 485 | result = ptr.p8 <= end + 4 | ||
| 486 | && advance_loc(*ptr.p32++, state); | ||
| 487 | break; | ||
| 488 | case DW_CFA_offset_extended: | ||
| 489 | value = get_uleb128(&ptr.p8, end); | ||
| 490 | set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); | ||
| 491 | break; | ||
| 492 | case DW_CFA_val_offset: | ||
| 493 | value = get_uleb128(&ptr.p8, end); | ||
| 494 | set_rule(value, Value, get_uleb128(&ptr.p8, end), state); | ||
| 495 | break; | ||
| 496 | case DW_CFA_offset_extended_sf: | ||
| 497 | value = get_uleb128(&ptr.p8, end); | ||
| 498 | set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); | ||
| 499 | break; | ||
| 500 | case DW_CFA_val_offset_sf: | ||
| 501 | value = get_uleb128(&ptr.p8, end); | ||
| 502 | set_rule(value, Value, get_sleb128(&ptr.p8, end), state); | ||
| 503 | break; | ||
| 504 | case DW_CFA_restore_extended: | ||
| 505 | case DW_CFA_undefined: | ||
| 506 | case DW_CFA_same_value: | ||
| 507 | set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); | ||
| 508 | break; | ||
| 509 | case DW_CFA_register: | ||
| 510 | value = get_uleb128(&ptr.p8, end); | ||
| 511 | set_rule(value, | ||
| 512 | Register, | ||
| 513 | get_uleb128(&ptr.p8, end), state); | ||
| 514 | break; | ||
| 515 | case DW_CFA_remember_state: | ||
| 516 | if (ptr.p8 == state->label) { | ||
| 517 | state->label = NULL; | ||
| 518 | return 1; | ||
| 519 | } | ||
| 520 | if (state->stackDepth >= MAX_STACK_DEPTH) | ||
| 521 | return 0; | ||
| 522 | state->stack[state->stackDepth++] = ptr.p8; | ||
| 523 | break; | ||
| 524 | case DW_CFA_restore_state: | ||
| 525 | if (state->stackDepth) { | ||
| 526 | const uleb128_t loc = state->loc; | ||
| 527 | const u8 *label = state->label; | ||
| 528 | |||
| 529 | state->label = state->stack[state->stackDepth - 1]; | ||
| 530 | memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); | ||
| 531 | memset(state->regs, 0, sizeof(state->regs)); | ||
| 532 | state->stackDepth = 0; | ||
| 533 | result = processCFI(start, end, 0, ptrType, state); | ||
| 534 | state->loc = loc; | ||
| 535 | state->label = label; | ||
| 536 | } else | ||
| 537 | return 0; | ||
| 538 | break; | ||
| 539 | case DW_CFA_def_cfa: | ||
| 540 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
| 541 | /*nobreak*/ | ||
| 542 | case DW_CFA_def_cfa_offset: | ||
| 543 | state->cfa.offs = get_uleb128(&ptr.p8, end); | ||
| 544 | break; | ||
| 545 | case DW_CFA_def_cfa_sf: | ||
| 546 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
| 547 | /*nobreak*/ | ||
| 548 | case DW_CFA_def_cfa_offset_sf: | ||
| 549 | state->cfa.offs = get_sleb128(&ptr.p8, end) | ||
| 550 | * state->dataAlign; | ||
| 551 | break; | ||
| 552 | case DW_CFA_def_cfa_register: | ||
| 553 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
| 554 | break; | ||
| 555 | /*todo case DW_CFA_def_cfa_expression: */ | ||
| 556 | /*todo case DW_CFA_expression: */ | ||
| 557 | /*todo case DW_CFA_val_expression: */ | ||
| 558 | case DW_CFA_GNU_args_size: | ||
| 559 | get_uleb128(&ptr.p8, end); | ||
| 560 | break; | ||
| 561 | case DW_CFA_GNU_negative_offset_extended: | ||
| 562 | value = get_uleb128(&ptr.p8, end); | ||
| 563 | set_rule(value, | ||
| 564 | Memory, | ||
| 565 | (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); | ||
| 566 | break; | ||
| 567 | case DW_CFA_GNU_window_save: | ||
| 568 | default: | ||
| 569 | result = 0; | ||
| 570 | break; | ||
| 571 | } | ||
| 572 | break; | ||
| 573 | case 1: | ||
| 574 | result = advance_loc(*ptr.p8++ & 0x3f, state); | ||
| 575 | break; | ||
| 576 | case 2: | ||
| 577 | value = *ptr.p8++ & 0x3f; | ||
| 578 | set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); | ||
| 579 | break; | ||
| 580 | case 3: | ||
| 581 | set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); | ||
| 582 | break; | ||
| 583 | } | ||
| 584 | if (ptr.p8 > end) | ||
| 585 | result = 0; | ||
| 586 | if (result && targetLoc != 0 && targetLoc < state->loc) | ||
| 587 | return 1; | ||
| 588 | } | ||
| 589 | |||
| 590 | return result | ||
| 591 | && ptr.p8 == end | ||
| 592 | && (targetLoc == 0 | ||
| 593 | || (/*todo While in theory this should apply, gcc in practice omits | ||
| 594 | everything past the function prolog, and hence the location | ||
| 595 | never reaches the end of the function. | ||
| 596 | targetLoc < state->loc &&*/ state->label == NULL)); | ||
| 597 | } | ||
| 598 | |||
/* Unwind to previous to frame.  Returns 0 if successful, negative
 * number in case of an error.
 *
 * Outline: find the unwind table and FDE covering UNW_PC(frame),
 * validate the FDE's CIE, parse the CIE header (version, augmentation,
 * alignment factors, return-address register), run the CFI programs via
 * processCFI(), then apply the resulting rules to the register values
 * stored in *frame.  If no valid DWARF info is found, fall back to
 * following the frame-pointer chain (CONFIG_FRAME_POINTER only). */
int unwind(struct unwind_frame_info *frame)
{
/* Access the saved slot of DWARF register r inside *frame as type t;
 * reg_info[].offs maps DWARF register numbers to pt_regs-word offsets. */
#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
	const u32 *fde = NULL, *cie = NULL;
	const u8 *ptr = NULL, *end = NULL;
	unsigned long startLoc = 0, endLoc = 0, cfa;
	unsigned i;
	signed ptrType = -1;
	uleb128_t retAddrReg = 0;
	struct unwind_table *table;
	struct unwind_state state;

	if (UNW_PC(frame) == 0)
		return -EINVAL;
	/* Step 1: linearly scan the table's CIE/FDE entries for an FDE
	 * whose [startLoc, endLoc) range covers the current PC. */
	if ((table = find_table(UNW_PC(frame))) != NULL
	    && !(table->size & (sizeof(*fde) - 1))) {
		unsigned long tableSize = table->size;

		for (fde = table->address;
		     tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
		     tableSize -= sizeof(*fde) + *fde,
		     fde += 1 + *fde / sizeof(*fde)) {
			if (!*fde || (*fde & (sizeof(*fde) - 1)))
				break;
			if (!fde[1])
				continue; /* this is a CIE */
			if ((fde[1] & (sizeof(*fde) - 1))
			    || fde[1] > (unsigned long)(fde + 1)
			                - (unsigned long)table->address)
				continue; /* this is not a valid FDE */
			/* fde[1] is the self-relative backwards CIE pointer. */
			cie = fde + 1 - fde[1] / sizeof(*fde);
			if (*cie <= sizeof(*cie) + 4
			    || *cie >= fde[1] - sizeof(*fde)
			    || (*cie & (sizeof(*cie) - 1))
			    || cie[1]
			    || (ptrType = fde_pointer_type(cie)) < 0) {
				cie = NULL; /* this is not a (valid) CIE */
				continue;
			}
			/* Decode initial location and address range; the range
			 * is always absolute (mask off indirect/pcrel bits). */
			ptr = (const u8 *)(fde + 2);
			startLoc = read_pointer(&ptr,
			                        (const u8 *)(fde + 1) + *fde,
			                        ptrType);
			endLoc = startLoc
			         + read_pointer(&ptr,
			                        (const u8 *)(fde + 1) + *fde,
			                        ptrType & DW_EH_PE_indirect
			                        ? ptrType
			                        : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
			if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc)
				break;
			cie = NULL;
		}
	}
	/* Step 2: parse the CIE header (version byte + augmentation
	 * string); bail out (cie = NULL) on anything unsupported. */
	if (cie != NULL) {
		memset(&state, 0, sizeof(state));
		state.cieEnd = ptr; /* keep here temporarily */
		ptr = (const u8 *)(cie + 2);
		end = (const u8 *)(cie + 1) + *cie;
		if ((state.version = *ptr) != 1)
			cie = NULL; /* unsupported version */
		else if (*++ptr) {
			/* check if augmentation size is first (and thus present) */
			if (*ptr == 'z') {
				/* check for ignorable (or already handled)
				 * nul-terminated augmentation string */
				while (++ptr < end && *ptr)
					if (strchr("LPR", *ptr) == NULL)
						break;
			}
			if (ptr >= end || *ptr)
				cie = NULL;
		}
		++ptr;
	}
	/* Step 3: alignment factors and return-address register. */
	if (cie != NULL) {
		/* get code aligment factor */
		state.codeAlign = get_uleb128(&ptr, end);
		/* get data aligment factor */
		state.dataAlign = get_sleb128(&ptr, end);
		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
			cie = NULL;
		else {
			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
			/* skip augmentation */
			if (((const char *)(cie + 2))[1] == 'z')
				ptr += get_uleb128(&ptr, end);
			if (ptr > end
			    || retAddrReg >= ARRAY_SIZE(reg_info)
			    || REG_INVALID(retAddrReg)
			    || reg_info[retAddrReg].width != sizeof(unsigned long))
				cie = NULL;
		}
	}
	/* Step 4: switch ptr/end over to the FDE's instruction area and
	 * remember the CIE instruction area in state for processCFI(). */
	if (cie != NULL) {
		state.cieStart = ptr;
		ptr = state.cieEnd;
		state.cieEnd = end;
		end = (const u8 *)(fde + 1) + *fde;
		/* skip augmentation */
		if (((const char *)(cie + 2))[1] == 'z') {
			uleb128_t augSize = get_uleb128(&ptr, end);

			if ((ptr += augSize) > end)
				fde = NULL;
		}
	}
	/* No usable DWARF info: try the frame-pointer chain fallback. */
	if (cie == NULL || fde == NULL) {
#ifdef CONFIG_FRAME_POINTER
		unsigned long top, bottom;
#endif

#ifdef CONFIG_FRAME_POINTER
		top = STACK_TOP(frame->task);
		bottom = STACK_BOTTOM(frame->task);
		/* Sanity-check SP/FP against the stack bounds; the two
		 * preprocessor arms handle the two stack growth directions. */
# if FRAME_RETADDR_OFFSET < 0
		if (UNW_SP(frame) < top
		    && UNW_FP(frame) <= UNW_SP(frame)
		    && bottom < UNW_FP(frame)
# else
		if (UNW_SP(frame) > top
		    && UNW_FP(frame) >= UNW_SP(frame)
		    && bottom > UNW_FP(frame)
# endif
		    && !((UNW_SP(frame) | UNW_FP(frame))
		         & (sizeof(unsigned long) - 1))) {
			unsigned long link;

			/* Load the saved frame link and return address;
			 * __get_user() guards against faulting addresses. */
			if (!__get_user(link,
			                (unsigned long *)(UNW_FP(frame)
			                                  + FRAME_LINK_OFFSET))
# if FRAME_RETADDR_OFFSET < 0
			    && link > bottom && link < UNW_FP(frame)
# else
			    && link > UNW_FP(frame) && link < bottom
# endif
			    && !(link & (sizeof(link) - 1))
			    && !__get_user(UNW_PC(frame),
			                   (unsigned long *)(UNW_FP(frame)
			                                     + FRAME_RETADDR_OFFSET))) {
				UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
# if FRAME_RETADDR_OFFSET < 0
					-
# else
					+
# endif
					sizeof(UNW_PC(frame));
				UNW_FP(frame) = link;
				return 0;
			}
		}
#endif
		return -ENXIO;
	}
	state.org = startLoc;
	memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
	/* process instructions */
	if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state)
	    || state.loc > endLoc
	    || state.regs[retAddrReg].where == Nowhere
	    || state.cfa.reg >= ARRAY_SIZE(reg_info)
	    || reg_info[state.cfa.reg].width != sizeof(unsigned long)
	    || state.cfa.offs % sizeof(unsigned long))
		return -EIO;
	/* update frame */
	cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
	/* Bound the address range Memory rules may read from; clamp to a
	 * single stack area when SP and CFA straddle a stack boundary. */
	startLoc = min((unsigned long)UNW_SP(frame), cfa);
	endLoc = max((unsigned long)UNW_SP(frame), cfa);
	if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
		startLoc = min(STACK_LIMIT(cfa), cfa);
		endLoc = max(STACK_LIMIT(cfa), cfa);
	}
#ifndef CONFIG_64BIT
# define CASES CASE(8); CASE(16); CASE(32)
#else
# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
#endif
	/* First pass: resolve Register rules by reading the source
	 * register's CURRENT value from the frame, before any rule below
	 * overwrites it in the second pass. */
	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
		if (REG_INVALID(i)) {
			if (state.regs[i].where == Nowhere)
				continue;
			return -EIO;
		}
		switch(state.regs[i].where) {
		default:
			break;
		case Register:
			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
			    || REG_INVALID(state.regs[i].value)
			    || reg_info[i].width > reg_info[state.regs[i].value].width)
				return -EIO;
			switch(reg_info[state.regs[i].value].width) {
#define CASE(n) \
			case sizeof(u##n): \
				state.regs[i].value = FRAME_REG(state.regs[i].value, \
				                                const u##n); \
				break
			CASES;
#undef CASE
			default:
				return -EIO;
			}
			break;
		}
	}
	/* Second pass: write the recovered values back into the frame. */
	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
		if (REG_INVALID(i))
			continue;
		switch(state.regs[i].where) {
		case Nowhere:
			/* No rule: only the stack pointer gets a value (the
			 * CFA); all other registers keep their current value. */
			if (reg_info[i].width != sizeof(UNW_SP(frame))
			    || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
			       != &UNW_SP(frame))
				continue;
			UNW_SP(frame) = cfa;
			break;
		case Register:
			switch(reg_info[i].width) {
#define CASE(n) case sizeof(u##n): \
				FRAME_REG(i, u##n) = state.regs[i].value; \
				break
			CASES;
#undef CASE
			default:
				return -EIO;
			}
			break;
		case Value:
			if (reg_info[i].width != sizeof(unsigned long))
				return -EIO;
			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
			                                    * state.dataAlign;
			break;
		case Memory: {
				unsigned long addr = cfa + state.regs[i].value
				                           * state.dataAlign;

				/* Reject misaligned, wrapping, or out-of-range
				 * addresses before dereferencing. */
				if ((state.regs[i].value * state.dataAlign)
				    % sizeof(unsigned long)
				    || addr < startLoc
				    || addr + sizeof(unsigned long) < addr
				    || addr + sizeof(unsigned long) > endLoc)
					return -EIO;
				switch(reg_info[i].width) {
#define CASE(n) case sizeof(u##n): \
					__get_user(FRAME_REG(i, u##n), (u##n *)addr); \
					break
				CASES;
#undef CASE
				default:
					return -EIO;
				}
			}
			break;
		}
	}

	return 0;
#undef CASES
#undef FRAME_REG
}
EXPORT_SYMBOL(unwind);
| 863 | |||
| 864 | int unwind_init_frame_info(struct unwind_frame_info *info, | ||
| 865 | struct task_struct *tsk, | ||
| 866 | /*const*/ struct pt_regs *regs) | ||
| 867 | { | ||
| 868 | info->task = tsk; | ||
| 869 | arch_unw_init_frame_info(info, regs); | ||
| 870 | |||
| 871 | return 0; | ||
| 872 | } | ||
| 873 | EXPORT_SYMBOL(unwind_init_frame_info); | ||
| 874 | |||
| 875 | /* | ||
| 876 | * Prepare to unwind a blocked task. | ||
| 877 | */ | ||
| 878 | int unwind_init_blocked(struct unwind_frame_info *info, | ||
| 879 | struct task_struct *tsk) | ||
| 880 | { | ||
| 881 | info->task = tsk; | ||
| 882 | arch_unw_init_blocked(info); | ||
| 883 | |||
| 884 | return 0; | ||
| 885 | } | ||
| 886 | EXPORT_SYMBOL(unwind_init_blocked); | ||
| 887 | |||
| 888 | /* | ||
| 889 | * Prepare to unwind the currently running thread. | ||
| 890 | */ | ||
| 891 | int unwind_init_running(struct unwind_frame_info *info, | ||
| 892 | asmlinkage int (*callback)(struct unwind_frame_info *, | ||
| 893 | void *arg), | ||
| 894 | void *arg) | ||
| 895 | { | ||
| 896 | info->task = current; | ||
| 897 | |||
| 898 | return arch_unwind_init_running(info, callback, arg); | ||
| 899 | } | ||
| 900 | EXPORT_SYMBOL(unwind_init_running); | ||
| 901 | |||
/*
 * Unwind frame after frame until the return pointer is in user-land
 * (or until an error occurs).  Returns 0 on success, the negative
 * error code from unwind() otherwise.
 */
int unwind_to_user(struct unwind_frame_info *info)
{
	for (;;) {
		int rc;

		if (arch_unw_user_mode(info))
			return 0;
		rc = unwind(info);
		if (rc < 0)
			return rc;
	}
}
EXPORT_SYMBOL(unwind_to_user);
