diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/cpu.c | 10 | ||||
-rw-r--r-- | kernel/cpuset.c | 26 | ||||
-rw-r--r-- | kernel/exit.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 15 | ||||
-rw-r--r-- | kernel/kprobes.c | 58 | ||||
-rw-r--r-- | kernel/module.c | 27 | ||||
-rw-r--r-- | kernel/mutex-debug.c | 12 | ||||
-rw-r--r-- | kernel/mutex-debug.h | 25 | ||||
-rw-r--r-- | kernel/mutex.c | 21 | ||||
-rw-r--r-- | kernel/mutex.h | 6 | ||||
-rw-r--r-- | kernel/ptrace.c | 23 | ||||
-rw-r--r-- | kernel/sched.c | 11 | ||||
-rw-r--r-- | kernel/signal.c | 35 | ||||
-rw-r--r-- | kernel/sysctl.c | 11 | ||||
-rw-r--r-- | kernel/time.c | 2 | ||||
-rw-r--r-- | kernel/time/Makefile | 1 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 349 | ||||
-rw-r--r-- | kernel/time/jiffies.c | 73 | ||||
-rw-r--r-- | kernel/timer.c | 396 | ||||
-rw-r--r-- | kernel/unwind.c | 918 |
21 files changed, 1891 insertions, 137 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index f6ef00f4f90f..752bd7d383af 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -10,6 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | |||
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o | 11 | hrtimer.o |
12 | 12 | ||
13 | obj-y += time/ | ||
13 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | 14 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o |
14 | obj-$(CONFIG_FUTEX) += futex.o | 15 | obj-$(CONFIG_FUTEX) += futex.o |
15 | ifeq ($(CONFIG_COMPAT),y) | 16 | ifeq ($(CONFIG_COMPAT),y) |
@@ -21,6 +22,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |||
21 | obj-$(CONFIG_UID16) += uid16.o | 22 | obj-$(CONFIG_UID16) += uid16.o |
22 | obj-$(CONFIG_MODULES) += module.o | 23 | obj-$(CONFIG_MODULES) += module.o |
23 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 24 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
25 | obj-$(CONFIG_STACK_UNWIND) += unwind.o | ||
24 | obj-$(CONFIG_PM) += power/ | 26 | obj-$(CONFIG_PM) += power/ |
25 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o | 27 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o |
26 | obj-$(CONFIG_KEXEC) += kexec.o | 28 | obj-$(CONFIG_KEXEC) += kexec.o |
diff --git a/kernel/cpu.c b/kernel/cpu.c index fe2b8d0bfe4c..03dcd981846a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -13,10 +13,10 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
15 | #include <linux/stop_machine.h> | 15 | #include <linux/stop_machine.h> |
16 | #include <asm/semaphore.h> | 16 | #include <linux/mutex.h> |
17 | 17 | ||
18 | /* This protects CPUs going up and down... */ | 18 | /* This protects CPUs going up and down... */ |
19 | static DECLARE_MUTEX(cpucontrol); | 19 | static DEFINE_MUTEX(cpucontrol); |
20 | 20 | ||
21 | static BLOCKING_NOTIFIER_HEAD(cpu_chain); | 21 | static BLOCKING_NOTIFIER_HEAD(cpu_chain); |
22 | 22 | ||
@@ -30,9 +30,9 @@ static int __lock_cpu_hotplug(int interruptible) | |||
30 | 30 | ||
31 | if (lock_cpu_hotplug_owner != current) { | 31 | if (lock_cpu_hotplug_owner != current) { |
32 | if (interruptible) | 32 | if (interruptible) |
33 | ret = down_interruptible(&cpucontrol); | 33 | ret = mutex_lock_interruptible(&cpucontrol); |
34 | else | 34 | else |
35 | down(&cpucontrol); | 35 | mutex_lock(&cpucontrol); |
36 | } | 36 | } |
37 | 37 | ||
38 | /* | 38 | /* |
@@ -56,7 +56,7 @@ void unlock_cpu_hotplug(void) | |||
56 | { | 56 | { |
57 | if (--lock_cpu_hotplug_depth == 0) { | 57 | if (--lock_cpu_hotplug_depth == 0) { |
58 | lock_cpu_hotplug_owner = NULL; | 58 | lock_cpu_hotplug_owner = NULL; |
59 | up(&cpucontrol); | 59 | mutex_unlock(&cpucontrol); |
60 | } | 60 | } |
61 | } | 61 | } |
62 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); | 62 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b602f73fb38d..1535af3a912d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -2442,31 +2442,43 @@ void __cpuset_memory_pressure_bump(void) | |||
2442 | */ | 2442 | */ |
2443 | static int proc_cpuset_show(struct seq_file *m, void *v) | 2443 | static int proc_cpuset_show(struct seq_file *m, void *v) |
2444 | { | 2444 | { |
2445 | struct pid *pid; | ||
2445 | struct task_struct *tsk; | 2446 | struct task_struct *tsk; |
2446 | char *buf; | 2447 | char *buf; |
2447 | int retval = 0; | 2448 | int retval; |
2448 | 2449 | ||
2450 | retval = -ENOMEM; | ||
2449 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 2451 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
2450 | if (!buf) | 2452 | if (!buf) |
2451 | return -ENOMEM; | 2453 | goto out; |
2454 | |||
2455 | retval = -ESRCH; | ||
2456 | pid = m->private; | ||
2457 | tsk = get_pid_task(pid, PIDTYPE_PID); | ||
2458 | if (!tsk) | ||
2459 | goto out_free; | ||
2452 | 2460 | ||
2453 | tsk = m->private; | 2461 | retval = -EINVAL; |
2454 | mutex_lock(&manage_mutex); | 2462 | mutex_lock(&manage_mutex); |
2463 | |||
2455 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); | 2464 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); |
2456 | if (retval < 0) | 2465 | if (retval < 0) |
2457 | goto out; | 2466 | goto out_unlock; |
2458 | seq_puts(m, buf); | 2467 | seq_puts(m, buf); |
2459 | seq_putc(m, '\n'); | 2468 | seq_putc(m, '\n'); |
2460 | out: | 2469 | out_unlock: |
2461 | mutex_unlock(&manage_mutex); | 2470 | mutex_unlock(&manage_mutex); |
2471 | put_task_struct(tsk); | ||
2472 | out_free: | ||
2462 | kfree(buf); | 2473 | kfree(buf); |
2474 | out: | ||
2463 | return retval; | 2475 | return retval; |
2464 | } | 2476 | } |
2465 | 2477 | ||
2466 | static int cpuset_open(struct inode *inode, struct file *file) | 2478 | static int cpuset_open(struct inode *inode, struct file *file) |
2467 | { | 2479 | { |
2468 | struct task_struct *tsk = PROC_I(inode)->task; | 2480 | struct pid *pid = PROC_I(inode)->pid; |
2469 | return single_open(file, proc_cpuset_show, tsk); | 2481 | return single_open(file, proc_cpuset_show, pid); |
2470 | } | 2482 | } |
2471 | 2483 | ||
2472 | struct file_operations proc_cpuset_operations = { | 2484 | struct file_operations proc_cpuset_operations = { |
diff --git a/kernel/exit.c b/kernel/exit.c index e76bd02e930e..304ef637be6c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -137,12 +137,8 @@ void release_task(struct task_struct * p) | |||
137 | { | 137 | { |
138 | int zap_leader; | 138 | int zap_leader; |
139 | task_t *leader; | 139 | task_t *leader; |
140 | struct dentry *proc_dentry; | ||
141 | |||
142 | repeat: | 140 | repeat: |
143 | atomic_dec(&p->user->processes); | 141 | atomic_dec(&p->user->processes); |
144 | spin_lock(&p->proc_lock); | ||
145 | proc_dentry = proc_pid_unhash(p); | ||
146 | write_lock_irq(&tasklist_lock); | 142 | write_lock_irq(&tasklist_lock); |
147 | ptrace_unlink(p); | 143 | ptrace_unlink(p); |
148 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 144 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); |
@@ -171,8 +167,7 @@ repeat: | |||
171 | 167 | ||
172 | sched_exit(p); | 168 | sched_exit(p); |
173 | write_unlock_irq(&tasklist_lock); | 169 | write_unlock_irq(&tasklist_lock); |
174 | spin_unlock(&p->proc_lock); | 170 | proc_flush_task(p); |
175 | proc_pid_flush(proc_dentry); | ||
176 | release_thread(p); | 171 | release_thread(p); |
177 | call_rcu(&p->rcu, delayed_put_task_struct); | 172 | call_rcu(&p->rcu, delayed_put_task_struct); |
178 | 173 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index dfd10cb370c3..9b4e54ef0225 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags, | |||
993 | if (put_user(p->pid, parent_tidptr)) | 993 | if (put_user(p->pid, parent_tidptr)) |
994 | goto bad_fork_cleanup; | 994 | goto bad_fork_cleanup; |
995 | 995 | ||
996 | p->proc_dentry = NULL; | ||
997 | |||
998 | INIT_LIST_HEAD(&p->children); | 996 | INIT_LIST_HEAD(&p->children); |
999 | INIT_LIST_HEAD(&p->sibling); | 997 | INIT_LIST_HEAD(&p->sibling); |
1000 | p->vfork_done = NULL; | 998 | p->vfork_done = NULL; |
1001 | spin_lock_init(&p->alloc_lock); | 999 | spin_lock_init(&p->alloc_lock); |
1002 | spin_lock_init(&p->proc_lock); | ||
1003 | 1000 | ||
1004 | clear_tsk_thread_flag(p, TIF_SIGPENDING); | 1001 | clear_tsk_thread_flag(p, TIF_SIGPENDING); |
1005 | init_sigpending(&p->pending); | 1002 | init_sigpending(&p->pending); |
@@ -1159,18 +1156,6 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1159 | } | 1156 | } |
1160 | 1157 | ||
1161 | if (clone_flags & CLONE_THREAD) { | 1158 | if (clone_flags & CLONE_THREAD) { |
1162 | /* | ||
1163 | * Important: if an exit-all has been started then | ||
1164 | * do not create this new thread - the whole thread | ||
1165 | * group is supposed to exit anyway. | ||
1166 | */ | ||
1167 | if (current->signal->flags & SIGNAL_GROUP_EXIT) { | ||
1168 | spin_unlock(¤t->sighand->siglock); | ||
1169 | write_unlock_irq(&tasklist_lock); | ||
1170 | retval = -EAGAIN; | ||
1171 | goto bad_fork_cleanup_namespace; | ||
1172 | } | ||
1173 | |||
1174 | p->group_leader = current->group_leader; | 1159 | p->group_leader = current->group_leader; |
1175 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); | 1160 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); |
1176 | 1161 | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1fbf466a29aa..64aab081153b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -47,11 +47,17 @@ | |||
47 | 47 | ||
48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | 48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; |
49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | 49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; |
50 | static atomic_t kprobe_count; | ||
50 | 51 | ||
51 | DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 52 | DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ |
52 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ | 53 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ |
53 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 54 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
54 | 55 | ||
56 | static struct notifier_block kprobe_page_fault_nb = { | ||
57 | .notifier_call = kprobe_exceptions_notify, | ||
58 | .priority = 0x7fffffff /* we need to notified first */ | ||
59 | }; | ||
60 | |||
55 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT | 61 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT |
56 | /* | 62 | /* |
57 | * kprobe->ainsn.insn points to the copy of the instruction to be | 63 | * kprobe->ainsn.insn points to the copy of the instruction to be |
@@ -368,16 +374,15 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
368 | */ | 374 | */ |
369 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | 375 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) |
370 | { | 376 | { |
371 | struct kprobe *kp; | ||
372 | |||
373 | if (p->break_handler) { | 377 | if (p->break_handler) { |
374 | list_for_each_entry_rcu(kp, &old_p->list, list) { | 378 | if (old_p->break_handler) |
375 | if (kp->break_handler) | 379 | return -EEXIST; |
376 | return -EEXIST; | ||
377 | } | ||
378 | list_add_tail_rcu(&p->list, &old_p->list); | 380 | list_add_tail_rcu(&p->list, &old_p->list); |
381 | old_p->break_handler = aggr_break_handler; | ||
379 | } else | 382 | } else |
380 | list_add_rcu(&p->list, &old_p->list); | 383 | list_add_rcu(&p->list, &old_p->list); |
384 | if (p->post_handler && !old_p->post_handler) | ||
385 | old_p->post_handler = aggr_post_handler; | ||
381 | return 0; | 386 | return 0; |
382 | } | 387 | } |
383 | 388 | ||
@@ -390,9 +395,11 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
390 | copy_kprobe(p, ap); | 395 | copy_kprobe(p, ap); |
391 | ap->addr = p->addr; | 396 | ap->addr = p->addr; |
392 | ap->pre_handler = aggr_pre_handler; | 397 | ap->pre_handler = aggr_pre_handler; |
393 | ap->post_handler = aggr_post_handler; | ||
394 | ap->fault_handler = aggr_fault_handler; | 398 | ap->fault_handler = aggr_fault_handler; |
395 | ap->break_handler = aggr_break_handler; | 399 | if (p->post_handler) |
400 | ap->post_handler = aggr_post_handler; | ||
401 | if (p->break_handler) | ||
402 | ap->break_handler = aggr_break_handler; | ||
396 | 403 | ||
397 | INIT_LIST_HEAD(&ap->list); | 404 | INIT_LIST_HEAD(&ap->list); |
398 | list_add_rcu(&p->list, &ap->list); | 405 | list_add_rcu(&p->list, &ap->list); |
@@ -464,6 +471,8 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
464 | old_p = get_kprobe(p->addr); | 471 | old_p = get_kprobe(p->addr); |
465 | if (old_p) { | 472 | if (old_p) { |
466 | ret = register_aggr_kprobe(old_p, p); | 473 | ret = register_aggr_kprobe(old_p, p); |
474 | if (!ret) | ||
475 | atomic_inc(&kprobe_count); | ||
467 | goto out; | 476 | goto out; |
468 | } | 477 | } |
469 | 478 | ||
@@ -474,6 +483,10 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
474 | hlist_add_head_rcu(&p->hlist, | 483 | hlist_add_head_rcu(&p->hlist, |
475 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 484 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
476 | 485 | ||
486 | if (atomic_add_return(1, &kprobe_count) == \ | ||
487 | (ARCH_INACTIVE_KPROBE_COUNT + 1)) | ||
488 | register_page_fault_notifier(&kprobe_page_fault_nb); | ||
489 | |||
477 | arch_arm_kprobe(p); | 490 | arch_arm_kprobe(p); |
478 | 491 | ||
479 | out: | 492 | out: |
@@ -536,14 +549,40 @@ valid_p: | |||
536 | kfree(old_p); | 549 | kfree(old_p); |
537 | } | 550 | } |
538 | arch_remove_kprobe(p); | 551 | arch_remove_kprobe(p); |
552 | } else { | ||
553 | mutex_lock(&kprobe_mutex); | ||
554 | if (p->break_handler) | ||
555 | old_p->break_handler = NULL; | ||
556 | if (p->post_handler){ | ||
557 | list_for_each_entry_rcu(list_p, &old_p->list, list){ | ||
558 | if (list_p->post_handler){ | ||
559 | cleanup_p = 2; | ||
560 | break; | ||
561 | } | ||
562 | } | ||
563 | if (cleanup_p == 0) | ||
564 | old_p->post_handler = NULL; | ||
565 | } | ||
566 | mutex_unlock(&kprobe_mutex); | ||
539 | } | 567 | } |
568 | |||
569 | /* Call unregister_page_fault_notifier() | ||
570 | * if no probes are active | ||
571 | */ | ||
572 | mutex_lock(&kprobe_mutex); | ||
573 | if (atomic_add_return(-1, &kprobe_count) == \ | ||
574 | ARCH_INACTIVE_KPROBE_COUNT) | ||
575 | unregister_page_fault_notifier(&kprobe_page_fault_nb); | ||
576 | mutex_unlock(&kprobe_mutex); | ||
577 | return; | ||
540 | } | 578 | } |
541 | 579 | ||
542 | static struct notifier_block kprobe_exceptions_nb = { | 580 | static struct notifier_block kprobe_exceptions_nb = { |
543 | .notifier_call = kprobe_exceptions_notify, | 581 | .notifier_call = kprobe_exceptions_notify, |
544 | .priority = 0x7fffffff /* we need to notified first */ | 582 | .priority = 0x7fffffff /* we need to be notified first */ |
545 | }; | 583 | }; |
546 | 584 | ||
585 | |||
547 | int __kprobes register_jprobe(struct jprobe *jp) | 586 | int __kprobes register_jprobe(struct jprobe *jp) |
548 | { | 587 | { |
549 | /* Todo: Verify probepoint is a function entry point */ | 588 | /* Todo: Verify probepoint is a function entry point */ |
@@ -652,6 +691,7 @@ static int __init init_kprobes(void) | |||
652 | INIT_HLIST_HEAD(&kprobe_table[i]); | 691 | INIT_HLIST_HEAD(&kprobe_table[i]); |
653 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); | 692 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); |
654 | } | 693 | } |
694 | atomic_set(&kprobe_count, 0); | ||
655 | 695 | ||
656 | err = arch_init_kprobes(); | 696 | err = arch_init_kprobes(); |
657 | if (!err) | 697 | if (!err) |
diff --git a/kernel/module.c b/kernel/module.c index d75275de1c28..10e5b872adf6 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -40,9 +40,11 @@ | |||
40 | #include <linux/string.h> | 40 | #include <linux/string.h> |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/mutex.h> | 42 | #include <linux/mutex.h> |
43 | #include <linux/unwind.h> | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | #include <asm/semaphore.h> | 45 | #include <asm/semaphore.h> |
45 | #include <asm/cacheflush.h> | 46 | #include <asm/cacheflush.h> |
47 | #include <linux/license.h> | ||
46 | 48 | ||
47 | #if 0 | 49 | #if 0 |
48 | #define DEBUGP printk | 50 | #define DEBUGP printk |
@@ -1051,6 +1053,8 @@ static void free_module(struct module *mod) | |||
1051 | remove_sect_attrs(mod); | 1053 | remove_sect_attrs(mod); |
1052 | mod_kobject_remove(mod); | 1054 | mod_kobject_remove(mod); |
1053 | 1055 | ||
1056 | unwind_remove_table(mod->unwind_info, 0); | ||
1057 | |||
1054 | /* Arch-specific cleanup. */ | 1058 | /* Arch-specific cleanup. */ |
1055 | module_arch_cleanup(mod); | 1059 | module_arch_cleanup(mod); |
1056 | 1060 | ||
@@ -1248,16 +1252,6 @@ static void layout_sections(struct module *mod, | |||
1248 | } | 1252 | } |
1249 | } | 1253 | } |
1250 | 1254 | ||
1251 | static inline int license_is_gpl_compatible(const char *license) | ||
1252 | { | ||
1253 | return (strcmp(license, "GPL") == 0 | ||
1254 | || strcmp(license, "GPL v2") == 0 | ||
1255 | || strcmp(license, "GPL and additional rights") == 0 | ||
1256 | || strcmp(license, "Dual BSD/GPL") == 0 | ||
1257 | || strcmp(license, "Dual MIT/GPL") == 0 | ||
1258 | || strcmp(license, "Dual MPL/GPL") == 0); | ||
1259 | } | ||
1260 | |||
1261 | static void set_license(struct module *mod, const char *license) | 1255 | static void set_license(struct module *mod, const char *license) |
1262 | { | 1256 | { |
1263 | if (!license) | 1257 | if (!license) |
@@ -1412,7 +1406,7 @@ static struct module *load_module(void __user *umod, | |||
1412 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, | 1406 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, |
1413 | exportindex, modindex, obsparmindex, infoindex, gplindex, | 1407 | exportindex, modindex, obsparmindex, infoindex, gplindex, |
1414 | crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, | 1408 | crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, |
1415 | gplfuturecrcindex; | 1409 | gplfuturecrcindex, unwindex = 0; |
1416 | struct module *mod; | 1410 | struct module *mod; |
1417 | long err = 0; | 1411 | long err = 0; |
1418 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 1412 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ |
@@ -1502,6 +1496,9 @@ static struct module *load_module(void __user *umod, | |||
1502 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); | 1496 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); |
1503 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); | 1497 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); |
1504 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); | 1498 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); |
1499 | #ifdef ARCH_UNWIND_SECTION_NAME | ||
1500 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); | ||
1501 | #endif | ||
1505 | 1502 | ||
1506 | /* Don't keep modinfo section */ | 1503 | /* Don't keep modinfo section */ |
1507 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1504 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
@@ -1510,6 +1507,8 @@ static struct module *load_module(void __user *umod, | |||
1510 | sechdrs[symindex].sh_flags |= SHF_ALLOC; | 1507 | sechdrs[symindex].sh_flags |= SHF_ALLOC; |
1511 | sechdrs[strindex].sh_flags |= SHF_ALLOC; | 1508 | sechdrs[strindex].sh_flags |= SHF_ALLOC; |
1512 | #endif | 1509 | #endif |
1510 | if (unwindex) | ||
1511 | sechdrs[unwindex].sh_flags |= SHF_ALLOC; | ||
1513 | 1512 | ||
1514 | /* Check module struct version now, before we try to use module. */ | 1513 | /* Check module struct version now, before we try to use module. */ |
1515 | if (!check_modstruct_version(sechdrs, versindex, mod)) { | 1514 | if (!check_modstruct_version(sechdrs, versindex, mod)) { |
@@ -1738,6 +1737,11 @@ static struct module *load_module(void __user *umod, | |||
1738 | goto arch_cleanup; | 1737 | goto arch_cleanup; |
1739 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 1738 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
1740 | 1739 | ||
1740 | /* Size of section 0 is 0, so this works well if no unwind info. */ | ||
1741 | mod->unwind_info = unwind_add_table(mod, | ||
1742 | (void *)sechdrs[unwindex].sh_addr, | ||
1743 | sechdrs[unwindex].sh_size); | ||
1744 | |||
1741 | /* Get rid of temporary copy */ | 1745 | /* Get rid of temporary copy */ |
1742 | vfree(hdr); | 1746 | vfree(hdr); |
1743 | 1747 | ||
@@ -1836,6 +1840,7 @@ sys_init_module(void __user *umod, | |||
1836 | mod->state = MODULE_STATE_LIVE; | 1840 | mod->state = MODULE_STATE_LIVE; |
1837 | /* Drop initial reference. */ | 1841 | /* Drop initial reference. */ |
1838 | module_put(mod); | 1842 | module_put(mod); |
1843 | unwind_remove_table(mod->unwind_info, 1); | ||
1839 | module_free(mod, mod->module_init); | 1844 | module_free(mod, mod->module_init); |
1840 | mod->module_init = NULL; | 1845 | mod->module_init = NULL; |
1841 | mod->init_size = 0; | 1846 | mod->init_size = 0; |
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index f4913c376950..036b6285b15c 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c | |||
@@ -153,13 +153,13 @@ next: | |||
153 | continue; | 153 | continue; |
154 | count++; | 154 | count++; |
155 | cursor = curr->next; | 155 | cursor = curr->next; |
156 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 156 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
157 | 157 | ||
158 | printk("\n#%03d: ", count); | 158 | printk("\n#%03d: ", count); |
159 | printk_lock(lock, filter ? 0 : 1); | 159 | printk_lock(lock, filter ? 0 : 1); |
160 | goto next; | 160 | goto next; |
161 | } | 161 | } |
162 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 162 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
163 | printk("\n"); | 163 | printk("\n"); |
164 | } | 164 | } |
165 | 165 | ||
@@ -316,7 +316,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task) | |||
316 | continue; | 316 | continue; |
317 | list_del_init(curr); | 317 | list_del_init(curr); |
318 | DEBUG_OFF(); | 318 | DEBUG_OFF(); |
319 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 319 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
320 | 320 | ||
321 | printk("BUG: %s/%d, lock held at task exit time!\n", | 321 | printk("BUG: %s/%d, lock held at task exit time!\n", |
322 | task->comm, task->pid); | 322 | task->comm, task->pid); |
@@ -325,7 +325,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task) | |||
325 | printk("exiting task is not even the owner??\n"); | 325 | printk("exiting task is not even the owner??\n"); |
326 | return; | 326 | return; |
327 | } | 327 | } |
328 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 328 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
329 | } | 329 | } |
330 | 330 | ||
331 | /* | 331 | /* |
@@ -352,7 +352,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) | |||
352 | continue; | 352 | continue; |
353 | list_del_init(curr); | 353 | list_del_init(curr); |
354 | DEBUG_OFF(); | 354 | DEBUG_OFF(); |
355 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 355 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
356 | 356 | ||
357 | printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", | 357 | printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", |
358 | current->comm, current->pid, lock, from, to); | 358 | current->comm, current->pid, lock, from, to); |
@@ -362,7 +362,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) | |||
362 | printk("freeing task is not even the owner??\n"); | 362 | printk("freeing task is not even the owner??\n"); |
363 | return; | 363 | return; |
364 | } | 364 | } |
365 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 365 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
366 | } | 366 | } |
367 | 367 | ||
368 | /* | 368 | /* |
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index fd384050acb1..a5196c36a5fd 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h | |||
@@ -46,21 +46,6 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |||
46 | extern void debug_mutex_unlock(struct mutex *lock); | 46 | extern void debug_mutex_unlock(struct mutex *lock); |
47 | extern void debug_mutex_init(struct mutex *lock, const char *name); | 47 | extern void debug_mutex_init(struct mutex *lock, const char *name); |
48 | 48 | ||
49 | #define debug_spin_lock(lock) \ | ||
50 | do { \ | ||
51 | local_irq_disable(); \ | ||
52 | if (debug_mutex_on) \ | ||
53 | spin_lock(lock); \ | ||
54 | } while (0) | ||
55 | |||
56 | #define debug_spin_unlock(lock) \ | ||
57 | do { \ | ||
58 | if (debug_mutex_on) \ | ||
59 | spin_unlock(lock); \ | ||
60 | local_irq_enable(); \ | ||
61 | preempt_check_resched(); \ | ||
62 | } while (0) | ||
63 | |||
64 | #define debug_spin_lock_save(lock, flags) \ | 49 | #define debug_spin_lock_save(lock, flags) \ |
65 | do { \ | 50 | do { \ |
66 | local_irq_save(flags); \ | 51 | local_irq_save(flags); \ |
@@ -68,7 +53,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name); | |||
68 | spin_lock(lock); \ | 53 | spin_lock(lock); \ |
69 | } while (0) | 54 | } while (0) |
70 | 55 | ||
71 | #define debug_spin_lock_restore(lock, flags) \ | 56 | #define debug_spin_unlock_restore(lock, flags) \ |
72 | do { \ | 57 | do { \ |
73 | if (debug_mutex_on) \ | 58 | if (debug_mutex_on) \ |
74 | spin_unlock(lock); \ | 59 | spin_unlock(lock); \ |
@@ -76,20 +61,20 @@ extern void debug_mutex_init(struct mutex *lock, const char *name); | |||
76 | preempt_check_resched(); \ | 61 | preempt_check_resched(); \ |
77 | } while (0) | 62 | } while (0) |
78 | 63 | ||
79 | #define spin_lock_mutex(lock) \ | 64 | #define spin_lock_mutex(lock, flags) \ |
80 | do { \ | 65 | do { \ |
81 | struct mutex *l = container_of(lock, struct mutex, wait_lock); \ | 66 | struct mutex *l = container_of(lock, struct mutex, wait_lock); \ |
82 | \ | 67 | \ |
83 | DEBUG_WARN_ON(in_interrupt()); \ | 68 | DEBUG_WARN_ON(in_interrupt()); \ |
84 | debug_spin_lock(&debug_mutex_lock); \ | 69 | debug_spin_lock_save(&debug_mutex_lock, flags); \ |
85 | spin_lock(lock); \ | 70 | spin_lock(lock); \ |
86 | DEBUG_WARN_ON(l->magic != l); \ | 71 | DEBUG_WARN_ON(l->magic != l); \ |
87 | } while (0) | 72 | } while (0) |
88 | 73 | ||
89 | #define spin_unlock_mutex(lock) \ | 74 | #define spin_unlock_mutex(lock, flags) \ |
90 | do { \ | 75 | do { \ |
91 | spin_unlock(lock); \ | 76 | spin_unlock(lock); \ |
92 | debug_spin_unlock(&debug_mutex_lock); \ | 77 | debug_spin_unlock_restore(&debug_mutex_lock, flags); \ |
93 | } while (0) | 78 | } while (0) |
94 | 79 | ||
95 | #define DEBUG_OFF() \ | 80 | #define DEBUG_OFF() \ |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 5449b210d9ed..7043db21bbce 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -125,10 +125,11 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
125 | struct task_struct *task = current; | 125 | struct task_struct *task = current; |
126 | struct mutex_waiter waiter; | 126 | struct mutex_waiter waiter; |
127 | unsigned int old_val; | 127 | unsigned int old_val; |
128 | unsigned long flags; | ||
128 | 129 | ||
129 | debug_mutex_init_waiter(&waiter); | 130 | debug_mutex_init_waiter(&waiter); |
130 | 131 | ||
131 | spin_lock_mutex(&lock->wait_lock); | 132 | spin_lock_mutex(&lock->wait_lock, flags); |
132 | 133 | ||
133 | debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); | 134 | debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); |
134 | 135 | ||
@@ -157,7 +158,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
157 | if (unlikely(state == TASK_INTERRUPTIBLE && | 158 | if (unlikely(state == TASK_INTERRUPTIBLE && |
158 | signal_pending(task))) { | 159 | signal_pending(task))) { |
159 | mutex_remove_waiter(lock, &waiter, task->thread_info); | 160 | mutex_remove_waiter(lock, &waiter, task->thread_info); |
160 | spin_unlock_mutex(&lock->wait_lock); | 161 | spin_unlock_mutex(&lock->wait_lock, flags); |
161 | 162 | ||
162 | debug_mutex_free_waiter(&waiter); | 163 | debug_mutex_free_waiter(&waiter); |
163 | return -EINTR; | 164 | return -EINTR; |
@@ -165,9 +166,9 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
165 | __set_task_state(task, state); | 166 | __set_task_state(task, state); |
166 | 167 | ||
167 | /* didnt get the lock, go to sleep: */ | 168 | /* didnt get the lock, go to sleep: */ |
168 | spin_unlock_mutex(&lock->wait_lock); | 169 | spin_unlock_mutex(&lock->wait_lock, flags); |
169 | schedule(); | 170 | schedule(); |
170 | spin_lock_mutex(&lock->wait_lock); | 171 | spin_lock_mutex(&lock->wait_lock, flags); |
171 | } | 172 | } |
172 | 173 | ||
173 | /* got the lock - rejoice! */ | 174 | /* got the lock - rejoice! */ |
@@ -178,7 +179,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
178 | if (likely(list_empty(&lock->wait_list))) | 179 | if (likely(list_empty(&lock->wait_list))) |
179 | atomic_set(&lock->count, 0); | 180 | atomic_set(&lock->count, 0); |
180 | 181 | ||
181 | spin_unlock_mutex(&lock->wait_lock); | 182 | spin_unlock_mutex(&lock->wait_lock, flags); |
182 | 183 | ||
183 | debug_mutex_free_waiter(&waiter); | 184 | debug_mutex_free_waiter(&waiter); |
184 | 185 | ||
@@ -203,10 +204,11 @@ static fastcall noinline void | |||
203 | __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) | 204 | __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) |
204 | { | 205 | { |
205 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 206 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
207 | unsigned long flags; | ||
206 | 208 | ||
207 | DEBUG_WARN_ON(lock->owner != current_thread_info()); | 209 | DEBUG_WARN_ON(lock->owner != current_thread_info()); |
208 | 210 | ||
209 | spin_lock_mutex(&lock->wait_lock); | 211 | spin_lock_mutex(&lock->wait_lock, flags); |
210 | 212 | ||
211 | /* | 213 | /* |
212 | * some architectures leave the lock unlocked in the fastpath failure | 214 | * some architectures leave the lock unlocked in the fastpath failure |
@@ -231,7 +233,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) | |||
231 | 233 | ||
232 | debug_mutex_clear_owner(lock); | 234 | debug_mutex_clear_owner(lock); |
233 | 235 | ||
234 | spin_unlock_mutex(&lock->wait_lock); | 236 | spin_unlock_mutex(&lock->wait_lock, flags); |
235 | } | 237 | } |
236 | 238 | ||
237 | /* | 239 | /* |
@@ -276,9 +278,10 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) | |||
276 | static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | 278 | static inline int __mutex_trylock_slowpath(atomic_t *lock_count) |
277 | { | 279 | { |
278 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 280 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
281 | unsigned long flags; | ||
279 | int prev; | 282 | int prev; |
280 | 283 | ||
281 | spin_lock_mutex(&lock->wait_lock); | 284 | spin_lock_mutex(&lock->wait_lock, flags); |
282 | 285 | ||
283 | prev = atomic_xchg(&lock->count, -1); | 286 | prev = atomic_xchg(&lock->count, -1); |
284 | if (likely(prev == 1)) | 287 | if (likely(prev == 1)) |
@@ -287,7 +290,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | |||
287 | if (likely(list_empty(&lock->wait_list))) | 290 | if (likely(list_empty(&lock->wait_list))) |
288 | atomic_set(&lock->count, 0); | 291 | atomic_set(&lock->count, 0); |
289 | 292 | ||
290 | spin_unlock_mutex(&lock->wait_lock); | 293 | spin_unlock_mutex(&lock->wait_lock, flags); |
291 | 294 | ||
292 | return prev == 1; | 295 | return prev == 1; |
293 | } | 296 | } |
diff --git a/kernel/mutex.h b/kernel/mutex.h index 00fe84e7b672..069189947257 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h | |||
@@ -9,8 +9,10 @@ | |||
9 | * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: | 9 | * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define spin_lock_mutex(lock) spin_lock(lock) | 12 | #define spin_lock_mutex(lock, flags) \ |
13 | #define spin_unlock_mutex(lock) spin_unlock(lock) | 13 | do { spin_lock(lock); (void)(flags); } while (0) |
14 | #define spin_unlock_mutex(lock, flags) \ | ||
15 | do { spin_unlock(lock); (void)(flags); } while (0) | ||
14 | #define mutex_remove_waiter(lock, waiter, ti) \ | 16 | #define mutex_remove_waiter(lock, waiter, ti) \ |
15 | __list_del((waiter)->list.prev, (waiter)->list.next) | 17 | __list_del((waiter)->list.prev, (waiter)->list.next) |
16 | 18 | ||
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 921c22ad16e4..335c5b932e14 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -120,8 +120,18 @@ int ptrace_check_attach(struct task_struct *child, int kill) | |||
120 | 120 | ||
121 | static int may_attach(struct task_struct *task) | 121 | static int may_attach(struct task_struct *task) |
122 | { | 122 | { |
123 | if (!task->mm) | 123 | /* May we inspect the given task? |
124 | return -EPERM; | 124 | * This check is used both for attaching with ptrace |
125 | * and for allowing access to sensitive information in /proc. | ||
126 | * | ||
127 | * ptrace_attach denies several cases that /proc allows | ||
128 | * because setting up the necessary parent/child relationship | ||
129 | * or halting the specified task is impossible. | ||
130 | */ | ||
131 | int dumpable = 0; | ||
132 | /* Don't let security modules deny introspection */ | ||
133 | if (task == current) | ||
134 | return 0; | ||
125 | if (((current->uid != task->euid) || | 135 | if (((current->uid != task->euid) || |
126 | (current->uid != task->suid) || | 136 | (current->uid != task->suid) || |
127 | (current->uid != task->uid) || | 137 | (current->uid != task->uid) || |
@@ -130,7 +140,9 @@ static int may_attach(struct task_struct *task) | |||
130 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) | 140 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) |
131 | return -EPERM; | 141 | return -EPERM; |
132 | smp_rmb(); | 142 | smp_rmb(); |
133 | if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) | 143 | if (task->mm) |
144 | dumpable = task->mm->dumpable; | ||
145 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | ||
134 | return -EPERM; | 146 | return -EPERM; |
135 | 147 | ||
136 | return security_ptrace(current, task); | 148 | return security_ptrace(current, task); |
@@ -176,6 +188,8 @@ repeat: | |||
176 | goto repeat; | 188 | goto repeat; |
177 | } | 189 | } |
178 | 190 | ||
191 | if (!task->mm) | ||
192 | goto bad; | ||
179 | /* the same process cannot be attached many times */ | 193 | /* the same process cannot be attached many times */ |
180 | if (task->ptrace & PT_PTRACED) | 194 | if (task->ptrace & PT_PTRACED) |
181 | goto bad; | 195 | goto bad; |
@@ -200,7 +214,7 @@ out: | |||
200 | return retval; | 214 | return retval; |
201 | } | 215 | } |
202 | 216 | ||
203 | void __ptrace_detach(struct task_struct *child, unsigned int data) | 217 | static inline void __ptrace_detach(struct task_struct *child, unsigned int data) |
204 | { | 218 | { |
205 | child->exit_code = data; | 219 | child->exit_code = data; |
206 | /* .. re-parent .. */ | 220 | /* .. re-parent .. */ |
@@ -219,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
219 | ptrace_disable(child); | 233 | ptrace_disable(child); |
220 | 234 | ||
221 | write_lock_irq(&tasklist_lock); | 235 | write_lock_irq(&tasklist_lock); |
236 | /* protect against de_thread()->release_task() */ | ||
222 | if (child->ptrace) | 237 | if (child->ptrace) |
223 | __ptrace_detach(child, data); | 238 | __ptrace_detach(child, data); |
224 | write_unlock_irq(&tasklist_lock); | 239 | write_unlock_irq(&tasklist_lock); |
diff --git a/kernel/sched.c b/kernel/sched.c index f06d059edef5..a856040c200a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -818,6 +818,11 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) | |||
818 | * the target CPU. | 818 | * the target CPU. |
819 | */ | 819 | */ |
820 | #ifdef CONFIG_SMP | 820 | #ifdef CONFIG_SMP |
821 | |||
822 | #ifndef tsk_is_polling | ||
823 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | ||
824 | #endif | ||
825 | |||
821 | static void resched_task(task_t *p) | 826 | static void resched_task(task_t *p) |
822 | { | 827 | { |
823 | int cpu; | 828 | int cpu; |
@@ -833,9 +838,9 @@ static void resched_task(task_t *p) | |||
833 | if (cpu == smp_processor_id()) | 838 | if (cpu == smp_processor_id()) |
834 | return; | 839 | return; |
835 | 840 | ||
836 | /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ | 841 | /* NEED_RESCHED must be visible before we test polling */ |
837 | smp_mb(); | 842 | smp_mb(); |
838 | if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) | 843 | if (!tsk_is_polling(p)) |
839 | smp_send_reschedule(cpu); | 844 | smp_send_reschedule(cpu); |
840 | } | 845 | } |
841 | #else | 846 | #else |
@@ -4247,7 +4252,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | |||
4247 | if (retval) | 4252 | if (retval) |
4248 | goto out_unlock; | 4253 | goto out_unlock; |
4249 | 4254 | ||
4250 | jiffies_to_timespec(p->policy & SCHED_FIFO ? | 4255 | jiffies_to_timespec(p->policy == SCHED_FIFO ? |
4251 | 0 : task_timeslice(p), &t); | 4256 | 0 : task_timeslice(p), &t); |
4252 | read_unlock(&tasklist_lock); | 4257 | read_unlock(&tasklist_lock); |
4253 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 4258 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
diff --git a/kernel/signal.c b/kernel/signal.c index 1b3c921737e2..52adf53929f6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1531,6 +1531,35 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
1531 | spin_unlock_irqrestore(&sighand->siglock, flags); | 1531 | spin_unlock_irqrestore(&sighand->siglock, flags); |
1532 | } | 1532 | } |
1533 | 1533 | ||
1534 | static inline int may_ptrace_stop(void) | ||
1535 | { | ||
1536 | if (!likely(current->ptrace & PT_PTRACED)) | ||
1537 | return 0; | ||
1538 | |||
1539 | if (unlikely(current->parent == current->real_parent && | ||
1540 | (current->ptrace & PT_ATTACHED))) | ||
1541 | return 0; | ||
1542 | |||
1543 | if (unlikely(current->signal == current->parent->signal) && | ||
1544 | unlikely(current->signal->flags & SIGNAL_GROUP_EXIT)) | ||
1545 | return 0; | ||
1546 | |||
1547 | /* | ||
1548 | * Are we in the middle of do_coredump? | ||
1549 | * If so and our tracer is also part of the coredump stopping | ||
1550 | * is a deadlock situation, and pointless because our tracer | ||
1551 | * is dead so don't allow us to stop. | ||
1552 | * If SIGKILL was already sent before the caller unlocked | ||
1553 | * ->siglock we must see ->core_waiters != 0. Otherwise it | ||
1554 | * is safe to enter schedule(). | ||
1555 | */ | ||
1556 | if (unlikely(current->mm->core_waiters) && | ||
1557 | unlikely(current->mm == current->parent->mm)) | ||
1558 | return 0; | ||
1559 | |||
1560 | return 1; | ||
1561 | } | ||
1562 | |||
1534 | /* | 1563 | /* |
1535 | * This must be called with current->sighand->siglock held. | 1564 | * This must be called with current->sighand->siglock held. |
1536 | * | 1565 | * |
@@ -1559,11 +1588,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | |||
1559 | spin_unlock_irq(¤t->sighand->siglock); | 1588 | spin_unlock_irq(¤t->sighand->siglock); |
1560 | try_to_freeze(); | 1589 | try_to_freeze(); |
1561 | read_lock(&tasklist_lock); | 1590 | read_lock(&tasklist_lock); |
1562 | if (likely(current->ptrace & PT_PTRACED) && | 1591 | if (may_ptrace_stop()) { |
1563 | likely(current->parent != current->real_parent || | ||
1564 | !(current->ptrace & PT_ATTACHED)) && | ||
1565 | (likely(current->parent->signal != current->signal) || | ||
1566 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { | ||
1567 | do_notify_parent_cldstop(current, CLD_TRAPPED); | 1592 | do_notify_parent_cldstop(current, CLD_TRAPPED); |
1568 | read_unlock(&tasklist_lock); | 1593 | read_unlock(&tasklist_lock); |
1569 | schedule(); | 1594 | schedule(); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2c0e65819448..f1a4eb1a655e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -73,6 +73,7 @@ extern int printk_ratelimit_burst; | |||
73 | extern int pid_max_min, pid_max_max; | 73 | extern int pid_max_min, pid_max_max; |
74 | extern int sysctl_drop_caches; | 74 | extern int sysctl_drop_caches; |
75 | extern int percpu_pagelist_fraction; | 75 | extern int percpu_pagelist_fraction; |
76 | extern int compat_log; | ||
76 | 77 | ||
77 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | 78 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
78 | int unknown_nmi_panic; | 79 | int unknown_nmi_panic; |
@@ -677,6 +678,16 @@ static ctl_table kern_table[] = { | |||
677 | .proc_handler = &proc_dointvec, | 678 | .proc_handler = &proc_dointvec, |
678 | }, | 679 | }, |
679 | #endif | 680 | #endif |
681 | #ifdef CONFIG_COMPAT | ||
682 | { | ||
683 | .ctl_name = KERN_COMPAT_LOG, | ||
684 | .procname = "compat-log", | ||
685 | .data = &compat_log, | ||
686 | .maxlen = sizeof (int), | ||
687 | .mode = 0644, | ||
688 | .proc_handler = &proc_dointvec, | ||
689 | }, | ||
690 | #endif | ||
680 | { .ctl_name = 0 } | 691 | { .ctl_name = 0 } |
681 | }; | 692 | }; |
682 | 693 | ||
diff --git a/kernel/time.c b/kernel/time.c index b00ddc71cedb..5bd489747643 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -523,6 +523,7 @@ EXPORT_SYMBOL(do_gettimeofday); | |||
523 | 523 | ||
524 | 524 | ||
525 | #else | 525 | #else |
526 | #ifndef CONFIG_GENERIC_TIME | ||
526 | /* | 527 | /* |
527 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval | 528 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval |
528 | * and therefore only yields usec accuracy | 529 | * and therefore only yields usec accuracy |
@@ -537,6 +538,7 @@ void getnstimeofday(struct timespec *tv) | |||
537 | } | 538 | } |
538 | EXPORT_SYMBOL_GPL(getnstimeofday); | 539 | EXPORT_SYMBOL_GPL(getnstimeofday); |
539 | #endif | 540 | #endif |
541 | #endif | ||
540 | 542 | ||
541 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. | 543 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. |
542 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 | 544 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile new file mode 100644 index 000000000000..e1dfd8e86cce --- /dev/null +++ b/kernel/time/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-y += clocksource.o jiffies.o | |||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c new file mode 100644 index 000000000000..74eca5939bd9 --- /dev/null +++ b/kernel/time/clocksource.c | |||
@@ -0,0 +1,349 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/clocksource.c | ||
3 | * | ||
4 | * This file contains the functions which manage clocksource drivers. | ||
5 | * | ||
6 | * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | * | ||
22 | * TODO WishList: | ||
23 | * o Allow clocksource drivers to be unregistered | ||
24 | * o get rid of clocksource_jiffies extern | ||
25 | */ | ||
26 | |||
27 | #include <linux/clocksource.h> | ||
28 | #include <linux/sysdev.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/module.h> | ||
31 | |||
32 | /* XXX - Would like a better way for initializing curr_clocksource */ | ||
33 | extern struct clocksource clocksource_jiffies; | ||
34 | |||
35 | /*[Clocksource internal variables]--------- | ||
36 | * curr_clocksource: | ||
37 | * currently selected clocksource. Initialized to clocksource_jiffies. | ||
38 | * next_clocksource: | ||
39 | * pending next selected clocksource. | ||
40 | * clocksource_list: | ||
41 | * linked list with the registered clocksources | ||
42 | * clocksource_lock: | ||
43 | * protects manipulations to curr_clocksource and next_clocksource | ||
44 | * and the clocksource_list | ||
45 | * override_name: | ||
46 | * Name of the user-specified clocksource. | ||
47 | */ | ||
48 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | ||
49 | static struct clocksource *next_clocksource; | ||
50 | static LIST_HEAD(clocksource_list); | ||
51 | static DEFINE_SPINLOCK(clocksource_lock); | ||
52 | static char override_name[32]; | ||
53 | static int finished_booting; | ||
54 | |||
55 | /* clocksource_done_booting - Called near the end of bootup | ||
56 | * | ||
57 | * Hack to avoid lots of clocksource churn at boot time | ||
58 | */ | ||
59 | static int __init clocksource_done_booting(void) | ||
60 | { | ||
61 | finished_booting = 1; | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | late_initcall(clocksource_done_booting); | ||
66 | |||
67 | /** | ||
68 | * clocksource_get_next - Returns the selected clocksource | ||
69 | * | ||
70 | */ | ||
71 | struct clocksource *clocksource_get_next(void) | ||
72 | { | ||
73 | unsigned long flags; | ||
74 | |||
75 | spin_lock_irqsave(&clocksource_lock, flags); | ||
76 | if (next_clocksource && finished_booting) { | ||
77 | curr_clocksource = next_clocksource; | ||
78 | next_clocksource = NULL; | ||
79 | } | ||
80 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
81 | |||
82 | return curr_clocksource; | ||
83 | } | ||
84 | |||
85 | /** | ||
86 | * select_clocksource - Finds the best registered clocksource. | ||
87 | * | ||
88 | * Private function. Must hold clocksource_lock when called. | ||
89 | * | ||
90 | * Looks through the list of registered clocksources, returning | ||
91 | * the one with the highest rating value. If there is a clocksource | ||
92 | * name that matches the override string, it returns that clocksource. | ||
93 | */ | ||
94 | static struct clocksource *select_clocksource(void) | ||
95 | { | ||
96 | struct clocksource *best = NULL; | ||
97 | struct list_head *tmp; | ||
98 | |||
99 | list_for_each(tmp, &clocksource_list) { | ||
100 | struct clocksource *src; | ||
101 | |||
102 | src = list_entry(tmp, struct clocksource, list); | ||
103 | if (!best) | ||
104 | best = src; | ||
105 | |||
106 | /* check for override: */ | ||
107 | if (strlen(src->name) == strlen(override_name) && | ||
108 | !strcmp(src->name, override_name)) { | ||
109 | best = src; | ||
110 | break; | ||
111 | } | ||
112 | /* pick the highest rating: */ | ||
113 | if (src->rating > best->rating) | ||
114 | best = src; | ||
115 | } | ||
116 | |||
117 | return best; | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * is_registered_source - Checks if clocksource is registered | ||
122 | * @c: pointer to a clocksource | ||
123 | * | ||
124 | * Private helper function. Must hold clocksource_lock when called. | ||
125 | * | ||
126 | * Returns one if the clocksource is already registered, zero otherwise. | ||
127 | */ | ||
128 | static int is_registered_source(struct clocksource *c) | ||
129 | { | ||
130 | int len = strlen(c->name); | ||
131 | struct list_head *tmp; | ||
132 | |||
133 | list_for_each(tmp, &clocksource_list) { | ||
134 | struct clocksource *src; | ||
135 | |||
136 | src = list_entry(tmp, struct clocksource, list); | ||
137 | if (strlen(src->name) == len && !strcmp(src->name, c->name)) | ||
138 | return 1; | ||
139 | } | ||
140 | |||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * clocksource_register - Used to install new clocksources | ||
146 | * @c: clocksource to be registered | ||
147 | * | ||
148 | * Returns -EBUSY if registration fails, zero otherwise. | ||
149 | */ | ||
150 | int clocksource_register(struct clocksource *c) | ||
151 | { | ||
152 | int ret = 0; | ||
153 | unsigned long flags; | ||
154 | |||
155 | spin_lock_irqsave(&clocksource_lock, flags); | ||
156 | /* check if clocksource is already registered */ | ||
157 | if (is_registered_source(c)) { | ||
158 | printk("register_clocksource: Cannot register %s. " | ||
159 | "Already registered!", c->name); | ||
160 | ret = -EBUSY; | ||
161 | } else { | ||
162 | /* register it */ | ||
163 | list_add(&c->list, &clocksource_list); | ||
164 | /* scan the registered clocksources, and pick the best one */ | ||
165 | next_clocksource = select_clocksource(); | ||
166 | } | ||
167 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
168 | return ret; | ||
169 | } | ||
170 | EXPORT_SYMBOL(clocksource_register); | ||
171 | |||
172 | /** | ||
173 | * clocksource_reselect - Rescan list for next clocksource | ||
174 | * | ||
175 | * A quick helper function to be used if a clocksource changes its | ||
176 | * rating. Forces the clocksource list to be re-scanned for the best | ||
177 | * clocksource. | ||
178 | */ | ||
179 | void clocksource_reselect(void) | ||
180 | { | ||
181 | unsigned long flags; | ||
182 | |||
183 | spin_lock_irqsave(&clocksource_lock, flags); | ||
184 | next_clocksource = select_clocksource(); | ||
185 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
186 | } | ||
187 | EXPORT_SYMBOL(clocksource_reselect); | ||
188 | |||
189 | /** | ||
190 | * sysfs_show_current_clocksources - sysfs interface for current clocksource | ||
191 | * @dev: unused | ||
192 | * @buf: char buffer to be filled with clocksource list | ||
193 | * | ||
194 | * Provides sysfs interface for listing current clocksource. | ||
195 | */ | ||
196 | static ssize_t | ||
197 | sysfs_show_current_clocksources(struct sys_device *dev, char *buf) | ||
198 | { | ||
199 | char *curr = buf; | ||
200 | |||
201 | spin_lock_irq(&clocksource_lock); | ||
202 | curr += sprintf(curr, "%s ", curr_clocksource->name); | ||
203 | spin_unlock_irq(&clocksource_lock); | ||
204 | |||
205 | curr += sprintf(curr, "\n"); | ||
206 | |||
207 | return curr - buf; | ||
208 | } | ||
209 | |||
210 | /** | ||
211 | * sysfs_override_clocksource - interface for manually overriding clocksource | ||
212 | * @dev: unused | ||
213 | * @buf: name of override clocksource | ||
214 | * @count: length of buffer | ||
215 | * | ||
216 | * Takes input from sysfs interface for manually overriding the default | ||
217 | * clocksource selection. | ||
218 | */ | ||
219 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | ||
220 | const char *buf, size_t count) | ||
221 | { | ||
222 | size_t ret = count; | ||
223 | /* strings from sysfs write are not 0 terminated! */ | ||
224 | if (count >= sizeof(override_name)) | ||
225 | return -EINVAL; | ||
226 | |||
227 | /* strip off \n: */ | ||
228 | if (buf[count-1] == '\n') | ||
229 | count--; | ||
230 | if (count < 1) | ||
231 | return -EINVAL; | ||
232 | |||
233 | spin_lock_irq(&clocksource_lock); | ||
234 | |||
235 | /* copy the name given: */ | ||
236 | memcpy(override_name, buf, count); | ||
237 | override_name[count] = 0; | ||
238 | |||
239 | /* try to select it: */ | ||
240 | next_clocksource = select_clocksource(); | ||
241 | |||
242 | spin_unlock_irq(&clocksource_lock); | ||
243 | |||
244 | return ret; | ||
245 | } | ||
246 | |||
247 | /** | ||
248 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource | ||
249 | * @dev: unused | ||
250 | * @buf: char buffer to be filled with clocksource list | ||
251 | * | ||
252 | * Provides sysfs interface for listing registered clocksources | ||
253 | */ | ||
254 | static ssize_t | ||
255 | sysfs_show_available_clocksources(struct sys_device *dev, char *buf) | ||
256 | { | ||
257 | struct list_head *tmp; | ||
258 | char *curr = buf; | ||
259 | |||
260 | spin_lock_irq(&clocksource_lock); | ||
261 | list_for_each(tmp, &clocksource_list) { | ||
262 | struct clocksource *src; | ||
263 | |||
264 | src = list_entry(tmp, struct clocksource, list); | ||
265 | curr += sprintf(curr, "%s ", src->name); | ||
266 | } | ||
267 | spin_unlock_irq(&clocksource_lock); | ||
268 | |||
269 | curr += sprintf(curr, "\n"); | ||
270 | |||
271 | return curr - buf; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Sysfs setup bits: | ||
276 | */ | ||
277 | static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, | ||
278 | sysfs_override_clocksource); | ||
279 | |||
280 | static SYSDEV_ATTR(available_clocksource, 0600, | ||
281 | sysfs_show_available_clocksources, NULL); | ||
282 | |||
283 | static struct sysdev_class clocksource_sysclass = { | ||
284 | set_kset_name("clocksource"), | ||
285 | }; | ||
286 | |||
287 | static struct sys_device device_clocksource = { | ||
288 | .id = 0, | ||
289 | .cls = &clocksource_sysclass, | ||
290 | }; | ||
291 | |||
292 | static int __init init_clocksource_sysfs(void) | ||
293 | { | ||
294 | int error = sysdev_class_register(&clocksource_sysclass); | ||
295 | |||
296 | if (!error) | ||
297 | error = sysdev_register(&device_clocksource); | ||
298 | if (!error) | ||
299 | error = sysdev_create_file( | ||
300 | &device_clocksource, | ||
301 | &attr_current_clocksource); | ||
302 | if (!error) | ||
303 | error = sysdev_create_file( | ||
304 | &device_clocksource, | ||
305 | &attr_available_clocksource); | ||
306 | return error; | ||
307 | } | ||
308 | |||
309 | device_initcall(init_clocksource_sysfs); | ||
310 | |||
311 | /** | ||
312 | * boot_override_clocksource - boot clock override | ||
313 | * @str: override name | ||
314 | * | ||
315 | * Takes a clocksource= boot argument and uses it | ||
316 | * as the clocksource override name. | ||
317 | */ | ||
318 | static int __init boot_override_clocksource(char* str) | ||
319 | { | ||
320 | unsigned long flags; | ||
321 | spin_lock_irqsave(&clocksource_lock, flags); | ||
322 | if (str) | ||
323 | strlcpy(override_name, str, sizeof(override_name)); | ||
324 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
325 | return 1; | ||
326 | } | ||
327 | |||
328 | __setup("clocksource=", boot_override_clocksource); | ||
329 | |||
330 | /** | ||
331 | * boot_override_clock - Compatibility layer for deprecated boot option | ||
332 | * @str: override name | ||
333 | * | ||
334 | * DEPRECATED! Takes a clock= boot argument and uses it | ||
335 | * as the clocksource override name | ||
336 | */ | ||
337 | static int __init boot_override_clock(char* str) | ||
338 | { | ||
339 | if (!strcmp(str, "pmtmr")) { | ||
340 | printk("Warning: clock=pmtmr is deprecated. " | ||
341 | "Use clocksource=acpi_pm.\n"); | ||
342 | return boot_override_clocksource("acpi_pm"); | ||
343 | } | ||
344 | printk("Warning! clock= boot option is deprecated. " | ||
345 | "Use clocksource=xyz\n"); | ||
346 | return boot_override_clocksource(str); | ||
347 | } | ||
348 | |||
349 | __setup("clock=", boot_override_clock); | ||
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c new file mode 100644 index 000000000000..126bb30c4afe --- /dev/null +++ b/kernel/time/jiffies.c | |||
@@ -0,0 +1,73 @@ | |||
1 | /*********************************************************************** | ||
2 | * linux/kernel/time/jiffies.c | ||
3 | * | ||
4 | * This file contains the jiffies based clocksource. | ||
5 | * | ||
6 | * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | * | ||
22 | ************************************************************************/ | ||
23 | #include <linux/clocksource.h> | ||
24 | #include <linux/jiffies.h> | ||
25 | #include <linux/init.h> | ||
26 | |||
27 | /* The Jiffies based clocksource is the lowest common | ||
28 | * denominator clock source which should function on | ||
29 | * all systems. It has the same coarse resolution as | ||
30 | * the timer interrupt frequency HZ and it suffers | ||
31 | * inaccuracies caused by missed or lost timer | ||
32 | * interrupts and the inability for the timer | ||
33 | * interrupt hardware to accurately tick at the | ||
34 | * requested HZ value. It is also not recommended | ||
35 | * for "tick-less" systems. | ||
36 | */ | ||
37 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) | ||
38 | |||
39 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier | ||
40 | * conversion, the .shift value could be zero. However | ||
41 | * this would make NTP adjustments impossible as they are | ||
42 | * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to | ||
43 | * shift both the numerator and denominator the same | ||
44 | * amount, and give ntp adjustments in units of 1/2^8 | ||
45 | * | ||
46 | * The value 8 is somewhat carefully chosen, as anything | ||
47 | * larger can result in overflows. NSEC_PER_JIFFY grows as | ||
48 | * HZ shrinks, so values greater than 8 overflow 32bits when | ||
49 | * HZ=100. | ||
50 | */ | ||
51 | #define JIFFIES_SHIFT 8 | ||
52 | |||
53 | static cycle_t jiffies_read(void) | ||
54 | { | ||
55 | return (cycle_t) jiffies; | ||
56 | } | ||
57 | |||
58 | struct clocksource clocksource_jiffies = { | ||
59 | .name = "jiffies", | ||
60 | .rating = 0, /* lowest rating*/ | ||
61 | .read = jiffies_read, | ||
62 | .mask = 0xffffffff, /*32bits*/ | ||
63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | ||
64 | .shift = JIFFIES_SHIFT, | ||
65 | .is_continuous = 0, /* tick based, not free running */ | ||
66 | }; | ||
67 | |||
68 | static int __init init_jiffies_clocksource(void) | ||
69 | { | ||
70 | return clocksource_register(&clocksource_jiffies); | ||
71 | } | ||
72 | |||
73 | module_init(init_jiffies_clocksource); | ||
diff --git a/kernel/timer.c b/kernel/timer.c index eb97371b87d8..5bb6b7976eec 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -597,7 +597,6 @@ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ | |||
597 | long time_precision = 1; /* clock precision (us) */ | 597 | long time_precision = 1; /* clock precision (us) */ |
598 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | 598 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ |
599 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | 599 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ |
600 | static long time_phase; /* phase offset (scaled us) */ | ||
601 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; | 600 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; |
602 | /* frequency offset (scaled ppm)*/ | 601 | /* frequency offset (scaled ppm)*/ |
603 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ | 602 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ |
@@ -747,27 +746,14 @@ static long adjtime_adjustment(void) | |||
747 | } | 746 | } |
748 | 747 | ||
749 | /* in the NTP reference this is called "hardclock()" */ | 748 | /* in the NTP reference this is called "hardclock()" */ |
750 | static void update_wall_time_one_tick(void) | 749 | static void update_ntp_one_tick(void) |
751 | { | 750 | { |
752 | long time_adjust_step, delta_nsec; | 751 | long time_adjust_step; |
753 | 752 | ||
754 | time_adjust_step = adjtime_adjustment(); | 753 | time_adjust_step = adjtime_adjustment(); |
755 | if (time_adjust_step) | 754 | if (time_adjust_step) |
756 | /* Reduce by this step the amount of time left */ | 755 | /* Reduce by this step the amount of time left */ |
757 | time_adjust -= time_adjust_step; | 756 | time_adjust -= time_adjust_step; |
758 | delta_nsec = tick_nsec + time_adjust_step * 1000; | ||
759 | /* | ||
760 | * Advance the phase, once it gets to one microsecond, then | ||
761 | * advance the tick more. | ||
762 | */ | ||
763 | time_phase += time_adj; | ||
764 | if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) { | ||
765 | long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10)); | ||
766 | time_phase -= ltemp << (SHIFT_SCALE - 10); | ||
767 | delta_nsec += ltemp; | ||
768 | } | ||
769 | xtime.tv_nsec += delta_nsec; | ||
770 | time_interpolator_update(delta_nsec); | ||
771 | 757 | ||
772 | /* Changes by adjtime() do not take effect till next tick. */ | 758 | /* Changes by adjtime() do not take effect till next tick. */ |
773 | if (time_next_adjust != 0) { | 759 | if (time_next_adjust != 0) { |
@@ -780,36 +766,378 @@ static void update_wall_time_one_tick(void) | |||
780 | * Return how long ticks are at the moment, that is, how much time | 766 | * Return how long ticks are at the moment, that is, how much time |
781 | * update_wall_time_one_tick will add to xtime next time we call it | 767 | * update_wall_time_one_tick will add to xtime next time we call it |
782 | * (assuming no calls to do_adjtimex in the meantime). | 768 | * (assuming no calls to do_adjtimex in the meantime). |
783 | * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 | 769 | * The return value is in fixed-point nanoseconds shifted by the |
784 | * bits to the right of the binary point. | 770 | * specified number of bits to the right of the binary point. |
785 | * This function has no side-effects. | 771 | * This function has no side-effects. |
786 | */ | 772 | */ |
787 | u64 current_tick_length(void) | 773 | u64 current_tick_length(void) |
788 | { | 774 | { |
789 | long delta_nsec; | 775 | long delta_nsec; |
776 | u64 ret; | ||
790 | 777 | ||
778 | /* calculate the finest interval NTP will allow. | ||
779 | * ie: nanosecond value shifted by (SHIFT_SCALE - 10) | ||
780 | */ | ||
791 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; | 781 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; |
792 | return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; | 782 | ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; |
783 | ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); | ||
784 | |||
785 | return ret; | ||
793 | } | 786 | } |
794 | 787 | ||
795 | /* | 788 | /* XXX - all of this timekeeping code should be later moved to time.c */ |
796 | * Using a loop looks inefficient, but "ticks" is | 789 | #include <linux/clocksource.h> |
797 | * usually just one (we shouldn't be losing ticks, | 790 | static struct clocksource *clock; /* pointer to current clocksource */ |
798 | * we're doing this this way mainly for interrupt | 791 | |
799 | * latency reasons, not because we think we'll | 792 | #ifdef CONFIG_GENERIC_TIME |
800 | * have lots of lost timer ticks | 793 | /** |
794 | * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook | ||
795 | * | ||
796 | * private function, must hold xtime_lock lock when being | ||
797 | * called. Returns the number of nanoseconds since the | ||
798 | * last call to update_wall_time() (adjusted by NTP scaling) | ||
799 | */ | ||
800 | static inline s64 __get_nsec_offset(void) | ||
801 | { | ||
802 | cycle_t cycle_now, cycle_delta; | ||
803 | s64 ns_offset; | ||
804 | |||
805 | /* read clocksource: */ | ||
806 | cycle_now = clocksource_read(clock); | ||
807 | |||
808 | /* calculate the delta since the last update_wall_time: */ | ||
809 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
810 | |||
811 | /* convert to nanoseconds: */ | ||
812 | ns_offset = cyc2ns(clock, cycle_delta); | ||
813 | |||
814 | return ns_offset; | ||
815 | } | ||
816 | |||
817 | /** | ||
818 | * __get_realtime_clock_ts - Returns the time of day in a timespec | ||
819 | * @ts: pointer to the timespec to be set | ||
820 | * | ||
821 | * Returns the time of day in a timespec. Used by | ||
822 | * do_gettimeofday() and get_realtime_clock_ts(). | ||
801 | */ | 823 | */ |
802 | static void update_wall_time(unsigned long ticks) | 824 | static inline void __get_realtime_clock_ts(struct timespec *ts) |
803 | { | 825 | { |
826 | unsigned long seq; | ||
827 | s64 nsecs; | ||
828 | |||
829 | do { | ||
830 | seq = read_seqbegin(&xtime_lock); | ||
831 | |||
832 | *ts = xtime; | ||
833 | nsecs = __get_nsec_offset(); | ||
834 | |||
835 | } while (read_seqretry(&xtime_lock, seq)); | ||
836 | |||
837 | timespec_add_ns(ts, nsecs); | ||
838 | } | ||
839 | |||
840 | /** | ||
841 | * getnstimeofday - Returns the time of day in a timespec | ||
842 | * @ts: pointer to the timespec to be set | ||
843 | * | ||
844 | * Returns the time of day in a timespec. | ||
845 | */ | ||
846 | void getnstimeofday(struct timespec *ts) | ||
847 | { | ||
848 | __get_realtime_clock_ts(ts); | ||
849 | } | ||
850 | |||
851 | EXPORT_SYMBOL(getnstimeofday); | ||
852 | |||
853 | /** | ||
854 | * do_gettimeofday - Returns the time of day in a timeval | ||
855 | * @tv: pointer to the timeval to be set | ||
856 | * | ||
857 | * NOTE: Users should be converted to using get_realtime_clock_ts() | ||
858 | */ | ||
859 | void do_gettimeofday(struct timeval *tv) | ||
860 | { | ||
861 | struct timespec now; | ||
862 | |||
863 | __get_realtime_clock_ts(&now); | ||
864 | tv->tv_sec = now.tv_sec; | ||
865 | tv->tv_usec = now.tv_nsec/1000; | ||
866 | } | ||
867 | |||
868 | EXPORT_SYMBOL(do_gettimeofday); | ||
869 | /** | ||
870 | * do_settimeofday - Sets the time of day | ||
871 | * @tv: pointer to the timespec variable containing the new time | ||
872 | * | ||
873 | * Sets the time of day to the new time and update NTP and notify hrtimers | ||
874 | */ | ||
875 | int do_settimeofday(struct timespec *tv) | ||
876 | { | ||
877 | unsigned long flags; | ||
878 | time_t wtm_sec, sec = tv->tv_sec; | ||
879 | long wtm_nsec, nsec = tv->tv_nsec; | ||
880 | |||
881 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
882 | return -EINVAL; | ||
883 | |||
884 | write_seqlock_irqsave(&xtime_lock, flags); | ||
885 | |||
886 | nsec -= __get_nsec_offset(); | ||
887 | |||
888 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
889 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
890 | |||
891 | set_normalized_timespec(&xtime, sec, nsec); | ||
892 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
893 | |||
894 | ntp_clear(); | ||
895 | |||
896 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
897 | |||
898 | /* signal hrtimers about time change */ | ||
899 | clock_was_set(); | ||
900 | |||
901 | return 0; | ||
902 | } | ||
903 | |||
904 | EXPORT_SYMBOL(do_settimeofday); | ||
905 | |||
906 | /** | ||
907 | * change_clocksource - Swaps clocksources if a new one is available | ||
908 | * | ||
909 | * Accumulates current time interval and initializes new clocksource | ||
910 | */ | ||
911 | static int change_clocksource(void) | ||
912 | { | ||
913 | struct clocksource *new; | ||
914 | cycle_t now; | ||
915 | u64 nsec; | ||
916 | new = clocksource_get_next(); | ||
917 | if (clock != new) { | ||
918 | now = clocksource_read(new); | ||
919 | nsec = __get_nsec_offset(); | ||
920 | timespec_add_ns(&xtime, nsec); | ||
921 | |||
922 | clock = new; | ||
923 | clock->cycle_last = now; | ||
924 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | ||
925 | clock->name); | ||
926 | return 1; | ||
927 | } else if (clock->update_callback) { | ||
928 | return clock->update_callback(); | ||
929 | } | ||
930 | return 0; | ||
931 | } | ||
932 | #else | ||
933 | #define change_clocksource() (0) | ||
934 | #endif | ||
935 | |||
936 | /** | ||
937 | * timeofday_is_continuous - check to see if timekeeping is free running | ||
938 | */ | ||
939 | int timekeeping_is_continuous(void) | ||
940 | { | ||
941 | unsigned long seq; | ||
942 | int ret; | ||
943 | |||
804 | do { | 944 | do { |
805 | ticks--; | 945 | seq = read_seqbegin(&xtime_lock); |
806 | update_wall_time_one_tick(); | 946 | |
807 | if (xtime.tv_nsec >= 1000000000) { | 947 | ret = clock->is_continuous; |
808 | xtime.tv_nsec -= 1000000000; | 948 | |
949 | } while (read_seqretry(&xtime_lock, seq)); | ||
950 | |||
951 | return ret; | ||
952 | } | ||
953 | |||
954 | /* | ||
955 | * timekeeping_init - Initializes the clocksource and common timekeeping values | ||
956 | */ | ||
957 | void __init timekeeping_init(void) | ||
958 | { | ||
959 | unsigned long flags; | ||
960 | |||
961 | write_seqlock_irqsave(&xtime_lock, flags); | ||
962 | clock = clocksource_get_next(); | ||
963 | clocksource_calculate_interval(clock, tick_nsec); | ||
964 | clock->cycle_last = clocksource_read(clock); | ||
965 | ntp_clear(); | ||
966 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
967 | } | ||
968 | |||
969 | |||
970 | /* | ||
971 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | ||
972 | * @dev: unused | ||
973 | * | ||
974 | * This is for the generic clocksource timekeeping. | ||
975 | * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are | ||
976 | * still managed by arch specific suspend/resume code. | ||
977 | */ | ||
978 | static int timekeeping_resume(struct sys_device *dev) | ||
979 | { | ||
980 | unsigned long flags; | ||
981 | |||
982 | write_seqlock_irqsave(&xtime_lock, flags); | ||
983 | /* restart the last cycle value */ | ||
984 | clock->cycle_last = clocksource_read(clock); | ||
985 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
986 | return 0; | ||
987 | } | ||
988 | |||
989 | /* sysfs resume/suspend bits for timekeeping */ | ||
990 | static struct sysdev_class timekeeping_sysclass = { | ||
991 | .resume = timekeeping_resume, | ||
992 | set_kset_name("timekeeping"), | ||
993 | }; | ||
994 | |||
995 | static struct sys_device device_timer = { | ||
996 | .id = 0, | ||
997 | .cls = &timekeeping_sysclass, | ||
998 | }; | ||
999 | |||
1000 | static int __init timekeeping_init_device(void) | ||
1001 | { | ||
1002 | int error = sysdev_class_register(&timekeeping_sysclass); | ||
1003 | if (!error) | ||
1004 | error = sysdev_register(&device_timer); | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | device_initcall(timekeeping_init_device); | ||
1009 | |||
1010 | /* | ||
1011 | * If the error is already larger, we look ahead another tick, | ||
1012 | * to compensate for late or lost adjustments. | ||
1013 | */ | ||
1014 | static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset) | ||
1015 | { | ||
1016 | int adj; | ||
1017 | |||
1018 | /* | ||
1019 | * As soon as the machine is synchronized to the external time | ||
1020 | * source this should be the common case. | ||
1021 | */ | ||
1022 | error >>= 2; | ||
1023 | if (likely(sign > 0 ? error <= *interval : error >= *interval)) | ||
1024 | return sign; | ||
1025 | |||
1026 | /* | ||
1027 | * An extra look ahead dampens the effect of the current error, | ||
1028 | * which can grow quite large with continously late updates, as | ||
1029 | * it would dominate the adjustment value and can lead to | ||
1030 | * oscillation. | ||
1031 | */ | ||
1032 | error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); | ||
1033 | error -= clock->xtime_interval >> 1; | ||
1034 | |||
1035 | adj = 0; | ||
1036 | while (1) { | ||
1037 | error >>= 1; | ||
1038 | if (sign > 0 ? error <= *interval : error >= *interval) | ||
1039 | break; | ||
1040 | adj++; | ||
1041 | } | ||
1042 | |||
1043 | /* | ||
1044 | * Add the current adjustments to the error and take the offset | ||
1045 | * into account, the latter can cause the error to be hardly | ||
1046 | * reduced at the next tick. Check the error again if there's | ||
1047 | * room for another adjustment, thus further reducing the error | ||
1048 | * which otherwise had to be corrected at the next update. | ||
1049 | */ | ||
1050 | error = (error << 1) - *interval + *offset; | ||
1051 | if (sign > 0 ? error > *interval : error < *interval) | ||
1052 | adj++; | ||
1053 | |||
1054 | *interval <<= adj; | ||
1055 | *offset <<= adj; | ||
1056 | return sign << adj; | ||
1057 | } | ||
1058 | |||
1059 | /* | ||
1060 | * Adjust the multiplier to reduce the error value, | ||
1061 | * this is optimized for the most common adjustments of -1,0,1, | ||
1062 | * for other values we can do a bit more work. | ||
1063 | */ | ||
1064 | static void clocksource_adjust(struct clocksource *clock, s64 offset) | ||
1065 | { | ||
1066 | s64 error, interval = clock->cycle_interval; | ||
1067 | int adj; | ||
1068 | |||
1069 | error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); | ||
1070 | if (error > interval) { | ||
1071 | adj = clocksource_bigadjust(1, error, &interval, &offset); | ||
1072 | } else if (error < -interval) { | ||
1073 | interval = -interval; | ||
1074 | offset = -offset; | ||
1075 | adj = clocksource_bigadjust(-1, error, &interval, &offset); | ||
1076 | } else | ||
1077 | return; | ||
1078 | |||
1079 | clock->mult += adj; | ||
1080 | clock->xtime_interval += interval; | ||
1081 | clock->xtime_nsec -= offset; | ||
1082 | clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift); | ||
1083 | } | ||
1084 | |||
1085 | /* | ||
1086 | * update_wall_time - Uses the current clocksource to increment the wall time | ||
1087 | * | ||
1088 | * Called from the timer interrupt, must hold a write on xtime_lock. | ||
1089 | */ | ||
1090 | static void update_wall_time(void) | ||
1091 | { | ||
1092 | cycle_t offset; | ||
1093 | |||
1094 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | ||
1095 | |||
1096 | #ifdef CONFIG_GENERIC_TIME | ||
1097 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; | ||
1098 | #else | ||
1099 | offset = clock->cycle_interval; | ||
1100 | #endif | ||
1101 | |||
1102 | /* normally this loop will run just once, however in the | ||
1103 | * case of lost or late ticks, it will accumulate correctly. | ||
1104 | */ | ||
1105 | while (offset >= clock->cycle_interval) { | ||
1106 | /* accumulate one interval */ | ||
1107 | clock->xtime_nsec += clock->xtime_interval; | ||
1108 | clock->cycle_last += clock->cycle_interval; | ||
1109 | offset -= clock->cycle_interval; | ||
1110 | |||
1111 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { | ||
1112 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; | ||
809 | xtime.tv_sec++; | 1113 | xtime.tv_sec++; |
810 | second_overflow(); | 1114 | second_overflow(); |
811 | } | 1115 | } |
812 | } while (ticks); | 1116 | |
1117 | /* interpolator bits */ | ||
1118 | time_interpolator_update(clock->xtime_interval | ||
1119 | >> clock->shift); | ||
1120 | /* increment the NTP state machine */ | ||
1121 | update_ntp_one_tick(); | ||
1122 | |||
1123 | /* accumulate error between NTP and clock interval */ | ||
1124 | clock->error += current_tick_length(); | ||
1125 | clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); | ||
1126 | } | ||
1127 | |||
1128 | /* correct the clock when NTP error is too big */ | ||
1129 | clocksource_adjust(clock, offset); | ||
1130 | |||
1131 | /* store full nanoseconds into xtime */ | ||
1132 | xtime.tv_nsec = clock->xtime_nsec >> clock->shift; | ||
1133 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | ||
1134 | |||
1135 | /* check to see if there is a new clocksource to use */ | ||
1136 | if (change_clocksource()) { | ||
1137 | clock->error = 0; | ||
1138 | clock->xtime_nsec = 0; | ||
1139 | clocksource_calculate_interval(clock, tick_nsec); | ||
1140 | } | ||
813 | } | 1141 | } |
814 | 1142 | ||
815 | /* | 1143 | /* |
@@ -915,10 +1243,8 @@ static inline void update_times(void) | |||
915 | unsigned long ticks; | 1243 | unsigned long ticks; |
916 | 1244 | ||
917 | ticks = jiffies - wall_jiffies; | 1245 | ticks = jiffies - wall_jiffies; |
918 | if (ticks) { | 1246 | wall_jiffies += ticks; |
919 | wall_jiffies += ticks; | 1247 | update_wall_time(); |
920 | update_wall_time(ticks); | ||
921 | } | ||
922 | calc_load(ticks); | 1248 | calc_load(ticks); |
923 | } | 1249 | } |
924 | 1250 | ||
diff --git a/kernel/unwind.c b/kernel/unwind.c new file mode 100644 index 000000000000..f69c804c8e62 --- /dev/null +++ b/kernel/unwind.c | |||
@@ -0,0 +1,918 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2002-2006 Novell, Inc. | ||
3 | * Jan Beulich <jbeulich@novell.com> | ||
4 | * This code is released under version 2 of the GNU GPL. | ||
5 | * | ||
6 | * A simple API for unwinding kernel stacks. This is used for | ||
7 | * debugging and error reporting purposes. The kernel doesn't need | ||
8 | * full-blown stack unwinding with all the bells and whistles, so there | ||
9 | * is not much point in implementing the full Dwarf2 unwind API. | ||
10 | */ | ||
11 | |||
12 | #include <linux/unwind.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/delay.h> | ||
15 | #include <linux/stop_machine.h> | ||
16 | #include <asm/sections.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | #include <asm/unaligned.h> | ||
19 | |||
20 | extern char __start_unwind[], __end_unwind[]; | ||
21 | |||
22 | #define MAX_STACK_DEPTH 8 | ||
23 | |||
24 | #define EXTRA_INFO(f) { \ | ||
25 | BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ | ||
26 | % FIELD_SIZEOF(struct unwind_frame_info, f)) \ | ||
27 | + offsetof(struct unwind_frame_info, f) \ | ||
28 | / FIELD_SIZEOF(struct unwind_frame_info, f), \ | ||
29 | FIELD_SIZEOF(struct unwind_frame_info, f) \ | ||
30 | } | ||
31 | #define PTREGS_INFO(f) EXTRA_INFO(regs.f) | ||
32 | |||
33 | static const struct { | ||
34 | unsigned offs:BITS_PER_LONG / 2; | ||
35 | unsigned width:BITS_PER_LONG / 2; | ||
36 | } reg_info[] = { | ||
37 | UNW_REGISTER_INFO | ||
38 | }; | ||
39 | |||
40 | #undef PTREGS_INFO | ||
41 | #undef EXTRA_INFO | ||
42 | |||
43 | #ifndef REG_INVALID | ||
44 | #define REG_INVALID(r) (reg_info[r].width == 0) | ||
45 | #endif | ||
46 | |||
47 | #define DW_CFA_nop 0x00 | ||
48 | #define DW_CFA_set_loc 0x01 | ||
49 | #define DW_CFA_advance_loc1 0x02 | ||
50 | #define DW_CFA_advance_loc2 0x03 | ||
51 | #define DW_CFA_advance_loc4 0x04 | ||
52 | #define DW_CFA_offset_extended 0x05 | ||
53 | #define DW_CFA_restore_extended 0x06 | ||
54 | #define DW_CFA_undefined 0x07 | ||
55 | #define DW_CFA_same_value 0x08 | ||
56 | #define DW_CFA_register 0x09 | ||
57 | #define DW_CFA_remember_state 0x0a | ||
58 | #define DW_CFA_restore_state 0x0b | ||
59 | #define DW_CFA_def_cfa 0x0c | ||
60 | #define DW_CFA_def_cfa_register 0x0d | ||
61 | #define DW_CFA_def_cfa_offset 0x0e | ||
62 | #define DW_CFA_def_cfa_expression 0x0f | ||
63 | #define DW_CFA_expression 0x10 | ||
64 | #define DW_CFA_offset_extended_sf 0x11 | ||
65 | #define DW_CFA_def_cfa_sf 0x12 | ||
66 | #define DW_CFA_def_cfa_offset_sf 0x13 | ||
67 | #define DW_CFA_val_offset 0x14 | ||
68 | #define DW_CFA_val_offset_sf 0x15 | ||
69 | #define DW_CFA_val_expression 0x16 | ||
70 | #define DW_CFA_lo_user 0x1c | ||
71 | #define DW_CFA_GNU_window_save 0x2d | ||
72 | #define DW_CFA_GNU_args_size 0x2e | ||
73 | #define DW_CFA_GNU_negative_offset_extended 0x2f | ||
74 | #define DW_CFA_hi_user 0x3f | ||
75 | |||
76 | #define DW_EH_PE_FORM 0x07 | ||
77 | #define DW_EH_PE_native 0x00 | ||
78 | #define DW_EH_PE_leb128 0x01 | ||
79 | #define DW_EH_PE_data2 0x02 | ||
80 | #define DW_EH_PE_data4 0x03 | ||
81 | #define DW_EH_PE_data8 0x04 | ||
82 | #define DW_EH_PE_signed 0x08 | ||
83 | #define DW_EH_PE_ADJUST 0x70 | ||
84 | #define DW_EH_PE_abs 0x00 | ||
85 | #define DW_EH_PE_pcrel 0x10 | ||
86 | #define DW_EH_PE_textrel 0x20 | ||
87 | #define DW_EH_PE_datarel 0x30 | ||
88 | #define DW_EH_PE_funcrel 0x40 | ||
89 | #define DW_EH_PE_aligned 0x50 | ||
90 | #define DW_EH_PE_indirect 0x80 | ||
91 | #define DW_EH_PE_omit 0xff | ||
92 | |||
93 | typedef unsigned long uleb128_t; | ||
94 | typedef signed long sleb128_t; | ||
95 | |||
96 | static struct unwind_table { | ||
97 | struct { | ||
98 | unsigned long pc; | ||
99 | unsigned long range; | ||
100 | } core, init; | ||
101 | const void *address; | ||
102 | unsigned long size; | ||
103 | struct unwind_table *link; | ||
104 | const char *name; | ||
105 | } root_table, *last_table; | ||
106 | |||
107 | struct unwind_item { | ||
108 | enum item_location { | ||
109 | Nowhere, | ||
110 | Memory, | ||
111 | Register, | ||
112 | Value | ||
113 | } where; | ||
114 | uleb128_t value; | ||
115 | }; | ||
116 | |||
117 | struct unwind_state { | ||
118 | uleb128_t loc, org; | ||
119 | const u8 *cieStart, *cieEnd; | ||
120 | uleb128_t codeAlign; | ||
121 | sleb128_t dataAlign; | ||
122 | struct cfa { | ||
123 | uleb128_t reg, offs; | ||
124 | } cfa; | ||
125 | struct unwind_item regs[ARRAY_SIZE(reg_info)]; | ||
126 | unsigned stackDepth:8; | ||
127 | unsigned version:8; | ||
128 | const u8 *label; | ||
129 | const u8 *stack[MAX_STACK_DEPTH]; | ||
130 | }; | ||
131 | |||
132 | static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; | ||
133 | |||
134 | static struct unwind_table *find_table(unsigned long pc) | ||
135 | { | ||
136 | struct unwind_table *table; | ||
137 | |||
138 | for (table = &root_table; table; table = table->link) | ||
139 | if ((pc >= table->core.pc | ||
140 | && pc < table->core.pc + table->core.range) | ||
141 | || (pc >= table->init.pc | ||
142 | && pc < table->init.pc + table->init.range)) | ||
143 | break; | ||
144 | |||
145 | return table; | ||
146 | } | ||
147 | |||
148 | static void init_unwind_table(struct unwind_table *table, | ||
149 | const char *name, | ||
150 | const void *core_start, | ||
151 | unsigned long core_size, | ||
152 | const void *init_start, | ||
153 | unsigned long init_size, | ||
154 | const void *table_start, | ||
155 | unsigned long table_size) | ||
156 | { | ||
157 | table->core.pc = (unsigned long)core_start; | ||
158 | table->core.range = core_size; | ||
159 | table->init.pc = (unsigned long)init_start; | ||
160 | table->init.range = init_size; | ||
161 | table->address = table_start; | ||
162 | table->size = table_size; | ||
163 | table->link = NULL; | ||
164 | table->name = name; | ||
165 | } | ||
166 | |||
167 | void __init unwind_init(void) | ||
168 | { | ||
169 | init_unwind_table(&root_table, "kernel", | ||
170 | _text, _end - _text, | ||
171 | NULL, 0, | ||
172 | __start_unwind, __end_unwind - __start_unwind); | ||
173 | } | ||
174 | |||
175 | #ifdef CONFIG_MODULES | ||
176 | |||
177 | /* Must be called with module_mutex held. */ | ||
178 | void *unwind_add_table(struct module *module, | ||
179 | const void *table_start, | ||
180 | unsigned long table_size) | ||
181 | { | ||
182 | struct unwind_table *table; | ||
183 | |||
184 | if (table_size <= 0) | ||
185 | return NULL; | ||
186 | |||
187 | table = kmalloc(sizeof(*table), GFP_KERNEL); | ||
188 | if (!table) | ||
189 | return NULL; | ||
190 | |||
191 | init_unwind_table(table, module->name, | ||
192 | module->module_core, module->core_size, | ||
193 | module->module_init, module->init_size, | ||
194 | table_start, table_size); | ||
195 | |||
196 | if (last_table) | ||
197 | last_table->link = table; | ||
198 | else | ||
199 | root_table.link = table; | ||
200 | last_table = table; | ||
201 | |||
202 | return table; | ||
203 | } | ||
204 | |||
205 | struct unlink_table_info | ||
206 | { | ||
207 | struct unwind_table *table; | ||
208 | int init_only; | ||
209 | }; | ||
210 | |||
211 | static int unlink_table(void *arg) | ||
212 | { | ||
213 | struct unlink_table_info *info = arg; | ||
214 | struct unwind_table *table = info->table, *prev; | ||
215 | |||
216 | for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) | ||
217 | ; | ||
218 | |||
219 | if (prev->link) { | ||
220 | if (info->init_only) { | ||
221 | table->init.pc = 0; | ||
222 | table->init.range = 0; | ||
223 | info->table = NULL; | ||
224 | } else { | ||
225 | prev->link = table->link; | ||
226 | if (!prev->link) | ||
227 | last_table = prev; | ||
228 | } | ||
229 | } else | ||
230 | info->table = NULL; | ||
231 | |||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | /* Must be called with module_mutex held. */ | ||
236 | void unwind_remove_table(void *handle, int init_only) | ||
237 | { | ||
238 | struct unwind_table *table = handle; | ||
239 | struct unlink_table_info info; | ||
240 | |||
241 | if (!table || table == &root_table) | ||
242 | return; | ||
243 | |||
244 | if (init_only && table == last_table) { | ||
245 | table->init.pc = 0; | ||
246 | table->init.range = 0; | ||
247 | return; | ||
248 | } | ||
249 | |||
250 | info.table = table; | ||
251 | info.init_only = init_only; | ||
252 | stop_machine_run(unlink_table, &info, NR_CPUS); | ||
253 | |||
254 | if (info.table) | ||
255 | kfree(table); | ||
256 | } | ||
257 | |||
258 | #endif /* CONFIG_MODULES */ | ||
259 | |||
260 | static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) | ||
261 | { | ||
262 | const u8 *cur = *pcur; | ||
263 | uleb128_t value; | ||
264 | unsigned shift; | ||
265 | |||
266 | for (shift = 0, value = 0; cur < end; shift += 7) { | ||
267 | if (shift + 7 > 8 * sizeof(value) | ||
268 | && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { | ||
269 | cur = end + 1; | ||
270 | break; | ||
271 | } | ||
272 | value |= (uleb128_t)(*cur & 0x7f) << shift; | ||
273 | if (!(*cur++ & 0x80)) | ||
274 | break; | ||
275 | } | ||
276 | *pcur = cur; | ||
277 | |||
278 | return value; | ||
279 | } | ||
280 | |||
281 | static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) | ||
282 | { | ||
283 | const u8 *cur = *pcur; | ||
284 | sleb128_t value; | ||
285 | unsigned shift; | ||
286 | |||
287 | for (shift = 0, value = 0; cur < end; shift += 7) { | ||
288 | if (shift + 7 > 8 * sizeof(value) | ||
289 | && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { | ||
290 | cur = end + 1; | ||
291 | break; | ||
292 | } | ||
293 | value |= (sleb128_t)(*cur & 0x7f) << shift; | ||
294 | if (!(*cur & 0x80)) { | ||
295 | value |= -(*cur++ & 0x40) << shift; | ||
296 | break; | ||
297 | } | ||
298 | } | ||
299 | *pcur = cur; | ||
300 | |||
301 | return value; | ||
302 | } | ||
303 | |||
304 | static unsigned long read_pointer(const u8 **pLoc, | ||
305 | const void *end, | ||
306 | signed ptrType) | ||
307 | { | ||
308 | unsigned long value = 0; | ||
309 | union { | ||
310 | const u8 *p8; | ||
311 | const u16 *p16u; | ||
312 | const s16 *p16s; | ||
313 | const u32 *p32u; | ||
314 | const s32 *p32s; | ||
315 | const unsigned long *pul; | ||
316 | } ptr; | ||
317 | |||
318 | if (ptrType < 0 || ptrType == DW_EH_PE_omit) | ||
319 | return 0; | ||
320 | ptr.p8 = *pLoc; | ||
321 | switch(ptrType & DW_EH_PE_FORM) { | ||
322 | case DW_EH_PE_data2: | ||
323 | if (end < (const void *)(ptr.p16u + 1)) | ||
324 | return 0; | ||
325 | if(ptrType & DW_EH_PE_signed) | ||
326 | value = get_unaligned(ptr.p16s++); | ||
327 | else | ||
328 | value = get_unaligned(ptr.p16u++); | ||
329 | break; | ||
330 | case DW_EH_PE_data4: | ||
331 | #ifdef CONFIG_64BIT | ||
332 | if (end < (const void *)(ptr.p32u + 1)) | ||
333 | return 0; | ||
334 | if(ptrType & DW_EH_PE_signed) | ||
335 | value = get_unaligned(ptr.p32s++); | ||
336 | else | ||
337 | value = get_unaligned(ptr.p32u++); | ||
338 | break; | ||
339 | case DW_EH_PE_data8: | ||
340 | BUILD_BUG_ON(sizeof(u64) != sizeof(value)); | ||
341 | #else | ||
342 | BUILD_BUG_ON(sizeof(u32) != sizeof(value)); | ||
343 | #endif | ||
344 | case DW_EH_PE_native: | ||
345 | if (end < (const void *)(ptr.pul + 1)) | ||
346 | return 0; | ||
347 | value = get_unaligned(ptr.pul++); | ||
348 | break; | ||
349 | case DW_EH_PE_leb128: | ||
350 | BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); | ||
351 | value = ptrType & DW_EH_PE_signed | ||
352 | ? get_sleb128(&ptr.p8, end) | ||
353 | : get_uleb128(&ptr.p8, end); | ||
354 | if ((const void *)ptr.p8 > end) | ||
355 | return 0; | ||
356 | break; | ||
357 | default: | ||
358 | return 0; | ||
359 | } | ||
360 | switch(ptrType & DW_EH_PE_ADJUST) { | ||
361 | case DW_EH_PE_abs: | ||
362 | break; | ||
363 | case DW_EH_PE_pcrel: | ||
364 | value += (unsigned long)*pLoc; | ||
365 | break; | ||
366 | default: | ||
367 | return 0; | ||
368 | } | ||
369 | if ((ptrType & DW_EH_PE_indirect) | ||
370 | && __get_user(value, (unsigned long *)value)) | ||
371 | return 0; | ||
372 | *pLoc = ptr.p8; | ||
373 | |||
374 | return value; | ||
375 | } | ||
376 | |||
377 | static signed fde_pointer_type(const u32 *cie) | ||
378 | { | ||
379 | const u8 *ptr = (const u8 *)(cie + 2); | ||
380 | unsigned version = *ptr; | ||
381 | |||
382 | if (version != 1) | ||
383 | return -1; /* unsupported */ | ||
384 | if (*++ptr) { | ||
385 | const char *aug; | ||
386 | const u8 *end = (const u8 *)(cie + 1) + *cie; | ||
387 | uleb128_t len; | ||
388 | |||
389 | /* check if augmentation size is first (and thus present) */ | ||
390 | if (*ptr != 'z') | ||
391 | return -1; | ||
392 | /* check if augmentation string is nul-terminated */ | ||
393 | if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) | ||
394 | return -1; | ||
395 | ++ptr; /* skip terminator */ | ||
396 | get_uleb128(&ptr, end); /* skip code alignment */ | ||
397 | get_sleb128(&ptr, end); /* skip data alignment */ | ||
398 | /* skip return address column */ | ||
399 | version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end); | ||
400 | len = get_uleb128(&ptr, end); /* augmentation length */ | ||
401 | if (ptr + len < ptr || ptr + len > end) | ||
402 | return -1; | ||
403 | end = ptr + len; | ||
404 | while (*++aug) { | ||
405 | if (ptr >= end) | ||
406 | return -1; | ||
407 | switch(*aug) { | ||
408 | case 'L': | ||
409 | ++ptr; | ||
410 | break; | ||
411 | case 'P': { | ||
412 | signed ptrType = *ptr++; | ||
413 | |||
414 | if (!read_pointer(&ptr, end, ptrType) || ptr > end) | ||
415 | return -1; | ||
416 | } | ||
417 | break; | ||
418 | case 'R': | ||
419 | return *ptr; | ||
420 | default: | ||
421 | return -1; | ||
422 | } | ||
423 | } | ||
424 | } | ||
425 | return DW_EH_PE_native|DW_EH_PE_abs; | ||
426 | } | ||
427 | |||
428 | static int advance_loc(unsigned long delta, struct unwind_state *state) | ||
429 | { | ||
430 | state->loc += delta * state->codeAlign; | ||
431 | |||
432 | return delta > 0; | ||
433 | } | ||
434 | |||
435 | static void set_rule(uleb128_t reg, | ||
436 | enum item_location where, | ||
437 | uleb128_t value, | ||
438 | struct unwind_state *state) | ||
439 | { | ||
440 | if (reg < ARRAY_SIZE(state->regs)) { | ||
441 | state->regs[reg].where = where; | ||
442 | state->regs[reg].value = value; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | static int processCFI(const u8 *start, | ||
447 | const u8 *end, | ||
448 | unsigned long targetLoc, | ||
449 | signed ptrType, | ||
450 | struct unwind_state *state) | ||
451 | { | ||
452 | union { | ||
453 | const u8 *p8; | ||
454 | const u16 *p16; | ||
455 | const u32 *p32; | ||
456 | } ptr; | ||
457 | int result = 1; | ||
458 | |||
459 | if (start != state->cieStart) { | ||
460 | state->loc = state->org; | ||
461 | result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); | ||
462 | if (targetLoc == 0 && state->label == NULL) | ||
463 | return result; | ||
464 | } | ||
465 | for (ptr.p8 = start; result && ptr.p8 < end; ) { | ||
466 | switch(*ptr.p8 >> 6) { | ||
467 | uleb128_t value; | ||
468 | |||
469 | case 0: | ||
470 | switch(*ptr.p8++) { | ||
471 | case DW_CFA_nop: | ||
472 | break; | ||
473 | case DW_CFA_set_loc: | ||
474 | if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0) | ||
475 | result = 0; | ||
476 | break; | ||
477 | case DW_CFA_advance_loc1: | ||
478 | result = ptr.p8 < end && advance_loc(*ptr.p8++, state); | ||
479 | break; | ||
480 | case DW_CFA_advance_loc2: | ||
481 | result = ptr.p8 <= end + 2 | ||
482 | && advance_loc(*ptr.p16++, state); | ||
483 | break; | ||
484 | case DW_CFA_advance_loc4: | ||
485 | result = ptr.p8 <= end + 4 | ||
486 | && advance_loc(*ptr.p32++, state); | ||
487 | break; | ||
488 | case DW_CFA_offset_extended: | ||
489 | value = get_uleb128(&ptr.p8, end); | ||
490 | set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); | ||
491 | break; | ||
492 | case DW_CFA_val_offset: | ||
493 | value = get_uleb128(&ptr.p8, end); | ||
494 | set_rule(value, Value, get_uleb128(&ptr.p8, end), state); | ||
495 | break; | ||
496 | case DW_CFA_offset_extended_sf: | ||
497 | value = get_uleb128(&ptr.p8, end); | ||
498 | set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); | ||
499 | break; | ||
500 | case DW_CFA_val_offset_sf: | ||
501 | value = get_uleb128(&ptr.p8, end); | ||
502 | set_rule(value, Value, get_sleb128(&ptr.p8, end), state); | ||
503 | break; | ||
504 | case DW_CFA_restore_extended: | ||
505 | case DW_CFA_undefined: | ||
506 | case DW_CFA_same_value: | ||
507 | set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); | ||
508 | break; | ||
509 | case DW_CFA_register: | ||
510 | value = get_uleb128(&ptr.p8, end); | ||
511 | set_rule(value, | ||
512 | Register, | ||
513 | get_uleb128(&ptr.p8, end), state); | ||
514 | break; | ||
515 | case DW_CFA_remember_state: | ||
516 | if (ptr.p8 == state->label) { | ||
517 | state->label = NULL; | ||
518 | return 1; | ||
519 | } | ||
520 | if (state->stackDepth >= MAX_STACK_DEPTH) | ||
521 | return 0; | ||
522 | state->stack[state->stackDepth++] = ptr.p8; | ||
523 | break; | ||
524 | case DW_CFA_restore_state: | ||
525 | if (state->stackDepth) { | ||
526 | const uleb128_t loc = state->loc; | ||
527 | const u8 *label = state->label; | ||
528 | |||
529 | state->label = state->stack[state->stackDepth - 1]; | ||
530 | memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); | ||
531 | memset(state->regs, 0, sizeof(state->regs)); | ||
532 | state->stackDepth = 0; | ||
533 | result = processCFI(start, end, 0, ptrType, state); | ||
534 | state->loc = loc; | ||
535 | state->label = label; | ||
536 | } else | ||
537 | return 0; | ||
538 | break; | ||
539 | case DW_CFA_def_cfa: | ||
540 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
541 | /*nobreak*/ | ||
542 | case DW_CFA_def_cfa_offset: | ||
543 | state->cfa.offs = get_uleb128(&ptr.p8, end); | ||
544 | break; | ||
545 | case DW_CFA_def_cfa_sf: | ||
546 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
547 | /*nobreak*/ | ||
548 | case DW_CFA_def_cfa_offset_sf: | ||
549 | state->cfa.offs = get_sleb128(&ptr.p8, end) | ||
550 | * state->dataAlign; | ||
551 | break; | ||
552 | case DW_CFA_def_cfa_register: | ||
553 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
554 | break; | ||
555 | /*todo case DW_CFA_def_cfa_expression: */ | ||
556 | /*todo case DW_CFA_expression: */ | ||
557 | /*todo case DW_CFA_val_expression: */ | ||
558 | case DW_CFA_GNU_args_size: | ||
559 | get_uleb128(&ptr.p8, end); | ||
560 | break; | ||
561 | case DW_CFA_GNU_negative_offset_extended: | ||
562 | value = get_uleb128(&ptr.p8, end); | ||
563 | set_rule(value, | ||
564 | Memory, | ||
565 | (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); | ||
566 | break; | ||
567 | case DW_CFA_GNU_window_save: | ||
568 | default: | ||
569 | result = 0; | ||
570 | break; | ||
571 | } | ||
572 | break; | ||
573 | case 1: | ||
574 | result = advance_loc(*ptr.p8++ & 0x3f, state); | ||
575 | break; | ||
576 | case 2: | ||
577 | value = *ptr.p8++ & 0x3f; | ||
578 | set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); | ||
579 | break; | ||
580 | case 3: | ||
581 | set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); | ||
582 | break; | ||
583 | } | ||
584 | if (ptr.p8 > end) | ||
585 | result = 0; | ||
586 | if (result && targetLoc != 0 && targetLoc < state->loc) | ||
587 | return 1; | ||
588 | } | ||
589 | |||
590 | return result | ||
591 | && ptr.p8 == end | ||
592 | && (targetLoc == 0 | ||
593 | || (/*todo While in theory this should apply, gcc in practice omits | ||
594 | everything past the function prolog, and hence the location | ||
595 | never reaches the end of the function. | ||
596 | targetLoc < state->loc &&*/ state->label == NULL)); | ||
597 | } | ||
598 | |||
/* Unwind to previous to frame.  Returns 0 if successful, negative
 * number in case of an error.
 *
 * Proceeds in stages:
 *  1. scan the unwind table covering UNW_PC(frame) for the FDE whose
 *     code range contains the PC, and locate its owning CIE,
 *  2. parse the CIE header (version, alignment factors, return-address
 *     register, 'z' augmentation data),
 *  3. run the CFI program up to the current PC via processCFI(),
 *  4. apply the resulting per-register rules to the frame, reading any
 *     stack-saved values from between SP and the computed CFA.
 * If no usable CFI is found, an optional frame-pointer chain fallback
 * is attempted before giving up with -ENXIO. */
int unwind(struct unwind_frame_info *frame)
{
/* Access the frame's copy of register r as type t, via the
 * per-register offset table reg_info[]. */
#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
	const u32 *fde = NULL, *cie = NULL;
	const u8 *ptr = NULL, *end = NULL;
	unsigned long startLoc = 0, endLoc = 0, cfa;
	unsigned i;
	signed ptrType = -1;
	uleb128_t retAddrReg = 0;
	struct unwind_table *table;
	struct unwind_state state;

	if (UNW_PC(frame) == 0)
		return -EINVAL;
	/* Stage 1: linear scan of the table for an FDE whose
	 * [startLoc, endLoc) range contains the current PC.  The table
	 * must be a multiple of the entry-length word size. */
	if ((table = find_table(UNW_PC(frame))) != NULL
	    && !(table->size & (sizeof(*fde) - 1))) {
		unsigned long tableSize = table->size;

		for (fde = table->address;
		     tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
		     tableSize -= sizeof(*fde) + *fde,
		     fde += 1 + *fde / sizeof(*fde)) {
			if (!*fde || (*fde & (sizeof(*fde) - 1)))
				break;
			if (!fde[1])
				continue; /* this is a CIE */
			if ((fde[1] & (sizeof(*fde) - 1))
			    || fde[1] > (unsigned long)(fde + 1)
			                - (unsigned long)table->address)
				continue; /* this is not a valid FDE */
			/* fde[1] is the CIE pointer: a backward distance
			 * from the FDE to its owning CIE. */
			cie = fde + 1 - fde[1] / sizeof(*fde);
			if (*cie <= sizeof(*cie) + 4
			    || *cie >= fde[1] - sizeof(*fde)
			    || (*cie & (sizeof(*cie) - 1))
			    || cie[1]
			    || (ptrType = fde_pointer_type(cie)) < 0) {
				cie = NULL; /* this is not a (valid) CIE */
				continue;
			}
			/* Decode the FDE's initial location and code-range
			 * length.  The length keeps only the form and sign
			 * bits of the encoding unless it is indirect. */
			ptr = (const u8 *)(fde + 2);
			startLoc = read_pointer(&ptr,
			                        (const u8 *)(fde + 1) + *fde,
			                        ptrType);
			endLoc = startLoc
			         + read_pointer(&ptr,
			                        (const u8 *)(fde + 1) + *fde,
			                        ptrType & DW_EH_PE_indirect
			                        ? ptrType
			                        : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
			if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc)
				break;
			cie = NULL;
		}
	}
	/* Stage 2: validate the CIE header and its augmentation string. */
	if (cie != NULL) {
		memset(&state, 0, sizeof(state));
		state.cieEnd = ptr; /* keep here temporarily */
		ptr = (const u8 *)(cie + 2);
		end = (const u8 *)(cie + 1) + *cie;
		if ((state.version = *ptr) != 1)
			cie = NULL; /* unsupported version */
		else if (*++ptr) {
			/* check if augmentation size is first (and thus present) */
			if (*ptr == 'z') {
				/* check for ignorable (or already handled)
				 * nul-terminated augmentation string */
				while (++ptr < end && *ptr)
					if (strchr("LPR", *ptr) == NULL)
						break;
			}
			if (ptr >= end || *ptr)
				cie = NULL;
		}
		++ptr;
	}
	if (cie != NULL) {
		/* get code aligment factor */
		state.codeAlign = get_uleb128(&ptr, end);
		/* get data aligment factor */
		state.dataAlign = get_sleb128(&ptr, end);
		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
			cie = NULL;
		else {
			/* The return-address register must map onto a full
			 * machine word in this frame to be usable. */
			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
			/* skip augmentation */
			if (((const char *)(cie + 2))[1] == 'z')
				ptr += get_uleb128(&ptr, end);
			if (ptr > end
			    || retAddrReg >= ARRAY_SIZE(reg_info)
			    || REG_INVALID(retAddrReg)
			    || reg_info[retAddrReg].width != sizeof(unsigned long))
				cie = NULL;
		}
	}
	if (cie != NULL) {
		/* Swap ptr/end over to the FDE's instruction stream; the
		 * CIE's initial instructions are kept for processCFI(). */
		state.cieStart = ptr;
		ptr = state.cieEnd;
		state.cieEnd = end;
		end = (const u8 *)(fde + 1) + *fde;
		/* skip augmentation */
		if (((const char *)(cie + 2))[1] == 'z') {
			uleb128_t augSize = get_uleb128(&ptr, end);

			if ((ptr += augSize) > end)
				fde = NULL;
		}
	}
	if (cie == NULL || fde == NULL) {
		/* No usable CFI found: optionally fall back to walking the
		 * frame-pointer chain, with alignment and stack-bounds
		 * sanity checks on both FP and the saved link/return
		 * address before trusting them. */
#ifdef CONFIG_FRAME_POINTER
		unsigned long top, bottom;
#endif

#ifdef CONFIG_FRAME_POINTER
		top = STACK_TOP(frame->task);
		bottom = STACK_BOTTOM(frame->task);
# if FRAME_RETADDR_OFFSET < 0
		if (UNW_SP(frame) < top
		    && UNW_FP(frame) <= UNW_SP(frame)
		    && bottom < UNW_FP(frame)
# else
		if (UNW_SP(frame) > top
		    && UNW_FP(frame) >= UNW_SP(frame)
		    && bottom > UNW_FP(frame)
# endif
		    && !((UNW_SP(frame) | UNW_FP(frame))
		         & (sizeof(unsigned long) - 1))) {
			unsigned long link;

			if (!__get_user(link,
			                (unsigned long *)(UNW_FP(frame)
			                                  + FRAME_LINK_OFFSET))
# if FRAME_RETADDR_OFFSET < 0
			    && link > bottom && link < UNW_FP(frame)
# else
			    && link > UNW_FP(frame) && link < bottom
# endif
			    && !(link & (sizeof(link) - 1))
			    && !__get_user(UNW_PC(frame),
			                   (unsigned long *)(UNW_FP(frame)
			                                     + FRAME_RETADDR_OFFSET))) {
				UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
# if FRAME_RETADDR_OFFSET < 0
					-
# else
					+
# endif
					sizeof(UNW_PC(frame));
				UNW_FP(frame) = link;
				return 0;
			}
		}
#endif
		return -ENXIO;
	}
	/* Stage 3: interpret the CFI program up to the current PC and
	 * validate the resulting CFA rule. */
	state.org = startLoc;
	memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
	/* process instructions */
	if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state)
	    || state.loc > endLoc
	    || state.regs[retAddrReg].where == Nowhere
	    || state.cfa.reg >= ARRAY_SIZE(reg_info)
	    || reg_info[state.cfa.reg].width != sizeof(unsigned long)
	    || state.cfa.offs % sizeof(unsigned long))
		return -EIO;
	/* update frame */
	/* Stage 4: compute the CFA and bound the stack region that
	 * Memory-rule reads are allowed to touch. */
	cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
	startLoc = min((unsigned long)UNW_SP(frame), cfa);
	endLoc = max((unsigned long)UNW_SP(frame), cfa);
	if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
		startLoc = min(STACK_LIMIT(cfa), cfa);
		endLoc = max(STACK_LIMIT(cfa), cfa);
	}
#ifndef CONFIG_64BIT
# define CASES CASE(8); CASE(16); CASE(32)
#else
# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
#endif
	/* First pass: resolve Register rules by reading the source
	 * register's current value from the frame now, before any
	 * register is overwritten by the second pass. */
	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
		if (REG_INVALID(i)) {
			if (state.regs[i].where == Nowhere)
				continue;
			return -EIO;
		}
		switch(state.regs[i].where) {
		default:
			break;
		case Register:
			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
			    || REG_INVALID(state.regs[i].value)
			    || reg_info[i].width > reg_info[state.regs[i].value].width)
				return -EIO;
			switch(reg_info[state.regs[i].value].width) {
#define CASE(n) \
			case sizeof(u##n): \
				state.regs[i].value = FRAME_REG(state.regs[i].value, \
				                                const u##n); \
				break
			CASES;
#undef CASE
			default:
				return -EIO;
			}
			break;
		}
	}
	/* Second pass: write the restored values back into the frame. */
	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
		if (REG_INVALID(i))
			continue;
		switch(state.regs[i].where) {
		case Nowhere:
			/* A register without a rule keeps its value, except
			 * that the stack pointer itself becomes the CFA. */
			if (reg_info[i].width != sizeof(UNW_SP(frame))
			    || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
			       != &UNW_SP(frame))
				continue;
			UNW_SP(frame) = cfa;
			break;
		case Register:
			/* Value was already resolved in the first pass. */
			switch(reg_info[i].width) {
#define CASE(n) case sizeof(u##n): \
			FRAME_REG(i, u##n) = state.regs[i].value; \
			break
			CASES;
#undef CASE
			default:
				return -EIO;
			}
			break;
		case Value:
			/* Register's value is the CFA plus a scaled offset. */
			if (reg_info[i].width != sizeof(unsigned long))
				return -EIO;
			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
			                                    * state.dataAlign;
			break;
		case Memory: {
			/* Register was saved on the stack at CFA plus a
			 * scaled offset; validate alignment, bounds, and
			 * wrap-around before reading.  NOTE(review): the
			 * __get_user() return value is ignored here —
			 * presumably tolerable after the bounds check, but
			 * worth confirming. */
			unsigned long addr = cfa + state.regs[i].value
			                           * state.dataAlign;

			if ((state.regs[i].value * state.dataAlign)
			    % sizeof(unsigned long)
			    || addr < startLoc
			    || addr + sizeof(unsigned long) < addr
			    || addr + sizeof(unsigned long) > endLoc)
				return -EIO;
			switch(reg_info[i].width) {
#define CASE(n) case sizeof(u##n): \
			__get_user(FRAME_REG(i, u##n), (u##n *)addr); \
			break
			CASES;
#undef CASE
			default:
				return -EIO;
			}
		}
		break;
		}
	}

	return 0;
#undef CASES
#undef FRAME_REG
}
EXPORT_SYMBOL(unwind);
863 | |||
864 | int unwind_init_frame_info(struct unwind_frame_info *info, | ||
865 | struct task_struct *tsk, | ||
866 | /*const*/ struct pt_regs *regs) | ||
867 | { | ||
868 | info->task = tsk; | ||
869 | arch_unw_init_frame_info(info, regs); | ||
870 | |||
871 | return 0; | ||
872 | } | ||
873 | EXPORT_SYMBOL(unwind_init_frame_info); | ||
874 | |||
875 | /* | ||
876 | * Prepare to unwind a blocked task. | ||
877 | */ | ||
878 | int unwind_init_blocked(struct unwind_frame_info *info, | ||
879 | struct task_struct *tsk) | ||
880 | { | ||
881 | info->task = tsk; | ||
882 | arch_unw_init_blocked(info); | ||
883 | |||
884 | return 0; | ||
885 | } | ||
886 | EXPORT_SYMBOL(unwind_init_blocked); | ||
887 | |||
888 | /* | ||
889 | * Prepare to unwind the currently running thread. | ||
890 | */ | ||
891 | int unwind_init_running(struct unwind_frame_info *info, | ||
892 | asmlinkage int (*callback)(struct unwind_frame_info *, | ||
893 | void *arg), | ||
894 | void *arg) | ||
895 | { | ||
896 | info->task = current; | ||
897 | |||
898 | return arch_unwind_init_running(info, callback, arg); | ||
899 | } | ||
900 | EXPORT_SYMBOL(unwind_init_running); | ||
901 | |||
/*
 * Unwind until the return pointer is in user-land (or until an error
 * occurs).  Returns 0 if successful, negative number in case of
 * error.
 */
int unwind_to_user(struct unwind_frame_info *info)
{
	for (;;) {
		int err;

		/* Already (or finally) at a user-mode frame: done. */
		if (arch_unw_user_mode(info))
			return 0;
		err = unwind(info);
		if (err < 0)
			return err;
	}
}
EXPORT_SYMBOL(unwind_to_user);