Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile              2
-rw-r--r--  kernel/acct.c              115
-rw-r--r--  kernel/compat.c              7
-rw-r--r--  kernel/cpu.c                10
-rw-r--r--  kernel/cpuset.c             26
-rw-r--r--  kernel/exit.c               16
-rw-r--r--  kernel/fork.c               16
-rw-r--r--  kernel/hrtimer.c            15
-rw-r--r--  kernel/kprobes.c            58
-rw-r--r--  kernel/kthread.c            61
-rw-r--r--  kernel/module.c             29
-rw-r--r--  kernel/mutex-debug.c        12
-rw-r--r--  kernel/mutex-debug.h        25
-rw-r--r--  kernel/mutex.c              21
-rw-r--r--  kernel/mutex.h               6
-rw-r--r--  kernel/power/Kconfig         2
-rw-r--r--  kernel/power/disk.c          2
-rw-r--r--  kernel/power/main.c          4
-rw-r--r--  kernel/power/power.h         4
-rw-r--r--  kernel/power/snapshot.c    112
-rw-r--r--  kernel/power/swsusp.c       18
-rw-r--r--  kernel/printk.c             52
-rw-r--r--  kernel/ptrace.c             23
-rw-r--r--  kernel/sched.c              17
-rw-r--r--  kernel/signal.c             35
-rw-r--r--  kernel/softirq.c             2
-rw-r--r--  kernel/softlockup.c          4
-rw-r--r--  kernel/stop_machine.c       17
-rw-r--r--  kernel/sys.c                10
-rw-r--r--  kernel/sysctl.c             22
-rw-r--r--  kernel/time.c                2
-rw-r--r--  kernel/time/Makefile         1
-rw-r--r--  kernel/time/clocksource.c  349
-rw-r--r--  kernel/time/jiffies.c       73
-rw-r--r--  kernel/timer.c             398
-rw-r--r--  kernel/unwind.c            918
-rw-r--r--  kernel/workqueue.c          30
37 files changed, 2135 insertions, 379 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f6ef00f4f9..752bd7d383 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
         kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
         hrtimer.o
 
+obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
 ifeq ($(CONFIG_COMPAT),y)
@@ -21,6 +22,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
+obj-$(CONFIG_STACK_UNWIND) += unwind.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 6802020e0c..368c4f03fe 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,7 +75,7 @@ int acct_parm[3] = {4, 2, 30};
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(long, struct file *);
+static void do_acct_process(struct file *);
 
 /*
  * This structure is used so that all the data protected by lock
@@ -196,7 +196,7 @@ static void acct_file_reopen(struct file *file)
         if (old_acct) {
                 mnt_unpin(old_acct->f_vfsmnt);
                 spin_unlock(&acct_globals.lock);
-                do_acct_process(0, old_acct);
+                do_acct_process(old_acct);
                 filp_close(old_acct, NULL);
                 spin_lock(&acct_globals.lock);
         }
@@ -419,16 +419,15 @@ static u32 encode_float(u64 value)
 /*
  * do_acct_process does all actual work. Caller holds the reference to file.
  */
-static void do_acct_process(long exitcode, struct file *file)
+static void do_acct_process(struct file *file)
 {
+        struct pacct_struct *pacct = &current->signal->pacct;
         acct_t ac;
         mm_segment_t fs;
-        unsigned long vsize;
         unsigned long flim;
         u64 elapsed;
         u64 run_time;
         struct timespec uptime;
-        unsigned long jiffies;
 
         /*
          * First check to see if there is enough free_space to continue
@@ -469,12 +468,6 @@ static void do_acct_process(long exitcode, struct file *file)
 #endif
         do_div(elapsed, AHZ);
         ac.ac_btime = xtime.tv_sec - elapsed;
-        jiffies = cputime_to_jiffies(cputime_add(current->utime,
-                                                 current->signal->utime));
-        ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
-        jiffies = cputime_to_jiffies(cputime_add(current->stime,
-                                                 current->signal->stime));
-        ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
         /* we really need to bite the bullet and change layout */
         ac.ac_uid = current->uid;
         ac.ac_gid = current->gid;
@@ -496,37 +489,18 @@ static void do_acct_process(long exitcode, struct file *file)
                 old_encode_dev(tty_devnum(current->signal->tty)) : 0;
         read_unlock(&tasklist_lock);
 
-        ac.ac_flag = 0;
-        if (current->flags & PF_FORKNOEXEC)
-                ac.ac_flag |= AFORK;
-        if (current->flags & PF_SUPERPRIV)
-                ac.ac_flag |= ASU;
-        if (current->flags & PF_DUMPCORE)
-                ac.ac_flag |= ACORE;
-        if (current->flags & PF_SIGNALED)
-                ac.ac_flag |= AXSIG;
-
-        vsize = 0;
-        if (current->mm) {
-                struct vm_area_struct *vma;
-                down_read(&current->mm->mmap_sem);
-                vma = current->mm->mmap;
-                while (vma) {
-                        vsize += vma->vm_end - vma->vm_start;
-                        vma = vma->vm_next;
-                }
-                up_read(&current->mm->mmap_sem);
-        }
-        vsize = vsize / 1024;
-        ac.ac_mem = encode_comp_t(vsize);
+        spin_lock(&current->sighand->siglock);
+        ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
+        ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
+        ac.ac_flag = pacct->ac_flag;
+        ac.ac_mem = encode_comp_t(pacct->ac_mem);
+        ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
+        ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
+        ac.ac_exitcode = pacct->ac_exitcode;
+        spin_unlock(&current->sighand->siglock);
         ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
         ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
-        ac.ac_minflt = encode_comp_t(current->signal->min_flt +
-                                     current->min_flt);
-        ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
-                                     current->maj_flt);
         ac.ac_swaps = encode_comp_t(0);
-        ac.ac_exitcode = exitcode;
 
         /*
          * Kernel segment override to datasegment and write it
@@ -546,12 +520,63 @@ static void do_acct_process(long exitcode, struct file *file)
 }
 
 /**
+ * acct_init_pacct - initialize a new pacct_struct
+ */
+void acct_init_pacct(struct pacct_struct *pacct)
+{
+        memset(pacct, 0, sizeof(struct pacct_struct));
+        pacct->ac_utime = pacct->ac_stime = cputime_zero;
+}
+
+/**
+ * acct_collect - collect accounting information into pacct_struct
+ * @exitcode: task exit code
+ * @group_dead: not 0, if this thread is the last one in the process.
+ */
+void acct_collect(long exitcode, int group_dead)
+{
+        struct pacct_struct *pacct = &current->signal->pacct;
+        unsigned long vsize = 0;
+
+        if (group_dead && current->mm) {
+                struct vm_area_struct *vma;
+                down_read(&current->mm->mmap_sem);
+                vma = current->mm->mmap;
+                while (vma) {
+                        vsize += vma->vm_end - vma->vm_start;
+                        vma = vma->vm_next;
+                }
+                up_read(&current->mm->mmap_sem);
+        }
+
+        spin_lock_irq(&current->sighand->siglock);
+        if (group_dead)
+                pacct->ac_mem = vsize / 1024;
+        if (thread_group_leader(current)) {
+                pacct->ac_exitcode = exitcode;
+                if (current->flags & PF_FORKNOEXEC)
+                        pacct->ac_flag |= AFORK;
+        }
+        if (current->flags & PF_SUPERPRIV)
+                pacct->ac_flag |= ASU;
+        if (current->flags & PF_DUMPCORE)
+                pacct->ac_flag |= ACORE;
+        if (current->flags & PF_SIGNALED)
+                pacct->ac_flag |= AXSIG;
+        pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
+        pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
+        pacct->ac_minflt += current->min_flt;
+        pacct->ac_majflt += current->maj_flt;
+        spin_unlock_irq(&current->sighand->siglock);
+}
+
+/**
  * acct_process - now just a wrapper around do_acct_process
  * @exitcode: task exit code
  *
  * handles process accounting for an exiting task
  */
-void acct_process(long exitcode)
+void acct_process()
 {
         struct file *file = NULL;
 
@@ -570,7 +595,7 @@ void acct_process(long exitcode)
         get_file(file);
         spin_unlock(&acct_globals.lock);
 
-        do_acct_process(exitcode, file);
+        do_acct_process(file);
         fput(file);
 }
 
@@ -599,9 +624,7 @@ void acct_update_integrals(struct task_struct *tsk)
  */
 void acct_clear_integrals(struct task_struct *tsk)
 {
-        if (tsk) {
-                tsk->acct_stimexpd = 0;
-                tsk->acct_rss_mem1 = 0;
-                tsk->acct_vm_mem1 = 0;
-        }
+        tsk->acct_stimexpd = 0;
+        tsk->acct_rss_mem1 = 0;
+        tsk->acct_vm_mem1 = 0;
 }
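
Taken together with the fork.c and exit.c hunks further down, the acct.c changes split accounting into a per-thread collection step and a single per-process write. A minimal sketch of the resulting call sequence (illustrative only; example_exit_path is not a function in the patch, it just mirrors the do_exit() ordering):

    /* once per process, from copy_signal() at fork time */
    acct_init_pacct(&sig->pacct);

    void example_exit_path(long code, int group_dead)
    {
            acct_collect(code, group_dead); /* every exiting thread folds utime/stime,
                                               fault counts and AFORK/ASU/... flags
                                               into signal->pacct */
            /* ... other exit work ... */
            if (group_dead)
                    acct_process();         /* one acct record written per thread group */
    }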
diff --git a/kernel/compat.c b/kernel/compat.c
index 2f67233243..126dee9530 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -730,17 +730,10 @@ void
 sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
 {
         switch (_NSIG_WORDS) {
-#if defined (__COMPAT_ENDIAN_SWAP__)
-        case 4: set->sig[3] = compat->sig[7] | (((long)compat->sig[6]) << 32 );
-        case 3: set->sig[2] = compat->sig[5] | (((long)compat->sig[4]) << 32 );
-        case 2: set->sig[1] = compat->sig[3] | (((long)compat->sig[2]) << 32 );
-        case 1: set->sig[0] = compat->sig[1] | (((long)compat->sig[0]) << 32 );
-#else
         case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
         case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 );
         case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 );
         case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
-#endif
         }
 }
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index fe2b8d0bfe..03dcd98184 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -13,10 +13,10 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
 
 /* This protects CPUs going up and down... */
-static DECLARE_MUTEX(cpucontrol);
+static DEFINE_MUTEX(cpucontrol);
 
 static BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
@@ -30,9 +30,9 @@ static int __lock_cpu_hotplug(int interruptible)
 
         if (lock_cpu_hotplug_owner != current) {
                 if (interruptible)
-                        ret = down_interruptible(&cpucontrol);
+                        ret = mutex_lock_interruptible(&cpucontrol);
                 else
-                        down(&cpucontrol);
+                        mutex_lock(&cpucontrol);
         }
 
         /*
@@ -56,7 +56,7 @@ void unlock_cpu_hotplug(void)
 {
         if (--lock_cpu_hotplug_depth == 0) {
                 lock_cpu_hotplug_owner = NULL;
-                up(&cpucontrol);
+                mutex_unlock(&cpucontrol);
         }
 }
 EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
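
The kernel/cpu.c hunks are a straight semaphore-to-mutex conversion. A minimal sketch of the pattern (illustrative only, not part of the patch; example_lock and example_critical_section are placeholder names) using the existing <linux/mutex.h> primitives the code switches to:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(example_lock);      /* replaces DECLARE_MUTEX() on a semaphore */

    static int example_critical_section(int interruptible)
    {
            int ret = 0;

            if (interruptible)
                    ret = mutex_lock_interruptible(&example_lock); /* -EINTR on signal */
            else
                    mutex_lock(&example_lock);
            if (ret)
                    return ret;

            /* ... protected work, e.g. the CPU hotplug bookkeeping ... */

            mutex_unlock(&example_lock);    /* replaces up() */
            return 0;
    }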
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b602f73fb3..1535af3a91 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2442,31 +2442,43 @@ void __cpuset_memory_pressure_bump(void)
  */
 static int proc_cpuset_show(struct seq_file *m, void *v)
 {
+        struct pid *pid;
         struct task_struct *tsk;
         char *buf;
-        int retval = 0;
+        int retval;
 
+        retval = -ENOMEM;
         buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
         if (!buf)
-                return -ENOMEM;
+                goto out;
+
+        retval = -ESRCH;
+        pid = m->private;
+        tsk = get_pid_task(pid, PIDTYPE_PID);
+        if (!tsk)
+                goto out_free;
 
-        tsk = m->private;
+        retval = -EINVAL;
         mutex_lock(&manage_mutex);
+
         retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
         if (retval < 0)
-                goto out;
+                goto out_unlock;
         seq_puts(m, buf);
         seq_putc(m, '\n');
-out:
+out_unlock:
         mutex_unlock(&manage_mutex);
+        put_task_struct(tsk);
+out_free:
         kfree(buf);
+out:
         return retval;
 }
 
 static int cpuset_open(struct inode *inode, struct file *file)
 {
-        struct task_struct *tsk = PROC_I(inode)->task;
-        return single_open(file, proc_cpuset_show, tsk);
+        struct pid *pid = PROC_I(inode)->pid;
+        return single_open(file, proc_cpuset_show, pid);
 }
 
 struct file_operations proc_cpuset_operations = {
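
The proc_cpuset_show()/cpuset_open() change stores a struct pid rather than a task pointer as the seq_file private data, so the open /proc file no longer pins the task. A sketch of the lookup pattern it adopts (illustrative; error handling shortened):

    struct task_struct *tsk;

    tsk = get_pid_task(pid, PIDTYPE_PID);   /* takes a reference, NULL if the task is gone */
    if (!tsk)
            return -ESRCH;
    /* ... use tsk only while holding the reference ... */
    put_task_struct(tsk);                   /* drop the reference taken by get_pid_task() */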
diff --git a/kernel/exit.c b/kernel/exit.c
index a3baf92462..304ef637be 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -36,6 +36,7 @@
 #include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
+#include <linux/resource.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -45,8 +46,6 @@
 extern void sem_exit (void);
 extern struct task_struct *child_reaper;
 
-int getrusage(struct task_struct *, int, struct rusage __user *);
-
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
@@ -138,12 +137,8 @@ void release_task(struct task_struct * p)
 {
         int zap_leader;
         task_t *leader;
-        struct dentry *proc_dentry;
-
 repeat:
         atomic_dec(&p->user->processes);
-        spin_lock(&p->proc_lock);
-        proc_dentry = proc_pid_unhash(p);
         write_lock_irq(&tasklist_lock);
         ptrace_unlink(p);
         BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -172,8 +167,7 @@ repeat:
 
         sched_exit(p);
         write_unlock_irq(&tasklist_lock);
-        spin_unlock(&p->proc_lock);
-        proc_pid_flush(proc_dentry);
+        proc_flush_task(p);
         release_thread(p);
         call_rcu(&p->rcu, delayed_put_task_struct);
 
@@ -895,11 +889,11 @@ fastcall NORET_TYPE void do_exit(long code)
         if (group_dead) {
                 hrtimer_cancel(&tsk->signal->real_timer);
                 exit_itimers(tsk->signal);
-                acct_process(code);
         }
+        acct_collect(code, group_dead);
         if (unlikely(tsk->robust_list))
                 exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
+#if defined(CONFIG_FUTEX) && defined(CONFIG_COMPAT)
         if (unlikely(tsk->compat_robust_list))
                 compat_exit_robust_list(tsk);
 #endif
@@ -907,6 +901,8 @@ fastcall NORET_TYPE void do_exit(long code)
         audit_free(tsk);
         exit_mm(tsk);
 
+        if (group_dead)
+                acct_process();
         exit_sem(tsk);
         __exit_files(tsk);
         __exit_fs(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 49adc0e8d4..9b4e54ef02 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -874,6 +874,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
                 tsk->it_prof_expires =
                         secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
         }
+        acct_init_pacct(&sig->pacct);
 
         return 0;
 }
@@ -992,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags,
         if (put_user(p->pid, parent_tidptr))
                 goto bad_fork_cleanup;
 
-        p->proc_dentry = NULL;
-
         INIT_LIST_HEAD(&p->children);
         INIT_LIST_HEAD(&p->sibling);
         p->vfork_done = NULL;
         spin_lock_init(&p->alloc_lock);
-        spin_lock_init(&p->proc_lock);
 
         clear_tsk_thread_flag(p, TIF_SIGPENDING);
         init_sigpending(&p->pending);
@@ -1158,18 +1156,6 @@ static task_t *copy_process(unsigned long clone_flags,
         }
 
         if (clone_flags & CLONE_THREAD) {
-                /*
-                 * Important: if an exit-all has been started then
-                 * do not create this new thread - the whole thread
-                 * group is supposed to exit anyway.
-                 */
-                if (current->signal->flags & SIGNAL_GROUP_EXIT) {
-                        spin_unlock(&current->sighand->siglock);
-                        write_unlock_irq(&tasklist_lock);
-                        retval = -EAGAIN;
-                        goto bad_fork_cleanup_namespace;
-                }
-
                 p->group_leader = current->group_leader;
                 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 1832430572..55601b3ce6 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -98,7 +98,6 @@ static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) =
 
 /**
  * ktime_get_ts - get the monotonic clock in timespec format
- *
  * @ts: pointer to timespec variable
  *
  * The function calculates the monotonic clock from the realtime
@@ -238,7 +237,6 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 # ifndef CONFIG_KTIME_SCALAR
 /**
  * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
- *
  * @kt: addend
  * @nsec: the scalar nsec value to add
  *
@@ -299,7 +297,6 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 
 /**
  * hrtimer_forward - forward the timer expiry
- *
  * @timer: hrtimer to forward
  * @now: forward past this time
  * @interval: the interval to forward
@@ -411,7 +408,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
 
 /**
  * hrtimer_start - (re)start an relative timer on the current CPU
- *
  * @timer: the timer to be added
  * @tim: expiry time
  * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
@@ -460,14 +456,13 @@ EXPORT_SYMBOL_GPL(hrtimer_start);
 
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
- *
  * @timer: hrtimer to stop
  *
  * Returns:
  *  0 when the timer was not active
  *  1 when the timer was active
  * -1 when the timer is currently excuting the callback function and
- *    can not be stopped
+ *    cannot be stopped
  */
 int hrtimer_try_to_cancel(struct hrtimer *timer)
 {
@@ -489,7 +484,6 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
- *
  * @timer: the timer to be cancelled
  *
  * Returns:
@@ -510,7 +504,6 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
 /**
  * hrtimer_get_remaining - get remaining time for the timer
- *
  * @timer: the timer to read
  */
 ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
@@ -564,7 +557,6 @@ ktime_t hrtimer_get_next_event(void)
 
 /**
  * hrtimer_init - initialize a timer to the given clock
- *
  * @timer: the timer to be initialized
  * @clock_id: the clock to be used
  * @mode: timer mode abs/rel
@@ -576,7 +568,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 
         memset(timer, 0, sizeof(struct hrtimer));
 
-        bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
+        bases = __raw_get_cpu_var(hrtimer_bases);
 
         if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
                 clock_id = CLOCK_MONOTONIC;
@@ -588,7 +580,6 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
 
 /**
  * hrtimer_get_res - get the timer resolution for a clock
- *
  * @which_clock: which clock to query
  * @tp: pointer to timespec variable to store the resolution
  *
@@ -599,7 +590,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 {
         struct hrtimer_base *bases;
 
-        bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
+        bases = __raw_get_cpu_var(hrtimer_bases);
         *tp = ktime_to_timespec(bases[which_clock].resolution);
 
         return 0;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1fbf466a29..64aab08115 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -47,11 +47,17 @@
 
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
+static atomic_t kprobe_count;
 
 DEFINE_MUTEX(kprobe_mutex);             /* Protects kprobe_table */
 DEFINE_SPINLOCK(kretprobe_lock);        /* Protects kretprobe_inst_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 
+static struct notifier_block kprobe_page_fault_nb = {
+        .notifier_call = kprobe_exceptions_notify,
+        .priority = 0x7fffffff /* we need to notified first */
+};
+
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
  * kprobe->ainsn.insn points to the copy of the instruction to be
@@ -368,16 +374,15 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
 */
 static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
 {
-        struct kprobe *kp;
-
         if (p->break_handler) {
-                list_for_each_entry_rcu(kp, &old_p->list, list) {
-                        if (kp->break_handler)
-                                return -EEXIST;
-                }
+                if (old_p->break_handler)
+                        return -EEXIST;
                 list_add_tail_rcu(&p->list, &old_p->list);
+                old_p->break_handler = aggr_break_handler;
         } else
                 list_add_rcu(&p->list, &old_p->list);
+        if (p->post_handler && !old_p->post_handler)
+                old_p->post_handler = aggr_post_handler;
         return 0;
 }
 
@@ -390,9 +395,11 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
         copy_kprobe(p, ap);
         ap->addr = p->addr;
         ap->pre_handler = aggr_pre_handler;
-        ap->post_handler = aggr_post_handler;
         ap->fault_handler = aggr_fault_handler;
-        ap->break_handler = aggr_break_handler;
+        if (p->post_handler)
+                ap->post_handler = aggr_post_handler;
+        if (p->break_handler)
+                ap->break_handler = aggr_break_handler;
 
         INIT_LIST_HEAD(&ap->list);
         list_add_rcu(&p->list, &ap->list);
@@ -464,6 +471,8 @@ static int __kprobes __register_kprobe(struct kprobe *p,
         old_p = get_kprobe(p->addr);
         if (old_p) {
                 ret = register_aggr_kprobe(old_p, p);
+                if (!ret)
+                        atomic_inc(&kprobe_count);
                 goto out;
         }
 
@@ -474,6 +483,10 @@ static int __kprobes __register_kprobe(struct kprobe *p,
         hlist_add_head_rcu(&p->hlist,
                        &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
+        if (atomic_add_return(1, &kprobe_count) == \
+                (ARCH_INACTIVE_KPROBE_COUNT + 1))
+                register_page_fault_notifier(&kprobe_page_fault_nb);
+
         arch_arm_kprobe(p);
 
 out:
@@ -536,14 +549,40 @@ valid_p:
                         kfree(old_p);
                 }
                 arch_remove_kprobe(p);
+        } else {
+                mutex_lock(&kprobe_mutex);
+                if (p->break_handler)
+                        old_p->break_handler = NULL;
+                if (p->post_handler){
+                        list_for_each_entry_rcu(list_p, &old_p->list, list){
+                                if (list_p->post_handler){
+                                        cleanup_p = 2;
+                                        break;
+                                }
+                        }
+                        if (cleanup_p == 0)
+                                old_p->post_handler = NULL;
+                }
+                mutex_unlock(&kprobe_mutex);
         }
+
+        /* Call unregister_page_fault_notifier()
+         * if no probes are active
+         */
+        mutex_lock(&kprobe_mutex);
+        if (atomic_add_return(-1, &kprobe_count) == \
+                ARCH_INACTIVE_KPROBE_COUNT)
+                unregister_page_fault_notifier(&kprobe_page_fault_nb);
+        mutex_unlock(&kprobe_mutex);
+        return;
 }
 
 static struct notifier_block kprobe_exceptions_nb = {
         .notifier_call = kprobe_exceptions_notify,
-        .priority = 0x7fffffff /* we need to notified first */
+        .priority = 0x7fffffff /* we need to be notified first */
 };
 
+
 int __kprobes register_jprobe(struct jprobe *jp)
 {
         /* Todo: Verify probepoint is a function entry point */
@@ -652,6 +691,7 @@ static int __init init_kprobes(void)
                 INIT_HLIST_HEAD(&kprobe_table[i]);
                 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
         }
+        atomic_set(&kprobe_count, 0);
 
         err = arch_init_kprobes();
         if (!err)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c5f3c6613b..24be714b04 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -45,6 +45,13 @@ struct kthread_stop_info
 static DEFINE_MUTEX(kthread_stop_lock);
 static struct kthread_stop_info kthread_stop_info;
 
+/**
+ * kthread_should_stop - should this kthread return now?
+ *
+ * When someone calls kthread_stop on your kthread, it will be woken
+ * and this will return true.  You should then return, and your return
+ * value will be passed through to kthread_stop().
+ */
 int kthread_should_stop(void)
 {
         return (kthread_stop_info.k == current);
@@ -122,6 +129,25 @@ static void keventd_create_kthread(void *_create)
         complete(&create->done);
 }
 
+/**
+ * kthread_create - create a kthread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: This helper function creates and names a kernel
+ * thread.  The thread will be stopped: use wake_up_process() to start
+ * it.  See also kthread_run(), kthread_create_on_cpu().
+ *
+ * When woken, the thread will run @threadfn() with @data as its
+ * argument. @threadfn can either call do_exit() directly if it is a
+ * standalone thread for which noone will call kthread_stop(), or
+ * return when 'kthread_should_stop()' is true (which means
+ * kthread_stop() has been called).  The return value should be zero
+ * or a negative error number; it will be passed to kthread_stop().
+ *
+ * Returns a task_struct or ERR_PTR(-ENOMEM).
+ */
 struct task_struct *kthread_create(int (*threadfn)(void *data),
                                    void *data,
                                    const char namefmt[],
@@ -156,6 +182,15 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create);
 
+/**
+ * kthread_bind - bind a just-created kthread to a cpu.
+ * @k: thread created by kthread_create().
+ * @cpu: cpu (might not be online, must be possible) for @k to run on.
+ *
+ * Description: This function is equivalent to set_cpus_allowed(),
+ * except that @cpu doesn't need to be online, and the thread must be
+ * stopped (i.e., just returned from kthread_create().
+ */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
         BUG_ON(k->state != TASK_INTERRUPTIBLE);
@@ -166,12 +201,36 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
 }
 EXPORT_SYMBOL(kthread_bind);
 
+/**
+ * kthread_stop - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_stop() for @k to return true, wakes it, and
+ * waits for it to exit.  Your threadfn() must not call do_exit()
+ * itself if you use this function!  This can also be called after
+ * kthread_create() instead of calling wake_up_process(): the thread
+ * will exit without calling threadfn().
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
 int kthread_stop(struct task_struct *k)
 {
         return kthread_stop_sem(k, NULL);
 }
 EXPORT_SYMBOL(kthread_stop);
 
+/**
+ * kthread_stop_sem - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ * @s: semaphore that @k waits on while idle.
+ *
+ * Does essentially the same thing as kthread_stop() above, but wakes
+ * @k by calling up(@s).
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
 int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
 {
         int ret;
@@ -210,5 +269,5 @@ static __init int helper_init(void)
 
         return 0;
 }
-core_initcall(helper_init);
 
+core_initcall(helper_init);
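
The kerneldoc added above documents the kthread lifecycle end to end; a minimal usage sketch (illustrative, not from this patch — the thread name, CPU choice and work loop are placeholders):

    #include <linux/kthread.h>
    #include <linux/err.h>

    static struct task_struct *worker;

    static int worker_fn(void *data)
    {
            /* run until someone calls kthread_stop() on this thread */
            while (!kthread_should_stop()) {
                    /* ... do one unit of work ... */
                    schedule_timeout_interruptible(HZ);
            }
            return 0;                       /* returned to the kthread_stop() caller */
    }

    static int start_worker(void)
    {
            worker = kthread_create(worker_fn, NULL, "example/%d", 0);
            if (IS_ERR(worker))
                    return PTR_ERR(worker);
            kthread_bind(worker, 0);        /* optional: pin before the first wakeup */
            wake_up_process(worker);        /* kthread_create() leaves the thread stopped */
            return 0;
    }

    static void stop_worker(void)
    {
            kthread_stop(worker);           /* wakes the thread, waits for worker_fn() to return */
    }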
diff --git a/kernel/module.c b/kernel/module.c
index bbe04862e1..10e5b872ad 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -40,9 +40,11 @@
 #include <linux/string.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
+#include <linux/unwind.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 #include <asm/cacheflush.h>
+#include <linux/license.h>
 
 #if 0
 #define DEBUGP printk
@@ -1051,6 +1053,8 @@ static void free_module(struct module *mod)
         remove_sect_attrs(mod);
         mod_kobject_remove(mod);
 
+        unwind_remove_table(mod->unwind_info, 0);
+
         /* Arch-specific cleanup. */
         module_arch_cleanup(mod);
 
@@ -1248,16 +1252,6 @@ static void layout_sections(struct module *mod,
         }
 }
 
-static inline int license_is_gpl_compatible(const char *license)
-{
-        return (strcmp(license, "GPL") == 0
-                || strcmp(license, "GPL v2") == 0
-                || strcmp(license, "GPL and additional rights") == 0
-                || strcmp(license, "Dual BSD/GPL") == 0
-                || strcmp(license, "Dual MIT/GPL") == 0
-                || strcmp(license, "Dual MPL/GPL") == 0);
-}
-
 static void set_license(struct module *mod, const char *license)
 {
         if (!license)
@@ -1326,7 +1320,7 @@ int is_exported(const char *name, const struct module *mod)
         if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab))
                 return 1;
         else
-                if (lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
+                if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
                         return 1;
         else
                 return 0;
@@ -1412,7 +1406,7 @@ static struct module *load_module(void __user *umod,
         unsigned int i, symindex = 0, strindex = 0, setupindex, exindex,
                 exportindex, modindex, obsparmindex, infoindex, gplindex,
                 crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
-                gplfuturecrcindex;
+                gplfuturecrcindex, unwindex = 0;
         struct module *mod;
         long err = 0;
         void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1502,6 +1496,9 @@ static struct module *load_module(void __user *umod,
         versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
         infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
         pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
+#ifdef ARCH_UNWIND_SECTION_NAME
+        unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
+#endif
 
         /* Don't keep modinfo section */
         sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1510,6 +1507,8 @@ static struct module *load_module(void __user *umod,
         sechdrs[symindex].sh_flags |= SHF_ALLOC;
         sechdrs[strindex].sh_flags |= SHF_ALLOC;
 #endif
+        if (unwindex)
+                sechdrs[unwindex].sh_flags |= SHF_ALLOC;
 
         /* Check module struct version now, before we try to use module. */
         if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -1738,6 +1737,11 @@ static struct module *load_module(void __user *umod,
                 goto arch_cleanup;
         add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 
+        /* Size of section 0 is 0, so this works well if no unwind info. */
+        mod->unwind_info = unwind_add_table(mod,
+                                            (void *)sechdrs[unwindex].sh_addr,
+                                            sechdrs[unwindex].sh_size);
+
         /* Get rid of temporary copy */
         vfree(hdr);
 
@@ -1836,6 +1840,7 @@ sys_init_module(void __user *umod,
         mod->state = MODULE_STATE_LIVE;
         /* Drop initial reference. */
         module_put(mod);
+        unwind_remove_table(mod->unwind_info, 1);
         module_free(mod, mod->module_init);
         mod->module_init = NULL;
         mod->init_size = 0;
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index f4913c3769..036b6285b1 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -153,13 +153,13 @@ next:
                         continue;
                 count++;
                 cursor = curr->next;
-                debug_spin_lock_restore(&debug_mutex_lock, flags);
+                debug_spin_unlock_restore(&debug_mutex_lock, flags);
 
                 printk("\n#%03d: ", count);
                 printk_lock(lock, filter ? 0 : 1);
                 goto next;
         }
-        debug_spin_lock_restore(&debug_mutex_lock, flags);
+        debug_spin_unlock_restore(&debug_mutex_lock, flags);
         printk("\n");
 }
 
@@ -316,7 +316,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task)
                         continue;
                 list_del_init(curr);
                 DEBUG_OFF();
-                debug_spin_lock_restore(&debug_mutex_lock, flags);
+                debug_spin_unlock_restore(&debug_mutex_lock, flags);
 
                 printk("BUG: %s/%d, lock held at task exit time!\n",
                         task->comm, task->pid);
@@ -325,7 +325,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task)
                 printk("exiting task is not even the owner??\n");
                 return;
         }
-        debug_spin_lock_restore(&debug_mutex_lock, flags);
+        debug_spin_unlock_restore(&debug_mutex_lock, flags);
 }
 
 /*
@@ -352,7 +352,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
                         continue;
                 list_del_init(curr);
                 DEBUG_OFF();
-                debug_spin_lock_restore(&debug_mutex_lock, flags);
+                debug_spin_unlock_restore(&debug_mutex_lock, flags);
 
                 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
                         current->comm, current->pid, lock, from, to);
@@ -362,7 +362,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
                 printk("freeing task is not even the owner??\n");
                 return;
         }
-        debug_spin_lock_restore(&debug_mutex_lock, flags);
+        debug_spin_unlock_restore(&debug_mutex_lock, flags);
 }
 
 /*
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index fd384050ac..a5196c36a5 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -46,21 +46,6 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 extern void debug_mutex_unlock(struct mutex *lock);
 extern void debug_mutex_init(struct mutex *lock, const char *name);
 
-#define debug_spin_lock(lock)				\
-	do {						\
-		local_irq_disable();			\
-		if (debug_mutex_on)			\
-			spin_lock(lock);		\
-	} while (0)
-
-#define debug_spin_unlock(lock)				\
-	do {						\
-		if (debug_mutex_on)			\
-			spin_unlock(lock);		\
-		local_irq_enable();			\
-		preempt_check_resched();		\
-	} while (0)
-
 #define debug_spin_lock_save(lock, flags)		\
 	do {						\
 		local_irq_save(flags);			\
@@ -68,7 +53,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name);
 		spin_lock(lock);			\
 	} while (0)
 
-#define debug_spin_lock_restore(lock, flags)		\
+#define debug_spin_unlock_restore(lock, flags)		\
 	do {						\
 		if (debug_mutex_on)			\
 			spin_unlock(lock);		\
@@ -76,20 +61,20 @@ extern void debug_mutex_init(struct mutex *lock, const char *name);
 		preempt_check_resched();		\
 	} while (0)
 
-#define spin_lock_mutex(lock)				\
+#define spin_lock_mutex(lock, flags)			\
 	do {						\
 		struct mutex *l = container_of(lock, struct mutex, wait_lock); \
 							\
 		DEBUG_WARN_ON(in_interrupt());		\
-		debug_spin_lock(&debug_mutex_lock);	\
+		debug_spin_lock_save(&debug_mutex_lock, flags); \
 		spin_lock(lock);			\
 		DEBUG_WARN_ON(l->magic != l);		\
 	} while (0)
 
-#define spin_unlock_mutex(lock)				\
+#define spin_unlock_mutex(lock, flags)			\
 	do {						\
 		spin_unlock(lock);			\
-		debug_spin_unlock(&debug_mutex_lock);	\
+		debug_spin_unlock_restore(&debug_mutex_lock, flags);	\
 	} while (0)
 
 #define DEBUG_OFF()					\
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5449b210d9..7043db21bb 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -125,10 +125,11 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
         struct task_struct *task = current;
         struct mutex_waiter waiter;
         unsigned int old_val;
+        unsigned long flags;
 
         debug_mutex_init_waiter(&waiter);
 
-        spin_lock_mutex(&lock->wait_lock);
+        spin_lock_mutex(&lock->wait_lock, flags);
 
         debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip);
 
@@ -157,7 +158,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
                 if (unlikely(state == TASK_INTERRUPTIBLE &&
                              signal_pending(task))) {
                         mutex_remove_waiter(lock, &waiter, task->thread_info);
-                        spin_unlock_mutex(&lock->wait_lock);
+                        spin_unlock_mutex(&lock->wait_lock, flags);
 
                         debug_mutex_free_waiter(&waiter);
                         return -EINTR;
@@ -165,9 +166,9 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
                 __set_task_state(task, state);
 
                 /* didnt get the lock, go to sleep: */
-                spin_unlock_mutex(&lock->wait_lock);
+                spin_unlock_mutex(&lock->wait_lock, flags);
                 schedule();
-                spin_lock_mutex(&lock->wait_lock);
+                spin_lock_mutex(&lock->wait_lock, flags);
         }
 
         /* got the lock - rejoice! */
@@ -178,7 +179,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
         if (likely(list_empty(&lock->wait_list)))
                 atomic_set(&lock->count, 0);
 
-        spin_unlock_mutex(&lock->wait_lock);
+        spin_unlock_mutex(&lock->wait_lock, flags);
 
         debug_mutex_free_waiter(&waiter);
 
@@ -203,10 +204,11 @@ static fastcall noinline void
 __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
 {
         struct mutex *lock = container_of(lock_count, struct mutex, count);
+        unsigned long flags;
 
         DEBUG_WARN_ON(lock->owner != current_thread_info());
 
-        spin_lock_mutex(&lock->wait_lock);
+        spin_lock_mutex(&lock->wait_lock, flags);
 
         /*
          * some architectures leave the lock unlocked in the fastpath failure
@@ -231,7 +233,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
 
         debug_mutex_clear_owner(lock);
 
-        spin_unlock_mutex(&lock->wait_lock);
+        spin_unlock_mutex(&lock->wait_lock, flags);
 }
 
 /*
@@ -276,9 +278,10 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__)
 static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
 {
         struct mutex *lock = container_of(lock_count, struct mutex, count);
+        unsigned long flags;
         int prev;
 
-        spin_lock_mutex(&lock->wait_lock);
+        spin_lock_mutex(&lock->wait_lock, flags);
 
         prev = atomic_xchg(&lock->count, -1);
         if (likely(prev == 1))
@@ -287,7 +290,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
         if (likely(list_empty(&lock->wait_list)))
                 atomic_set(&lock->count, 0);
 
-        spin_unlock_mutex(&lock->wait_lock);
+        spin_unlock_mutex(&lock->wait_lock, flags);
 
         return prev == 1;
 }
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 00fe84e7b6..0691899472 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -9,8 +9,10 @@
  * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
  */
 
-#define spin_lock_mutex(lock)			spin_lock(lock)
-#define spin_unlock_mutex(lock)			spin_unlock(lock)
+#define spin_lock_mutex(lock, flags) \
+		do { spin_lock(lock); (void)(flags); } while (0)
+#define spin_unlock_mutex(lock, flags) \
+		do { spin_unlock(lock); (void)(flags); } while (0)
 #define mutex_remove_waiter(lock, waiter, ti) \
 	__list_del((waiter)->list.prev, (waiter)->list.next)
 
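
With CONFIG_DEBUG_MUTEXES the internal debug lock is now taken with interrupts disabled, so the wait_lock helpers carry the saved IRQ flags through every call site; in the non-debug variant above the flags argument is simply evaluated and discarded. A simplified sketch of how the kernel/mutex.c callers in the hunks above use it (illustrative; the debug-build comments summarize the debug_spin_lock_save/debug_spin_unlock_restore macros):

    unsigned long flags;

    spin_lock_mutex(&lock->wait_lock, flags);   /* debug build: local_irq_save(flags), then take the locks */
    /* ... manipulate lock->wait_list ... */
    spin_unlock_mutex(&lock->wait_lock, flags); /* debug build: release the locks, then local_irq_restore(flags) */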
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index cdf315e794..fc311a4673 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -38,7 +38,7 @@ config PM_DEBUG
 
 config PM_TRACE
 	bool "Suspend/resume event tracing"
-	depends on PM && PM_DEBUG && X86
+	depends on PM && PM_DEBUG && X86_32
 	default y
 	---help---
 	This enables some cheesy code to save the last PM event point in the
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 81d4d982f3..e13e740678 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -231,7 +231,7 @@ static int software_resume(void)
 late_initcall(software_resume);
 
 
-static char * pm_disk_modes[] = {
+static const char * const pm_disk_modes[] = {
 	[PM_DISK_FIRMWARE] = "firmware",
 	[PM_DISK_PLATFORM] = "platform",
 	[PM_DISK_SHUTDOWN] = "shutdown",
diff --git a/kernel/power/main.c b/kernel/power/main.c
index cdf0f07af9..6d295c7767 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -145,7 +145,7 @@ static void suspend_finish(suspend_state_t state)
 
 
 
-static char *pm_states[PM_SUSPEND_MAX] = {
+static const char * const pm_states[PM_SUSPEND_MAX] = {
 	[PM_SUSPEND_STANDBY] = "standby",
 	[PM_SUSPEND_MEM] = "mem",
 #ifdef CONFIG_SOFTWARE_SUSPEND
@@ -262,7 +262,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf)
 static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
 {
 	suspend_state_t state = PM_SUSPEND_STANDBY;
-	char ** s;
+	const char * const *s;
 	char *p;
 	int error;
 	int len;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 98c41423f3..57a792982f 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -105,10 +105,6 @@ extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
 extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap);
 extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
 
-extern unsigned int count_special_pages(void);
-extern int save_special_mem(void);
-extern int restore_special_mem(void);
-
 extern int swsusp_check(void);
 extern int swsusp_shrink_memory(void);
 extern void swsusp_free(void);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3d9284100b..24c96f3542 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -39,90 +39,8 @@ static unsigned int nr_copy_pages;
 static unsigned int nr_meta_pages;
 static unsigned long *buffer;
 
-struct arch_saveable_page {
-        unsigned long start;
-        unsigned long end;
-        char *data;
-        struct arch_saveable_page *next;
-};
-static struct arch_saveable_page *arch_pages;
-
-int swsusp_add_arch_pages(unsigned long start, unsigned long end)
-{
-        struct arch_saveable_page *tmp;
-
-        while (start < end) {
-                tmp = kzalloc(sizeof(struct arch_saveable_page), GFP_KERNEL);
-                if (!tmp)
-                        return -ENOMEM;
-                tmp->start = start;
-                tmp->end = ((start >> PAGE_SHIFT) + 1) << PAGE_SHIFT;
-                if (tmp->end > end)
-                        tmp->end = end;
-                tmp->next = arch_pages;
-                start = tmp->end;
-                arch_pages = tmp;
-        }
-        return 0;
-}
-
-static unsigned int count_arch_pages(void)
-{
-        unsigned int count = 0;
-        struct arch_saveable_page *tmp = arch_pages;
-        while (tmp) {
-                count++;
-                tmp = tmp->next;
-        }
-        return count;
-}
-
-static int save_arch_mem(void)
-{
-        char *kaddr;
-        struct arch_saveable_page *tmp = arch_pages;
-        int offset;
-
-        pr_debug("swsusp: Saving arch specific memory");
-        while (tmp) {
-                tmp->data = (char *)__get_free_page(GFP_ATOMIC);
-                if (!tmp->data)
-                        return -ENOMEM;
-                offset = tmp->start - (tmp->start & PAGE_MASK);
-                /* arch pages might haven't a 'struct page' */
-                kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
-                memcpy(tmp->data + offset, kaddr + offset,
-                        tmp->end - tmp->start);
-                kunmap_atomic(kaddr, KM_USER0);
-
-                tmp = tmp->next;
-        }
-        return 0;
-}
-
-static int restore_arch_mem(void)
-{
-        char *kaddr;
-        struct arch_saveable_page *tmp = arch_pages;
-        int offset;
-
-        while (tmp) {
-                if (!tmp->data)
-                        continue;
-                offset = tmp->start - (tmp->start & PAGE_MASK);
-                kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
-                memcpy(kaddr + offset, tmp->data + offset,
-                        tmp->end - tmp->start);
-                kunmap_atomic(kaddr, KM_USER0);
-                free_page((long)tmp->data);
-                tmp->data = NULL;
-                tmp = tmp->next;
-        }
-        return 0;
-}
-
 #ifdef CONFIG_HIGHMEM
-static unsigned int count_highmem_pages(void)
+unsigned int count_highmem_pages(void)
 {
         struct zone *zone;
         unsigned long zone_pfn;
@@ -199,7 +117,7 @@ static int save_highmem_zone(struct zone *zone)
         return 0;
 }
 
-static int save_highmem(void)
+int save_highmem(void)
 {
         struct zone *zone;
         int res = 0;
@@ -216,7 +134,7 @@ static int save_highmem(void)
         return 0;
 }
 
-static int restore_highmem(void)
+int restore_highmem(void)
 {
         printk("swsusp: Restoring Highmem\n");
         while (highmem_copy) {
@@ -238,29 +156,6 @@ static inline int save_highmem(void) {return 0;}
 static inline int restore_highmem(void) {return 0;}
 #endif
 
-unsigned int count_special_pages(void)
-{
-        return count_arch_pages() + count_highmem_pages();
-}
-
-int save_special_mem(void)
-{
-        int ret;
-        ret = save_arch_mem();
-        if (!ret)
-                ret = save_highmem();
-        return ret;
-}
-
-int restore_special_mem(void)
-{
-        int ret;
-        ret = restore_arch_mem();
-        if (!ret)
-                ret = restore_highmem();
-        return ret;
-}
-
 static int pfn_is_nosave(unsigned long pfn)
 {
         unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
@@ -286,6 +181,7 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn)
                 return 0;
 
         page = pfn_to_page(pfn);
+        BUG_ON(PageReserved(page) && PageNosave(page));
         if (PageNosave(page))
                 return 0;
         if (PageReserved(page) && pfn_is_nosave(pfn))
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index f0ee4e7780..17f669c830 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -62,6 +62,16 @@ unsigned long image_size = 500 * 1024 * 1024;
62 62
63int in_suspend __nosavedata = 0; 63int in_suspend __nosavedata = 0;
64 64
65#ifdef CONFIG_HIGHMEM
66unsigned int count_highmem_pages(void);
67int save_highmem(void);
68int restore_highmem(void);
69#else
70static inline int save_highmem(void) { return 0; }
71static inline int restore_highmem(void) { return 0; }
72static inline unsigned int count_highmem_pages(void) { return 0; }
73#endif
74
65/** 75/**
66 * The following functions are used for tracing the allocated 76 * The following functions are used for tracing the allocated
67 * swap pages, so that they can be freed in case of an error. 77 * swap pages, so that they can be freed in case of an error.
@@ -182,7 +192,7 @@ int swsusp_shrink_memory(void)
182 192
183 printk("Shrinking memory... "); 193 printk("Shrinking memory... ");
184 do { 194 do {
185 size = 2 * count_special_pages(); 195 size = 2 * count_highmem_pages();
186 size += size / 50 + count_data_pages(); 196 size += size / 50 + count_data_pages();
187 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + 197 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
188 PAGES_FOR_IO; 198 PAGES_FOR_IO;
@@ -226,7 +236,7 @@ int swsusp_suspend(void)
226 goto Enable_irqs; 236 goto Enable_irqs;
227 } 237 }
228 238
229 if ((error = save_special_mem())) { 239 if ((error = save_highmem())) {
230 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); 240 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
231 goto Restore_highmem; 241 goto Restore_highmem;
232 } 242 }
@@ -237,7 +247,7 @@ int swsusp_suspend(void)
237 /* Restore control flow magically appears here */ 247 /* Restore control flow magically appears here */
238 restore_processor_state(); 248 restore_processor_state();
239Restore_highmem: 249Restore_highmem:
240 restore_special_mem(); 250 restore_highmem();
241 device_power_up(); 251 device_power_up();
242Enable_irqs: 252Enable_irqs:
243 local_irq_enable(); 253 local_irq_enable();
@@ -263,7 +273,7 @@ int swsusp_resume(void)
263 */ 273 */
264 swsusp_free(); 274 swsusp_free();
265 restore_processor_state(); 275 restore_processor_state();
266 restore_special_mem(); 276 restore_highmem();
267 touch_softlockup_watchdog(); 277 touch_softlockup_watchdog();
268 device_power_up(); 278 device_power_up();
269 local_irq_enable(); 279 local_irq_enable();
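
Note on the swsusp_shrink_memory() hunk above: a worked reading of the target-size estimate, with the factors labelled. This is illustration only; the expressions are taken from the hunk, the comments are interpretation.

	size  = 2 * count_highmem_pages();                                   /* highmem contribution (counted twice) */
	size += size / 50 + count_data_pages();                              /* ~2% slack plus the lowmem data pages */
	size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + PAGES_FOR_IO;   /* pbe pages, rounded up, plus the I/O reserve */

Memory is shrunk until an image of roughly this many pages can be accommodated; with this patch the estimate goes back to plain highmem counting instead of count_special_pages().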
diff --git a/kernel/printk.c b/kernel/printk.c
index 19a9556192..95b7fe17f1 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -24,6 +24,7 @@
24#include <linux/console.h> 24#include <linux/console.h>
25#include <linux/init.h> 25#include <linux/init.h>
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/moduleparam.h>
27#include <linux/interrupt.h> /* For in_interrupt() */ 28#include <linux/interrupt.h> /* For in_interrupt() */
28#include <linux/config.h> 29#include <linux/config.h>
29#include <linux/delay.h> 30#include <linux/delay.h>
@@ -327,7 +328,9 @@ static void __call_console_drivers(unsigned long start, unsigned long end)
327 struct console *con; 328 struct console *con;
328 329
329 for (con = console_drivers; con; con = con->next) { 330 for (con = console_drivers; con; con = con->next) {
330 if ((con->flags & CON_ENABLED) && con->write) 331 if ((con->flags & CON_ENABLED) && con->write &&
332 (cpu_online(smp_processor_id()) ||
333 (con->flags & CON_ANYTIME)))
331 con->write(con, &LOG_BUF(start), end - start); 334 con->write(con, &LOG_BUF(start), end - start);
332 } 335 }
333} 336}
@@ -437,6 +440,7 @@ static int printk_time = 1;
437#else 440#else
438static int printk_time = 0; 441static int printk_time = 0;
439#endif 442#endif
443module_param(printk_time, int, S_IRUGO | S_IWUSR);
440 444
441static int __init printk_time_setup(char *str) 445static int __init printk_time_setup(char *str)
442{ 446{
@@ -453,6 +457,18 @@ __attribute__((weak)) unsigned long long printk_clock(void)
453 return sched_clock(); 457 return sched_clock();
454} 458}
455 459
460/* Check if we have any console registered that can be called early in boot. */
461static int have_callable_console(void)
462{
463 struct console *con;
464
465 for (con = console_drivers; con; con = con->next)
466 if (con->flags & CON_ANYTIME)
467 return 1;
468
469 return 0;
470}
471
456/** 472/**
457 * printk - print a kernel message 473 * printk - print a kernel message
458 * @fmt: format string 474 * @fmt: format string
@@ -566,27 +582,29 @@ asmlinkage int vprintk(const char *fmt, va_list args)
566 log_level_unknown = 1; 582 log_level_unknown = 1;
567 } 583 }
568 584
569 if (!cpu_online(smp_processor_id())) { 585 if (!down_trylock(&console_sem)) {
570 /* 586 /*
571 * Some console drivers may assume that per-cpu resources have 587 * We own the drivers. We can drop the spinlock and
572 * been allocated. So don't allow them to be called by this 588 * let release_console_sem() print the text, maybe ...
573 * CPU until it is officially up. We shouldn't be calling into
574 * random console drivers on a CPU which doesn't exist yet..
575 */ 589 */
590 console_locked = 1;
576 printk_cpu = UINT_MAX; 591 printk_cpu = UINT_MAX;
577 spin_unlock_irqrestore(&logbuf_lock, flags); 592 spin_unlock_irqrestore(&logbuf_lock, flags);
578 goto out; 593
579 }
580 if (!down_trylock(&console_sem)) {
581 console_locked = 1;
582 /* 594 /*
583 * We own the drivers. We can drop the spinlock and let 595 * Console drivers may assume that per-cpu resources have
584 * release_console_sem() print the text 596 * been allocated. So unless they're explicitly marked as
597 * being able to cope (CON_ANYTIME) don't call them until
598 * this CPU is officially up.
585 */ 599 */
586 printk_cpu = UINT_MAX; 600 if (cpu_online(smp_processor_id()) || have_callable_console()) {
587 spin_unlock_irqrestore(&logbuf_lock, flags); 601 console_may_schedule = 0;
588 console_may_schedule = 0; 602 release_console_sem();
589 release_console_sem(); 603 } else {
604 /* Release by hand to avoid flushing the buffer. */
605 console_locked = 0;
606 up(&console_sem);
607 }
590 } else { 608 } else {
591 /* 609 /*
592 * Someone else owns the drivers. We drop the spinlock, which 610 * Someone else owns the drivers. We drop the spinlock, which
@@ -596,7 +614,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
596 printk_cpu = UINT_MAX; 614 printk_cpu = UINT_MAX;
597 spin_unlock_irqrestore(&logbuf_lock, flags); 615 spin_unlock_irqrestore(&logbuf_lock, flags);
598 } 616 }
599out: 617
600 preempt_enable(); 618 preempt_enable();
601 return printed_len; 619 return printed_len;
602} 620}
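
Note on the printk.c change above: consoles are now skipped while their CPU is still coming up unless they set CON_ANYTIME, and vprintk() only flushes early when have_callable_console() finds one. A minimal sketch of a console opting in follows; the driver, its name and its output helper are hypothetical, only CON_PRINTBUFFER/CON_ANYTIME, struct console and register_console() come from the kernel.

#include <linux/console.h>

extern void hypo_poll_putchar(char c);	/* hypothetical poll-mode output, no per-cpu state */

static void hypo_early_write(struct console *con, const char *s, unsigned int n)
{
	while (n--)
		hypo_poll_putchar(*s++);
}

static struct console hypo_early_console = {
	.name	= "hypo",
	.write	= hypo_early_write,
	.flags	= CON_PRINTBUFFER | CON_ANYTIME,	/* callable before cpu_online() */
	.index	= -1,
};

/* registered early in boot, e.g. register_console(&hypo_early_console); */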
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 921c22ad16..335c5b932e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -120,8 +120,18 @@ int ptrace_check_attach(struct task_struct *child, int kill)
120 120
121static int may_attach(struct task_struct *task) 121static int may_attach(struct task_struct *task)
122{ 122{
123 if (!task->mm) 123 /* May we inspect the given task?
124 return -EPERM; 124 * This check is used both for attaching with ptrace
125 * and for allowing access to sensitive information in /proc.
126 *
127 * ptrace_attach denies several cases that /proc allows
128 * because setting up the necessary parent/child relationship
129 * or halting the specified task is impossible.
130 */
131 int dumpable = 0;
132 /* Don't let security modules deny introspection */
133 if (task == current)
134 return 0;
125 if (((current->uid != task->euid) || 135 if (((current->uid != task->euid) ||
126 (current->uid != task->suid) || 136 (current->uid != task->suid) ||
127 (current->uid != task->uid) || 137 (current->uid != task->uid) ||
@@ -130,7 +140,9 @@ static int may_attach(struct task_struct *task)
130 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) 140 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
131 return -EPERM; 141 return -EPERM;
132 smp_rmb(); 142 smp_rmb();
133 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) 143 if (task->mm)
144 dumpable = task->mm->dumpable;
145 if (!dumpable && !capable(CAP_SYS_PTRACE))
134 return -EPERM; 146 return -EPERM;
135 147
136 return security_ptrace(current, task); 148 return security_ptrace(current, task);
@@ -176,6 +188,8 @@ repeat:
176 goto repeat; 188 goto repeat;
177 } 189 }
178 190
191 if (!task->mm)
192 goto bad;
179 /* the same process cannot be attached many times */ 193 /* the same process cannot be attached many times */
180 if (task->ptrace & PT_PTRACED) 194 if (task->ptrace & PT_PTRACED)
181 goto bad; 195 goto bad;
@@ -200,7 +214,7 @@ out:
200 return retval; 214 return retval;
201} 215}
202 216
203void __ptrace_detach(struct task_struct *child, unsigned int data) 217static inline void __ptrace_detach(struct task_struct *child, unsigned int data)
204{ 218{
205 child->exit_code = data; 219 child->exit_code = data;
206 /* .. re-parent .. */ 220 /* .. re-parent .. */
@@ -219,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
219 ptrace_disable(child); 233 ptrace_disable(child);
220 234
221 write_lock_irq(&tasklist_lock); 235 write_lock_irq(&tasklist_lock);
236 /* protect against de_thread()->release_task() */
222 if (child->ptrace) 237 if (child->ptrace)
223 __ptrace_detach(child, data); 238 __ptrace_detach(child, data);
224 write_unlock_irq(&tasklist_lock); 239 write_unlock_irq(&tasklist_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index 5dbc426944..a856040c20 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -818,6 +818,11 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
818 * the target CPU. 818 * the target CPU.
819 */ 819 */
820#ifdef CONFIG_SMP 820#ifdef CONFIG_SMP
821
822#ifndef tsk_is_polling
823#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
824#endif
825
821static void resched_task(task_t *p) 826static void resched_task(task_t *p)
822{ 827{
823 int cpu; 828 int cpu;
@@ -833,9 +838,9 @@ static void resched_task(task_t *p)
833 if (cpu == smp_processor_id()) 838 if (cpu == smp_processor_id())
834 return; 839 return;
835 840
836 /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ 841 /* NEED_RESCHED must be visible before we test polling */
837 smp_mb(); 842 smp_mb();
838 if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) 843 if (!tsk_is_polling(p))
839 smp_send_reschedule(cpu); 844 smp_send_reschedule(cpu);
840} 845}
841#else 846#else
@@ -4152,7 +4157,7 @@ EXPORT_SYMBOL(yield);
4152 */ 4157 */
4153void __sched io_schedule(void) 4158void __sched io_schedule(void)
4154{ 4159{
4155 struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); 4160 struct runqueue *rq = &__raw_get_cpu_var(runqueues);
4156 4161
4157 atomic_inc(&rq->nr_iowait); 4162 atomic_inc(&rq->nr_iowait);
4158 schedule(); 4163 schedule();
@@ -4163,7 +4168,7 @@ EXPORT_SYMBOL(io_schedule);
4163 4168
4164long __sched io_schedule_timeout(long timeout) 4169long __sched io_schedule_timeout(long timeout)
4165{ 4170{
4166 struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); 4171 struct runqueue *rq = &__raw_get_cpu_var(runqueues);
4167 long ret; 4172 long ret;
4168 4173
4169 atomic_inc(&rq->nr_iowait); 4174 atomic_inc(&rq->nr_iowait);
@@ -4247,7 +4252,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
4247 if (retval) 4252 if (retval)
4248 goto out_unlock; 4253 goto out_unlock;
4249 4254
4250 jiffies_to_timespec(p->policy & SCHED_FIFO ? 4255 jiffies_to_timespec(p->policy == SCHED_FIFO ?
4251 0 : task_timeslice(p), &t); 4256 0 : task_timeslice(p), &t);
4252 read_unlock(&tasklist_lock); 4257 read_unlock(&tasklist_lock);
4253 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; 4258 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
@@ -4756,6 +4761,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
4756 break; 4761 break;
4757#ifdef CONFIG_HOTPLUG_CPU 4762#ifdef CONFIG_HOTPLUG_CPU
4758 case CPU_UP_CANCELED: 4763 case CPU_UP_CANCELED:
4764 if (!cpu_rq(cpu)->migration_thread)
4765 break;
4759 /* Unbind it from offline cpu so it can run. Fall thru. */ 4766 /* Unbind it from offline cpu so it can run. Fall thru. */
4760 kthread_bind(cpu_rq(cpu)->migration_thread, 4767 kthread_bind(cpu_rq(cpu)->migration_thread,
4761 any_online_cpu(cpu_online_map)); 4768 any_online_cpu(cpu_online_map));
diff --git a/kernel/signal.c b/kernel/signal.c
index 1b3c921737..52adf53929 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1531,6 +1531,35 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1531 spin_unlock_irqrestore(&sighand->siglock, flags); 1531 spin_unlock_irqrestore(&sighand->siglock, flags);
1532} 1532}
1533 1533
1534static inline int may_ptrace_stop(void)
1535{
1536 if (!likely(current->ptrace & PT_PTRACED))
1537 return 0;
1538
1539 if (unlikely(current->parent == current->real_parent &&
1540 (current->ptrace & PT_ATTACHED)))
1541 return 0;
1542
1543 if (unlikely(current->signal == current->parent->signal) &&
1544 unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))
1545 return 0;
1546
1547 /*
1548 * Are we in the middle of do_coredump?
1549 * If so and our tracer is also part of the coredump stopping
1550 * is a deadlock situation, and pointless because our tracer
1551 * is dead so don't allow us to stop.
1552 * If SIGKILL was already sent before the caller unlocked
1553 * ->siglock we must see ->core_waiters != 0. Otherwise it
1554 * is safe to enter schedule().
1555 */
1556 if (unlikely(current->mm->core_waiters) &&
1557 unlikely(current->mm == current->parent->mm))
1558 return 0;
1559
1560 return 1;
1561}
1562
1534/* 1563/*
1535 * This must be called with current->sighand->siglock held. 1564 * This must be called with current->sighand->siglock held.
1536 * 1565 *
@@ -1559,11 +1588,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
1559 spin_unlock_irq(&current->sighand->siglock); 1588 spin_unlock_irq(&current->sighand->siglock);
1560 try_to_freeze(); 1589 try_to_freeze();
1561 read_lock(&tasklist_lock); 1590 read_lock(&tasklist_lock);
1562 if (likely(current->ptrace & PT_PTRACED) && 1591 if (may_ptrace_stop()) {
1563 likely(current->parent != current->real_parent ||
1564 !(current->ptrace & PT_ATTACHED)) &&
1565 (likely(current->parent->signal != current->signal) ||
1566 !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
1567 do_notify_parent_cldstop(current, CLD_TRAPPED); 1592 do_notify_parent_cldstop(current, CLD_TRAPPED);
1568 read_unlock(&tasklist_lock); 1593 read_unlock(&tasklist_lock);
1569 schedule(); 1594 schedule();
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 336f92d64e..9e2f1c6e73 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -470,6 +470,8 @@ static int cpu_callback(struct notifier_block *nfb,
470 break; 470 break;
471#ifdef CONFIG_HOTPLUG_CPU 471#ifdef CONFIG_HOTPLUG_CPU
472 case CPU_UP_CANCELED: 472 case CPU_UP_CANCELED:
473 if (!per_cpu(ksoftirqd, hotcpu))
474 break;
473 /* Unbind so it can run. Fall thru. */ 475 /* Unbind so it can run. Fall thru. */
474 kthread_bind(per_cpu(ksoftirqd, hotcpu), 476 kthread_bind(per_cpu(ksoftirqd, hotcpu),
475 any_online_cpu(cpu_online_map)); 477 any_online_cpu(cpu_online_map));
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 14c7faf029..b5c3b94e01 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -36,7 +36,7 @@ static struct notifier_block panic_block = {
36 36
37void touch_softlockup_watchdog(void) 37void touch_softlockup_watchdog(void)
38{ 38{
39 per_cpu(touch_timestamp, raw_smp_processor_id()) = jiffies; 39 __raw_get_cpu_var(touch_timestamp) = jiffies;
40} 40}
41EXPORT_SYMBOL(touch_softlockup_watchdog); 41EXPORT_SYMBOL(touch_softlockup_watchdog);
42 42
@@ -127,6 +127,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
127 break; 127 break;
128#ifdef CONFIG_HOTPLUG_CPU 128#ifdef CONFIG_HOTPLUG_CPU
129 case CPU_UP_CANCELED: 129 case CPU_UP_CANCELED:
130 if (!per_cpu(watchdog_task, hotcpu))
131 break;
130 /* Unbind so it can run. Fall thru. */ 132 /* Unbind so it can run. Fall thru. */
131 kthread_bind(per_cpu(watchdog_task, hotcpu), 133 kthread_bind(per_cpu(watchdog_task, hotcpu),
132 any_online_cpu(cpu_online_map)); 134 any_online_cpu(cpu_online_map));
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index dcfb5d7314..2c0aacc37c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,6 +4,7 @@
4#include <linux/cpu.h> 4#include <linux/cpu.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/syscalls.h> 6#include <linux/syscalls.h>
7#include <linux/kthread.h>
7#include <asm/atomic.h> 8#include <asm/atomic.h>
8#include <asm/semaphore.h> 9#include <asm/semaphore.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
@@ -25,13 +26,11 @@ static unsigned int stopmachine_num_threads;
25static atomic_t stopmachine_thread_ack; 26static atomic_t stopmachine_thread_ack;
26static DECLARE_MUTEX(stopmachine_mutex); 27static DECLARE_MUTEX(stopmachine_mutex);
27 28
28static int stopmachine(void *cpu) 29static int stopmachine(void *unused)
29{ 30{
30 int irqs_disabled = 0; 31 int irqs_disabled = 0;
31 int prepared = 0; 32 int prepared = 0;
32 33
33 set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
34
35 /* Ack: we are alive */ 34 /* Ack: we are alive */
36 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ 35 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
37 atomic_inc(&stopmachine_thread_ack); 36 atomic_inc(&stopmachine_thread_ack);
@@ -85,7 +84,8 @@ static void stopmachine_set_state(enum stopmachine_state state)
85 84
86static int stop_machine(void) 85static int stop_machine(void)
87{ 86{
88 int i, ret = 0; 87 int ret = 0;
88 unsigned int i;
89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
90 90
91 /* One high-prio thread per cpu. We'll do this one. */ 91 /* One high-prio thread per cpu. We'll do this one. */
@@ -96,11 +96,16 @@ static int stop_machine(void)
96 stopmachine_state = STOPMACHINE_WAIT; 96 stopmachine_state = STOPMACHINE_WAIT;
97 97
98 for_each_online_cpu(i) { 98 for_each_online_cpu(i) {
99 struct task_struct *tsk;
99 if (i == raw_smp_processor_id()) 100 if (i == raw_smp_processor_id())
100 continue; 101 continue;
101 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); 102 tsk = kthread_create(stopmachine, NULL, "stopmachine");
102 if (ret < 0) 103 if (IS_ERR(tsk)) {
104 ret = PTR_ERR(tsk);
103 break; 105 break;
106 }
107 kthread_bind(tsk, i);
108 wake_up_process(tsk);
104 stopmachine_num_threads++; 109 stopmachine_num_threads++;
105 } 110 }
106 111
diff --git a/kernel/sys.c b/kernel/sys.c
index 90930b28d2..2d5179c67c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -137,14 +137,15 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
137 unsigned long val, void *v) 137 unsigned long val, void *v)
138{ 138{
139 int ret = NOTIFY_DONE; 139 int ret = NOTIFY_DONE;
140 struct notifier_block *nb; 140 struct notifier_block *nb, *next_nb;
141 141
142 nb = rcu_dereference(*nl); 142 nb = rcu_dereference(*nl);
143 while (nb) { 143 while (nb) {
144 next_nb = rcu_dereference(nb->next);
144 ret = nb->notifier_call(nb, val, v); 145 ret = nb->notifier_call(nb, val, v);
145 if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) 146 if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
146 break; 147 break;
147 nb = rcu_dereference(nb->next); 148 nb = next_nb;
148 } 149 }
149 return ret; 150 return ret;
150} 151}
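
Note on the notifier_call_chain() change above: the next pointer is latched before the callback runs, so a handler may unregister (or free) its own notifier_block without the traversal then dereferencing stale memory. A sketch of the one-shot pattern this permits, shown with a raw chain where the caller supplies the locking; the chain and handler names are hypothetical.

#include <linux/notifier.h>

static RAW_NOTIFIER_HEAD(hypo_chain);	/* hypothetical chain, caller provides locking */

static int hypo_once_call(struct notifier_block *nb, unsigned long event, void *data)
{
	/* safe: the traversal already saved nb->next before invoking us */
	raw_notifier_chain_unregister(&hypo_chain, nb);
	return NOTIFY_OK;
}

static struct notifier_block hypo_once_nb = {
	.notifier_call = hypo_once_call,
};

/* added with raw_notifier_chain_register(&hypo_chain, &hypo_once_nb) under the caller's lock */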
@@ -588,7 +589,7 @@ void emergency_restart(void)
588} 589}
589EXPORT_SYMBOL_GPL(emergency_restart); 590EXPORT_SYMBOL_GPL(emergency_restart);
590 591
591void kernel_restart_prepare(char *cmd) 592static void kernel_restart_prepare(char *cmd)
592{ 593{
593 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 594 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
594 system_state = SYSTEM_RESTART; 595 system_state = SYSTEM_RESTART;
@@ -622,7 +623,7 @@ EXPORT_SYMBOL_GPL(kernel_restart);
622 * Move into place and start executing a preloaded standalone 623 * Move into place and start executing a preloaded standalone
623 * executable. If nothing was preloaded return an error. 624 * executable. If nothing was preloaded return an error.
624 */ 625 */
625void kernel_kexec(void) 626static void kernel_kexec(void)
626{ 627{
627#ifdef CONFIG_KEXEC 628#ifdef CONFIG_KEXEC
628 struct kimage *image; 629 struct kimage *image;
@@ -636,7 +637,6 @@ void kernel_kexec(void)
636 machine_kexec(image); 637 machine_kexec(image);
637#endif 638#endif
638} 639}
639EXPORT_SYMBOL_GPL(kernel_kexec);
640 640
641void kernel_shutdown_prepare(enum system_states state) 641void kernel_shutdown_prepare(enum system_states state)
642{ 642{
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index eb8bd214e7..f1a4eb1a65 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -73,6 +73,7 @@ extern int printk_ratelimit_burst;
73extern int pid_max_min, pid_max_max; 73extern int pid_max_min, pid_max_max;
74extern int sysctl_drop_caches; 74extern int sysctl_drop_caches;
75extern int percpu_pagelist_fraction; 75extern int percpu_pagelist_fraction;
76extern int compat_log;
76 77
77#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 78#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
78int unknown_nmi_panic; 79int unknown_nmi_panic;
@@ -143,7 +144,6 @@ static struct ctl_table_header root_table_header =
143 144
144static ctl_table kern_table[]; 145static ctl_table kern_table[];
145static ctl_table vm_table[]; 146static ctl_table vm_table[];
146static ctl_table proc_table[];
147static ctl_table fs_table[]; 147static ctl_table fs_table[];
148static ctl_table debug_table[]; 148static ctl_table debug_table[];
149static ctl_table dev_table[]; 149static ctl_table dev_table[];
@@ -203,12 +203,6 @@ static ctl_table root_table[] = {
203 }, 203 },
204#endif 204#endif
205 { 205 {
206 .ctl_name = CTL_PROC,
207 .procname = "proc",
208 .mode = 0555,
209 .child = proc_table,
210 },
211 {
212 .ctl_name = CTL_FS, 206 .ctl_name = CTL_FS,
213 .procname = "fs", 207 .procname = "fs",
214 .mode = 0555, 208 .mode = 0555,
@@ -684,6 +678,16 @@ static ctl_table kern_table[] = {
684 .proc_handler = &proc_dointvec, 678 .proc_handler = &proc_dointvec,
685 }, 679 },
686#endif 680#endif
681#ifdef CONFIG_COMPAT
682 {
683 .ctl_name = KERN_COMPAT_LOG,
684 .procname = "compat-log",
685 .data = &compat_log,
686 .maxlen = sizeof (int),
687 .mode = 0644,
688 .proc_handler = &proc_dointvec,
689 },
690#endif
687 { .ctl_name = 0 } 691 { .ctl_name = 0 }
688}; 692};
689 693
@@ -927,10 +931,6 @@ static ctl_table vm_table[] = {
927 { .ctl_name = 0 } 931 { .ctl_name = 0 }
928}; 932};
929 933
930static ctl_table proc_table[] = {
931 { .ctl_name = 0 }
932};
933
934static ctl_table fs_table[] = { 934static ctl_table fs_table[] = {
935 { 935 {
936 .ctl_name = FS_NRINODE, 936 .ctl_name = FS_NRINODE,
diff --git a/kernel/time.c b/kernel/time.c
index b00ddc71ce..5bd4897476 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -523,6 +523,7 @@ EXPORT_SYMBOL(do_gettimeofday);
523 523
524 524
525#else 525#else
526#ifndef CONFIG_GENERIC_TIME
526/* 527/*
527 * Simulate gettimeofday using do_gettimeofday which only allows a timeval 528 * Simulate gettimeofday using do_gettimeofday which only allows a timeval
528 * and therefore only yields usec accuracy 529 * and therefore only yields usec accuracy
@@ -537,6 +538,7 @@ void getnstimeofday(struct timespec *tv)
537} 538}
538EXPORT_SYMBOL_GPL(getnstimeofday); 539EXPORT_SYMBOL_GPL(getnstimeofday);
539#endif 540#endif
541#endif
540 542
541/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. 543/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
542 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 544 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
new file mode 100644
index 0000000000..e1dfd8e86c
--- /dev/null
+++ b/kernel/time/Makefile
@@ -0,0 +1 @@
obj-y += clocksource.o jiffies.o
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
new file mode 100644
index 0000000000..74eca5939b
--- /dev/null
+++ b/kernel/time/clocksource.c
@@ -0,0 +1,349 @@
1/*
2 * linux/kernel/time/clocksource.c
3 *
4 * This file contains the functions which manage clocksource drivers.
5 *
6 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 *
22 * TODO WishList:
23 * o Allow clocksource drivers to be unregistered
24 * o get rid of clocksource_jiffies extern
25 */
26
27#include <linux/clocksource.h>
28#include <linux/sysdev.h>
29#include <linux/init.h>
30#include <linux/module.h>
31
32/* XXX - Would like a better way for initializing curr_clocksource */
33extern struct clocksource clocksource_jiffies;
34
35/*[Clocksource internal variables]---------
36 * curr_clocksource:
37 * currently selected clocksource. Initialized to clocksource_jiffies.
38 * next_clocksource:
39 * pending next selected clocksource.
40 * clocksource_list:
41 * linked list with the registered clocksources
42 * clocksource_lock:
43 * protects manipulations to curr_clocksource and next_clocksource
44 * and the clocksource_list
45 * override_name:
46 * Name of the user-specified clocksource.
47 */
48static struct clocksource *curr_clocksource = &clocksource_jiffies;
49static struct clocksource *next_clocksource;
50static LIST_HEAD(clocksource_list);
51static DEFINE_SPINLOCK(clocksource_lock);
52static char override_name[32];
53static int finished_booting;
54
55/* clocksource_done_booting - Called near the end of bootup
56 *
57 * Hack to avoid lots of clocksource churn at boot time
58 */
59static int __init clocksource_done_booting(void)
60{
61 finished_booting = 1;
62 return 0;
63}
64
65late_initcall(clocksource_done_booting);
66
67/**
68 * clocksource_get_next - Returns the selected clocksource
69 *
70 */
71struct clocksource *clocksource_get_next(void)
72{
73 unsigned long flags;
74
75 spin_lock_irqsave(&clocksource_lock, flags);
76 if (next_clocksource && finished_booting) {
77 curr_clocksource = next_clocksource;
78 next_clocksource = NULL;
79 }
80 spin_unlock_irqrestore(&clocksource_lock, flags);
81
82 return curr_clocksource;
83}
84
85/**
86 * select_clocksource - Finds the best registered clocksource.
87 *
88 * Private function. Must hold clocksource_lock when called.
89 *
90 * Looks through the list of registered clocksources, returning
91 * the one with the highest rating value. If there is a clocksource
92 * name that matches the override string, it returns that clocksource.
93 */
94static struct clocksource *select_clocksource(void)
95{
96 struct clocksource *best = NULL;
97 struct list_head *tmp;
98
99 list_for_each(tmp, &clocksource_list) {
100 struct clocksource *src;
101
102 src = list_entry(tmp, struct clocksource, list);
103 if (!best)
104 best = src;
105
106 /* check for override: */
107 if (strlen(src->name) == strlen(override_name) &&
108 !strcmp(src->name, override_name)) {
109 best = src;
110 break;
111 }
112 /* pick the highest rating: */
113 if (src->rating > best->rating)
114 best = src;
115 }
116
117 return best;
118}
119
120/**
121 * is_registered_source - Checks if clocksource is registered
122 * @c: pointer to a clocksource
123 *
124 * Private helper function. Must hold clocksource_lock when called.
125 *
126 * Returns one if the clocksource is already registered, zero otherwise.
127 */
128static int is_registered_source(struct clocksource *c)
129{
130 int len = strlen(c->name);
131 struct list_head *tmp;
132
133 list_for_each(tmp, &clocksource_list) {
134 struct clocksource *src;
135
136 src = list_entry(tmp, struct clocksource, list);
137 if (strlen(src->name) == len && !strcmp(src->name, c->name))
138 return 1;
139 }
140
141 return 0;
142}
143
144/**
145 * clocksource_register - Used to install new clocksources
146 * @c: clocksource to be registered
147 *
148 * Returns -EBUSY if registration fails, zero otherwise.
149 */
150int clocksource_register(struct clocksource *c)
151{
152 int ret = 0;
153 unsigned long flags;
154
155 spin_lock_irqsave(&clocksource_lock, flags);
156 /* check if clocksource is already registered */
157 if (is_registered_source(c)) {
158 printk("clocksource_register: Cannot register %s. "
159 "Already registered!", c->name);
160 ret = -EBUSY;
161 } else {
162 /* register it */
163 list_add(&c->list, &clocksource_list);
164 /* scan the registered clocksources, and pick the best one */
165 next_clocksource = select_clocksource();
166 }
167 spin_unlock_irqrestore(&clocksource_lock, flags);
168 return ret;
169}
170EXPORT_SYMBOL(clocksource_register);
171
172/**
173 * clocksource_reselect - Rescan list for next clocksource
174 *
175 * A quick helper function to be used if a clocksource changes its
176 * rating. Forces the clocksource list to be re-scanned for the best
177 * clocksource.
178 */
179void clocksource_reselect(void)
180{
181 unsigned long flags;
182
183 spin_lock_irqsave(&clocksource_lock, flags);
184 next_clocksource = select_clocksource();
185 spin_unlock_irqrestore(&clocksource_lock, flags);
186}
187EXPORT_SYMBOL(clocksource_reselect);
188
189/**
190 * sysfs_show_current_clocksources - sysfs interface for current clocksource
191 * @dev: unused
192 * @buf: char buffer to be filled with clocksource list
193 *
194 * Provides sysfs interface for listing current clocksource.
195 */
196static ssize_t
197sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
198{
199 char *curr = buf;
200
201 spin_lock_irq(&clocksource_lock);
202 curr += sprintf(curr, "%s ", curr_clocksource->name);
203 spin_unlock_irq(&clocksource_lock);
204
205 curr += sprintf(curr, "\n");
206
207 return curr - buf;
208}
209
210/**
211 * sysfs_override_clocksource - interface for manually overriding clocksource
212 * @dev: unused
213 * @buf: name of override clocksource
214 * @count: length of buffer
215 *
216 * Takes input from sysfs interface for manually overriding the default
217 * clocksource selection.
218 */
219static ssize_t sysfs_override_clocksource(struct sys_device *dev,
220 const char *buf, size_t count)
221{
222 size_t ret = count;
223 /* strings from sysfs write are not 0 terminated! */
224 if (count >= sizeof(override_name))
225 return -EINVAL;
226
227 /* strip off \n: */
228 if (buf[count-1] == '\n')
229 count--;
230 if (count < 1)
231 return -EINVAL;
232
233 spin_lock_irq(&clocksource_lock);
234
235 /* copy the name given: */
236 memcpy(override_name, buf, count);
237 override_name[count] = 0;
238
239 /* try to select it: */
240 next_clocksource = select_clocksource();
241
242 spin_unlock_irq(&clocksource_lock);
243
244 return ret;
245}
246
247/**
248 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
249 * @dev: unused
250 * @buf: char buffer to be filled with clocksource list
251 *
252 * Provides sysfs interface for listing registered clocksources
253 */
254static ssize_t
255sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
256{
257 struct list_head *tmp;
258 char *curr = buf;
259
260 spin_lock_irq(&clocksource_lock);
261 list_for_each(tmp, &clocksource_list) {
262 struct clocksource *src;
263
264 src = list_entry(tmp, struct clocksource, list);
265 curr += sprintf(curr, "%s ", src->name);
266 }
267 spin_unlock_irq(&clocksource_lock);
268
269 curr += sprintf(curr, "\n");
270
271 return curr - buf;
272}
273
274/*
275 * Sysfs setup bits:
276 */
277static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
278 sysfs_override_clocksource);
279
280static SYSDEV_ATTR(available_clocksource, 0600,
281 sysfs_show_available_clocksources, NULL);
282
283static struct sysdev_class clocksource_sysclass = {
284 set_kset_name("clocksource"),
285};
286
287static struct sys_device device_clocksource = {
288 .id = 0,
289 .cls = &clocksource_sysclass,
290};
291
292static int __init init_clocksource_sysfs(void)
293{
294 int error = sysdev_class_register(&clocksource_sysclass);
295
296 if (!error)
297 error = sysdev_register(&device_clocksource);
298 if (!error)
299 error = sysdev_create_file(
300 &device_clocksource,
301 &attr_current_clocksource);
302 if (!error)
303 error = sysdev_create_file(
304 &device_clocksource,
305 &attr_available_clocksource);
306 return error;
307}
308
309device_initcall(init_clocksource_sysfs);
310
311/**
312 * boot_override_clocksource - boot clock override
313 * @str: override name
314 *
315 * Takes a clocksource= boot argument and uses it
316 * as the clocksource override name.
317 */
318static int __init boot_override_clocksource(char* str)
319{
320 unsigned long flags;
321 spin_lock_irqsave(&clocksource_lock, flags);
322 if (str)
323 strlcpy(override_name, str, sizeof(override_name));
324 spin_unlock_irqrestore(&clocksource_lock, flags);
325 return 1;
326}
327
328__setup("clocksource=", boot_override_clocksource);
329
330/**
331 * boot_override_clock - Compatibility layer for deprecated boot option
332 * @str: override name
333 *
334 * DEPRECATED! Takes a clock= boot argument and uses it
335 * as the clocksource override name
336 */
337static int __init boot_override_clock(char* str)
338{
339 if (!strcmp(str, "pmtmr")) {
340 printk("Warning: clock=pmtmr is deprecated. "
341 "Use clocksource=acpi_pm.\n");
342 return boot_override_clocksource("acpi_pm");
343 }
344 printk("Warning! clock= boot option is deprecated. "
345 "Use clocksource=xyz\n");
346 return boot_override_clocksource(str);
347}
348
349__setup("clock=", boot_override_clock);
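
Note on the new clocksource core above: a driver describes its counter with a struct clocksource and hands it to clocksource_register(); select_clocksource() then picks the highest-rated entry (or the override) at the next update. A sketch of a driver-side registration follows; the counter, its MMIO base and its rate are hypothetical, and clocksource_hz2mult() is assumed to come from the accompanying <linux/clocksource.h>. The struct fields mirror the jiffies clocksource added below.

#include <linux/clocksource.h>
#include <linux/init.h>
#include <asm/io.h>

static void __iomem *hypo_counter_base;	/* hypothetical free-running 32-bit counter */
static unsigned long hypo_counter_hz;	/* hypothetical counter rate, probed elsewhere */

static cycle_t hypo_counter_read(void)
{
	return (cycle_t)readl(hypo_counter_base);
}

static struct clocksource clocksource_hypo = {
	.name		= "hypo",
	.rating		= 200,			/* anything beats the rating-0 jiffies fallback */
	.read		= hypo_counter_read,
	.mask		= 0xffffffff,		/* 32-bit counter */
	.shift		= 20,
	.is_continuous	= 1,
};

static int __init hypo_clocksource_init(void)
{
	/* ns = (cycles * mult) >> shift, so derive mult from the counter rate */
	clocksource_hypo.mult = clocksource_hz2mult(hypo_counter_hz, clocksource_hypo.shift);
	return clocksource_register(&clocksource_hypo);
}
module_init(hypo_clocksource_init);

Once registered it can also be forced with "clocksource=hypo" on the command line or, judging by the sysdev bits above, through /sys/devices/system/clocksource/clocksource0/current_clocksource.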
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
new file mode 100644
index 0000000000..126bb30c4a
--- /dev/null
+++ b/kernel/time/jiffies.c
@@ -0,0 +1,73 @@
1/***********************************************************************
2* linux/kernel/time/jiffies.c
3*
4* This file contains the jiffies based clocksource.
5*
6* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
7*
8* This program is free software; you can redistribute it and/or modify
9* it under the terms of the GNU General Public License as published by
10* the Free Software Foundation; either version 2 of the License, or
11* (at your option) any later version.
12*
13* This program is distributed in the hope that it will be useful,
14* but WITHOUT ANY WARRANTY; without even the implied warranty of
15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16* GNU General Public License for more details.
17*
18* You should have received a copy of the GNU General Public License
19* along with this program; if not, write to the Free Software
20* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21*
22************************************************************************/
23#include <linux/clocksource.h>
24#include <linux/jiffies.h>
25#include <linux/init.h>
26
27/* The Jiffies based clocksource is the lowest common
28 * denominator clock source which should function on
29 * all systems. It has the same coarse resolution as
30 * the timer interrupt frequency HZ and it suffers
31 * inaccuracies caused by missed or lost timer
32 * interrupts and the inability for the timer
33 * interrupt hardware to accurately tick at the
34 * requested HZ value. It is also not recommended
35 * for "tick-less" systems.
36 */
37#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
38
39/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
40 * conversion, the .shift value could be zero. However
41 * this would make NTP adjustments impossible as they are
42 * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
43 * shift both the numerator and denominator the same
44 * amount, and give ntp adjustments in units of 1/2^8
45 *
46 * The value 8 is somewhat carefully chosen, as anything
47 * larger can result in overflows. NSEC_PER_JIFFY grows as
48 * HZ shrinks, so values greater than 8 overflow 32 bits when
49 * HZ=100.
50 */
51#define JIFFIES_SHIFT 8
52
53static cycle_t jiffies_read(void)
54{
55 return (cycle_t) jiffies;
56}
57
58struct clocksource clocksource_jiffies = {
59 .name = "jiffies",
60 .rating = 0, /* lowest rating*/
61 .read = jiffies_read,
62 .mask = 0xffffffff, /*32bits*/
63 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
64 .shift = JIFFIES_SHIFT,
65 .is_continuous = 0, /* tick based, not free running */
66};
67
68static int __init init_jiffies_clocksource(void)
69{
70 return clocksource_register(&clocksource_jiffies);
71}
72
73module_init(init_jiffies_clocksource);
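
Note on the mult/shift pair above: the timekeeping code added to kernel/timer.c below converts a cycle delta to nanoseconds as roughly

	ns = (cycle_delta * clock->mult) >> clock->shift;	/* cf. cyc2ns() used in __get_nsec_offset() */

so with mult = NSEC_PER_JIFFY << 8 and shift = JIFFIES_SHIFT (8), one jiffy converts back to exactly NSEC_PER_JIFFY nanoseconds, while NTP still has 1/2^8 ns of granularity in which to nudge mult.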
diff --git a/kernel/timer.c b/kernel/timer.c
index f35b3939e9..5bb6b7976e 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -146,7 +146,7 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
146void fastcall init_timer(struct timer_list *timer) 146void fastcall init_timer(struct timer_list *timer)
147{ 147{
148 timer->entry.next = NULL; 148 timer->entry.next = NULL;
149 timer->base = per_cpu(tvec_bases, raw_smp_processor_id()); 149 timer->base = __raw_get_cpu_var(tvec_bases);
150} 150}
151EXPORT_SYMBOL(init_timer); 151EXPORT_SYMBOL(init_timer);
152 152
@@ -597,7 +597,6 @@ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
597long time_precision = 1; /* clock precision (us) */ 597long time_precision = 1; /* clock precision (us) */
598long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ 598long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
599long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ 599long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
600static long time_phase; /* phase offset (scaled us) */
601long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; 600long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
602 /* frequency offset (scaled ppm)*/ 601 /* frequency offset (scaled ppm)*/
603static long time_adj; /* tick adjust (scaled 1 / HZ) */ 602static long time_adj; /* tick adjust (scaled 1 / HZ) */
@@ -747,27 +746,14 @@ static long adjtime_adjustment(void)
747} 746}
748 747
749/* in the NTP reference this is called "hardclock()" */ 748/* in the NTP reference this is called "hardclock()" */
750static void update_wall_time_one_tick(void) 749static void update_ntp_one_tick(void)
751{ 750{
752 long time_adjust_step, delta_nsec; 751 long time_adjust_step;
753 752
754 time_adjust_step = adjtime_adjustment(); 753 time_adjust_step = adjtime_adjustment();
755 if (time_adjust_step) 754 if (time_adjust_step)
756 /* Reduce by this step the amount of time left */ 755 /* Reduce by this step the amount of time left */
757 time_adjust -= time_adjust_step; 756 time_adjust -= time_adjust_step;
758 delta_nsec = tick_nsec + time_adjust_step * 1000;
759 /*
760 * Advance the phase, once it gets to one microsecond, then
761 * advance the tick more.
762 */
763 time_phase += time_adj;
764 if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
765 long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
766 time_phase -= ltemp << (SHIFT_SCALE - 10);
767 delta_nsec += ltemp;
768 }
769 xtime.tv_nsec += delta_nsec;
770 time_interpolator_update(delta_nsec);
771 757
772 /* Changes by adjtime() do not take effect till next tick. */ 758 /* Changes by adjtime() do not take effect till next tick. */
773 if (time_next_adjust != 0) { 759 if (time_next_adjust != 0) {
@@ -780,36 +766,378 @@ static void update_wall_time_one_tick(void)
780 * Return how long ticks are at the moment, that is, how much time 766 * Return how long ticks are at the moment, that is, how much time
781 * update_wall_time_one_tick will add to xtime next time we call it 767 * update_wall_time_one_tick will add to xtime next time we call it
782 * (assuming no calls to do_adjtimex in the meantime). 768 * (assuming no calls to do_adjtimex in the meantime).
783 * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 769 * The return value is in fixed-point nanoseconds shifted by the
784 * bits to the right of the binary point. 770 * specified number of bits to the right of the binary point.
785 * This function has no side-effects. 771 * This function has no side-effects.
786 */ 772 */
787u64 current_tick_length(void) 773u64 current_tick_length(void)
788{ 774{
789 long delta_nsec; 775 long delta_nsec;
776 u64 ret;
790 777
778 /* calculate the finest interval NTP will allow.
779 * ie: nanosecond value shifted by (SHIFT_SCALE - 10)
780 */
791 delta_nsec = tick_nsec + adjtime_adjustment() * 1000; 781 delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
792 return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; 782 ret = (u64)delta_nsec << TICK_LENGTH_SHIFT;
783 ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10));
784
785 return ret;
793} 786}
794 787
795/* 788/* XXX - all of this timekeeping code should be later moved to time.c */
796 * Using a loop looks inefficient, but "ticks" is 789#include <linux/clocksource.h>
797 * usually just one (we shouldn't be losing ticks, 790static struct clocksource *clock; /* pointer to current clocksource */
798 * we're doing this this way mainly for interrupt 791
799 * latency reasons, not because we think we'll 792#ifdef CONFIG_GENERIC_TIME
800 * have lots of lost timer ticks 793/**
794 * __get_nsec_offset - Returns nanoseconds since last call to update_wall_time()
795 *
796 * private function, must hold xtime_lock lock when being
797 * called. Returns the number of nanoseconds since the
798 * last call to update_wall_time() (adjusted by NTP scaling)
799 */
800static inline s64 __get_nsec_offset(void)
801{
802 cycle_t cycle_now, cycle_delta;
803 s64 ns_offset;
804
805 /* read clocksource: */
806 cycle_now = clocksource_read(clock);
807
808 /* calculate the delta since the last update_wall_time: */
809 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
810
811 /* convert to nanoseconds: */
812 ns_offset = cyc2ns(clock, cycle_delta);
813
814 return ns_offset;
815}
816
817/**
818 * __get_realtime_clock_ts - Returns the time of day in a timespec
819 * @ts: pointer to the timespec to be set
820 *
821 * Returns the time of day in a timespec. Used by
822 * do_gettimeofday() and getnstimeofday().
801 */ 823 */
802static void update_wall_time(unsigned long ticks) 824static inline void __get_realtime_clock_ts(struct timespec *ts)
803{ 825{
826 unsigned long seq;
827 s64 nsecs;
828
829 do {
830 seq = read_seqbegin(&xtime_lock);
831
832 *ts = xtime;
833 nsecs = __get_nsec_offset();
834
835 } while (read_seqretry(&xtime_lock, seq));
836
837 timespec_add_ns(ts, nsecs);
838}
839
840/**
841 * getnstimeofday - Returns the time of day in a timespec
842 * @ts: pointer to the timespec to be set
843 *
844 * Returns the time of day in a timespec.
845 */
846void getnstimeofday(struct timespec *ts)
847{
848 __get_realtime_clock_ts(ts);
849}
850
851EXPORT_SYMBOL(getnstimeofday);
852
853/**
854 * do_gettimeofday - Returns the time of day in a timeval
855 * @tv: pointer to the timeval to be set
856 *
857 * NOTE: Users should be converted to using getnstimeofday()
858 */
859void do_gettimeofday(struct timeval *tv)
860{
861 struct timespec now;
862
863 __get_realtime_clock_ts(&now);
864 tv->tv_sec = now.tv_sec;
865 tv->tv_usec = now.tv_nsec/1000;
866}
867
868EXPORT_SYMBOL(do_gettimeofday);
869/**
870 * do_settimeofday - Sets the time of day
871 * @tv: pointer to the timespec variable containing the new time
872 *
873 * Sets the time of day to the new time, updates NTP and notifies hrtimers
874 */
875int do_settimeofday(struct timespec *tv)
876{
877 unsigned long flags;
878 time_t wtm_sec, sec = tv->tv_sec;
879 long wtm_nsec, nsec = tv->tv_nsec;
880
881 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
882 return -EINVAL;
883
884 write_seqlock_irqsave(&xtime_lock, flags);
885
886 nsec -= __get_nsec_offset();
887
888 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
889 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
890
891 set_normalized_timespec(&xtime, sec, nsec);
892 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
893
894 ntp_clear();
895
896 write_sequnlock_irqrestore(&xtime_lock, flags);
897
898 /* signal hrtimers about time change */
899 clock_was_set();
900
901 return 0;
902}
903
904EXPORT_SYMBOL(do_settimeofday);
905
906/**
907 * change_clocksource - Swaps clocksources if a new one is available
908 *
909 * Accumulates current time interval and initializes new clocksource
910 */
911static int change_clocksource(void)
912{
913 struct clocksource *new;
914 cycle_t now;
915 u64 nsec;
916 new = clocksource_get_next();
917 if (clock != new) {
918 now = clocksource_read(new);
919 nsec = __get_nsec_offset();
920 timespec_add_ns(&xtime, nsec);
921
922 clock = new;
923 clock->cycle_last = now;
924 printk(KERN_INFO "Time: %s clocksource has been installed.\n",
925 clock->name);
926 return 1;
927 } else if (clock->update_callback) {
928 return clock->update_callback();
929 }
930 return 0;
931}
932#else
933#define change_clocksource() (0)
934#endif
935
936/**
937 * timekeeping_is_continuous - check to see if timekeeping is free running
938 */
939int timekeeping_is_continuous(void)
940{
941 unsigned long seq;
942 int ret;
943
804 do { 944 do {
805 ticks--; 945 seq = read_seqbegin(&xtime_lock);
806 update_wall_time_one_tick(); 946
807 if (xtime.tv_nsec >= 1000000000) { 947 ret = clock->is_continuous;
808 xtime.tv_nsec -= 1000000000; 948
949 } while (read_seqretry(&xtime_lock, seq));
950
951 return ret;
952}
953
954/*
955 * timekeeping_init - Initializes the clocksource and common timekeeping values
956 */
957void __init timekeeping_init(void)
958{
959 unsigned long flags;
960
961 write_seqlock_irqsave(&xtime_lock, flags);
962 clock = clocksource_get_next();
963 clocksource_calculate_interval(clock, tick_nsec);
964 clock->cycle_last = clocksource_read(clock);
965 ntp_clear();
966 write_sequnlock_irqrestore(&xtime_lock, flags);
967}
968
969
970/*
971 * timekeeping_resume - Resumes the generic timekeeping subsystem.
972 * @dev: unused
973 *
974 * This is for the generic clocksource timekeeping.
975 * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are
976 * still managed by arch specific suspend/resume code.
977 */
978static int timekeeping_resume(struct sys_device *dev)
979{
980 unsigned long flags;
981
982 write_seqlock_irqsave(&xtime_lock, flags);
983 /* restart the last cycle value */
984 clock->cycle_last = clocksource_read(clock);
985 write_sequnlock_irqrestore(&xtime_lock, flags);
986 return 0;
987}
988
989/* sysfs resume/suspend bits for timekeeping */
990static struct sysdev_class timekeeping_sysclass = {
991 .resume = timekeeping_resume,
992 set_kset_name("timekeeping"),
993};
994
995static struct sys_device device_timer = {
996 .id = 0,
997 .cls = &timekeeping_sysclass,
998};
999
1000static int __init timekeeping_init_device(void)
1001{
1002 int error = sysdev_class_register(&timekeeping_sysclass);
1003 if (!error)
1004 error = sysdev_register(&device_timer);
1005 return error;
1006}
1007
1008device_initcall(timekeeping_init_device);
1009
1010/*
1011 * If the error is already larger, we look ahead another tick,
1012 * to compensate for late or lost adjustments.
1013 */
1014static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset)
1015{
1016 int adj;
1017
1018 /*
1019 * As soon as the machine is synchronized to the external time
1020 * source this should be the common case.
1021 */
1022 error >>= 2;
1023 if (likely(sign > 0 ? error <= *interval : error >= *interval))
1024 return sign;
1025
1026 /*
1027 * An extra look ahead dampens the effect of the current error,
1028 * which can grow quite large with continuously late updates, as
1029 * it would dominate the adjustment value and can lead to
1030 * oscillation.
1031 */
1032 error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
1033 error -= clock->xtime_interval >> 1;
1034
1035 adj = 0;
1036 while (1) {
1037 error >>= 1;
1038 if (sign > 0 ? error <= *interval : error >= *interval)
1039 break;
1040 adj++;
1041 }
1042
1043 /*
1044 * Add the current adjustments to the error and take the offset
1045 * into account, the latter can cause the error to be hardly
1046 * reduced at the next tick. Check the error again if there's
1047 * room for another adjustment, thus further reducing the error
1048 * which otherwise had to be corrected at the next update.
1049 */
1050 error = (error << 1) - *interval + *offset;
1051 if (sign > 0 ? error > *interval : error < *interval)
1052 adj++;
1053
1054 *interval <<= adj;
1055 *offset <<= adj;
1056 return sign << adj;
1057}
1058
1059/*
1060 * Adjust the multiplier to reduce the error value,
1061 * this is optimized for the most common adjustments of -1,0,1,
1062 * for other values we can do a bit more work.
1063 */
1064static void clocksource_adjust(struct clocksource *clock, s64 offset)
1065{
1066 s64 error, interval = clock->cycle_interval;
1067 int adj;
1068
1069 error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
1070 if (error > interval) {
1071 adj = clocksource_bigadjust(1, error, &interval, &offset);
1072 } else if (error < -interval) {
1073 interval = -interval;
1074 offset = -offset;
1075 adj = clocksource_bigadjust(-1, error, &interval, &offset);
1076 } else
1077 return;
1078
1079 clock->mult += adj;
1080 clock->xtime_interval += interval;
1081 clock->xtime_nsec -= offset;
1082 clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
1083}
1084
1085/*
1086 * update_wall_time - Uses the current clocksource to increment the wall time
1087 *
1088 * Called from the timer interrupt, must hold a write on xtime_lock.
1089 */
1090static void update_wall_time(void)
1091{
1092 cycle_t offset;
1093
1094 clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
1095
1096#ifdef CONFIG_GENERIC_TIME
1097 offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
1098#else
1099 offset = clock->cycle_interval;
1100#endif
1101
1102 /* normally this loop will run just once, however in the
1103 * case of lost or late ticks, it will accumulate correctly.
1104 */
1105 while (offset >= clock->cycle_interval) {
1106 /* accumulate one interval */
1107 clock->xtime_nsec += clock->xtime_interval;
1108 clock->cycle_last += clock->cycle_interval;
1109 offset -= clock->cycle_interval;
1110
1111 if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
1112 clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
809 xtime.tv_sec++; 1113 xtime.tv_sec++;
810 second_overflow(); 1114 second_overflow();
811 } 1115 }
812 } while (ticks); 1116
1117 /* interpolator bits */
1118 time_interpolator_update(clock->xtime_interval
1119 >> clock->shift);
1120 /* increment the NTP state machine */
1121 update_ntp_one_tick();
1122
1123 /* accumulate error between NTP and clock interval */
1124 clock->error += current_tick_length();
1125 clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
1126 }
1127
1128 /* correct the clock when NTP error is too big */
1129 clocksource_adjust(clock, offset);
1130
1131 /* store full nanoseconds into xtime */
1132 xtime.tv_nsec = clock->xtime_nsec >> clock->shift;
1133 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
1134
1135 /* check to see if there is a new clocksource to use */
1136 if (change_clocksource()) {
1137 clock->error = 0;
1138 clock->xtime_nsec = 0;
1139 clocksource_calculate_interval(clock, tick_nsec);
1140 }
813} 1141}
814 1142
815/* 1143/*
@@ -915,10 +1243,8 @@ static inline void update_times(void)
915 unsigned long ticks; 1243 unsigned long ticks;
916 1244
917 ticks = jiffies - wall_jiffies; 1245 ticks = jiffies - wall_jiffies;
918 if (ticks) { 1246 wall_jiffies += ticks;
919 wall_jiffies += ticks; 1247 update_wall_time();
920 update_wall_time(ticks);
921 }
922 calc_load(ticks); 1248 calc_load(ticks);
923} 1249}
924 1250
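
Note on update_wall_time()/clocksource_adjust() above: a condensed reading of the per-interval bookkeeping with the units spelled out (illustration only, the expressions are lifted from the hunk).

	/* per accumulated cycle_interval:
	 *   xtime advances by clock->xtime_interval             (ns << clock->shift)
	 *   NTP expects       current_tick_length()             (ns << TICK_LENGTH_SHIFT)
	 *
	 *   clock->error += current_tick_length()
	 *                 - (clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift));
	 */

clocksource_adjust() then nudges clock->mult up or down (by more than one step via clocksource_bigadjust() when the error is large) so the accumulated error is steered back toward zero instead of letting xtime drift away from NTP time.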
diff --git a/kernel/unwind.c b/kernel/unwind.c
new file mode 100644
index 0000000000..f69c804c8e
--- /dev/null
+++ b/kernel/unwind.c
@@ -0,0 +1,918 @@
1/*
2 * Copyright (C) 2002-2006 Novell, Inc.
3 * Jan Beulich <jbeulich@novell.com>
4 * This code is released under version 2 of the GNU GPL.
5 *
6 * A simple API for unwinding kernel stacks. This is used for
7 * debugging and error reporting purposes. The kernel doesn't need
8 * full-blown stack unwinding with all the bells and whistles, so there
9 * is not much point in implementing the full Dwarf2 unwind API.
10 */
11
12#include <linux/unwind.h>
13#include <linux/module.h>
14#include <linux/delay.h>
15#include <linux/stop_machine.h>
16#include <asm/sections.h>
17#include <asm/uaccess.h>
18#include <asm/unaligned.h>
19
20extern char __start_unwind[], __end_unwind[];
21
22#define MAX_STACK_DEPTH 8
23
24#define EXTRA_INFO(f) { \
25 BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
26 % FIELD_SIZEOF(struct unwind_frame_info, f)) \
27 + offsetof(struct unwind_frame_info, f) \
28 / FIELD_SIZEOF(struct unwind_frame_info, f), \
29 FIELD_SIZEOF(struct unwind_frame_info, f) \
30 }
31#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
32
33static const struct {
34 unsigned offs:BITS_PER_LONG / 2;
35 unsigned width:BITS_PER_LONG / 2;
36} reg_info[] = {
37 UNW_REGISTER_INFO
38};
39
40#undef PTREGS_INFO
41#undef EXTRA_INFO
42
43#ifndef REG_INVALID
44#define REG_INVALID(r) (reg_info[r].width == 0)
45#endif
46
47#define DW_CFA_nop 0x00
48#define DW_CFA_set_loc 0x01
49#define DW_CFA_advance_loc1 0x02
50#define DW_CFA_advance_loc2 0x03
51#define DW_CFA_advance_loc4 0x04
52#define DW_CFA_offset_extended 0x05
53#define DW_CFA_restore_extended 0x06
54#define DW_CFA_undefined 0x07
55#define DW_CFA_same_value 0x08
56#define DW_CFA_register 0x09
57#define DW_CFA_remember_state 0x0a
58#define DW_CFA_restore_state 0x0b
59#define DW_CFA_def_cfa 0x0c
60#define DW_CFA_def_cfa_register 0x0d
61#define DW_CFA_def_cfa_offset 0x0e
62#define DW_CFA_def_cfa_expression 0x0f
63#define DW_CFA_expression 0x10
64#define DW_CFA_offset_extended_sf 0x11
65#define DW_CFA_def_cfa_sf 0x12
66#define DW_CFA_def_cfa_offset_sf 0x13
67#define DW_CFA_val_offset 0x14
68#define DW_CFA_val_offset_sf 0x15
69#define DW_CFA_val_expression 0x16
70#define DW_CFA_lo_user 0x1c
71#define DW_CFA_GNU_window_save 0x2d
72#define DW_CFA_GNU_args_size 0x2e
73#define DW_CFA_GNU_negative_offset_extended 0x2f
74#define DW_CFA_hi_user 0x3f
75
76#define DW_EH_PE_FORM 0x07
77#define DW_EH_PE_native 0x00
78#define DW_EH_PE_leb128 0x01
79#define DW_EH_PE_data2 0x02
80#define DW_EH_PE_data4 0x03
81#define DW_EH_PE_data8 0x04
82#define DW_EH_PE_signed 0x08
83#define DW_EH_PE_ADJUST 0x70
84#define DW_EH_PE_abs 0x00
85#define DW_EH_PE_pcrel 0x10
86#define DW_EH_PE_textrel 0x20
87#define DW_EH_PE_datarel 0x30
88#define DW_EH_PE_funcrel 0x40
89#define DW_EH_PE_aligned 0x50
90#define DW_EH_PE_indirect 0x80
91#define DW_EH_PE_omit 0xff
92
93typedef unsigned long uleb128_t;
94typedef signed long sleb128_t;
95
96static struct unwind_table {
97 struct {
98 unsigned long pc;
99 unsigned long range;
100 } core, init;
101 const void *address;
102 unsigned long size;
103 struct unwind_table *link;
104 const char *name;
105} root_table, *last_table;
106
107struct unwind_item {
108 enum item_location {
109 Nowhere,
110 Memory,
111 Register,
112 Value
113 } where;
114 uleb128_t value;
115};
116
117struct unwind_state {
118 uleb128_t loc, org;
119 const u8 *cieStart, *cieEnd;
120 uleb128_t codeAlign;
121 sleb128_t dataAlign;
122 struct cfa {
123 uleb128_t reg, offs;
124 } cfa;
125 struct unwind_item regs[ARRAY_SIZE(reg_info)];
126 unsigned stackDepth:8;
127 unsigned version:8;
128 const u8 *label;
129 const u8 *stack[MAX_STACK_DEPTH];
130};
131
132static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
133
134static struct unwind_table *find_table(unsigned long pc)
135{
136 struct unwind_table *table;
137
138 for (table = &root_table; table; table = table->link)
139 if ((pc >= table->core.pc
140 && pc < table->core.pc + table->core.range)
141 || (pc >= table->init.pc
142 && pc < table->init.pc + table->init.range))
143 break;
144
145 return table;
146}
147
148static void init_unwind_table(struct unwind_table *table,
149 const char *name,
150 const void *core_start,
151 unsigned long core_size,
152 const void *init_start,
153 unsigned long init_size,
154 const void *table_start,
155 unsigned long table_size)
156{
157 table->core.pc = (unsigned long)core_start;
158 table->core.range = core_size;
159 table->init.pc = (unsigned long)init_start;
160 table->init.range = init_size;
161 table->address = table_start;
162 table->size = table_size;
163 table->link = NULL;
164 table->name = name;
165}
166
167void __init unwind_init(void)
168{
169 init_unwind_table(&root_table, "kernel",
170 _text, _end - _text,
171 NULL, 0,
172 __start_unwind, __end_unwind - __start_unwind);
173}
174
175#ifdef CONFIG_MODULES
176
177/* Must be called with module_mutex held. */
178void *unwind_add_table(struct module *module,
179 const void *table_start,
180 unsigned long table_size)
181{
182 struct unwind_table *table;
183
184 if (table_size <= 0)
185 return NULL;
186
187 table = kmalloc(sizeof(*table), GFP_KERNEL);
188 if (!table)
189 return NULL;
190
191 init_unwind_table(table, module->name,
192 module->module_core, module->core_size,
193 module->module_init, module->init_size,
194 table_start, table_size);
195
196 if (last_table)
197 last_table->link = table;
198 else
199 root_table.link = table;
200 last_table = table;
201
202 return table;
203}
204
205struct unlink_table_info
206{
207 struct unwind_table *table;
208 int init_only;
209};
210
211static int unlink_table(void *arg)
212{
213 struct unlink_table_info *info = arg;
214 struct unwind_table *table = info->table, *prev;
215
216 for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
217 ;
218
219 if (prev->link) {
220 if (info->init_only) {
221 table->init.pc = 0;
222 table->init.range = 0;
223 info->table = NULL;
224 } else {
225 prev->link = table->link;
226 if (!prev->link)
227 last_table = prev;
228 }
229 } else
230 info->table = NULL;
231
232 return 0;
233}
234
235/* Must be called with module_mutex held. */
236void unwind_remove_table(void *handle, int init_only)
237{
238 struct unwind_table *table = handle;
239 struct unlink_table_info info;
240
241 if (!table || table == &root_table)
242 return;
243
244 if (init_only && table == last_table) {
245 table->init.pc = 0;
246 table->init.range = 0;
247 return;
248 }
249
250 info.table = table;
251 info.init_only = init_only;
252 stop_machine_run(unlink_table, &info, NR_CPUS);
253
254 if (info.table)
255 kfree(table);
256}
257
258#endif /* CONFIG_MODULES */
259
260static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
261{
262 const u8 *cur = *pcur;
263 uleb128_t value;
264 unsigned shift;
265
266 for (shift = 0, value = 0; cur < end; shift += 7) {
267 if (shift + 7 > 8 * sizeof(value)
268 && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
269 cur = end + 1;
270 break;
271 }
272 value |= (uleb128_t)(*cur & 0x7f) << shift;
273 if (!(*cur++ & 0x80))
274 break;
275 }
276 *pcur = cur;
277
278 return value;
279}
280
281static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
282{
283 const u8 *cur = *pcur;
284 sleb128_t value;
285 unsigned shift;
286
287 for (shift = 0, value = 0; cur < end; shift += 7) {
288 if (shift + 7 > 8 * sizeof(value)
289 && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
290 cur = end + 1;
291 break;
292 }
293 value |= (sleb128_t)(*cur & 0x7f) << shift;
294 if (!(*cur & 0x80)) {
295 value |= -(*cur++ & 0x40) << shift;
296 break;
297 }
298 }
299 *pcur = cur;
300
301 return value;
302}
303
304static unsigned long read_pointer(const u8 **pLoc,
305 const void *end,
306 signed ptrType)
307{
308 unsigned long value = 0;
309 union {
310 const u8 *p8;
311 const u16 *p16u;
312 const s16 *p16s;
313 const u32 *p32u;
314 const s32 *p32s;
315 const unsigned long *pul;
316 } ptr;
317
318 if (ptrType < 0 || ptrType == DW_EH_PE_omit)
319 return 0;
320 ptr.p8 = *pLoc;
321 switch(ptrType & DW_EH_PE_FORM) {
322 case DW_EH_PE_data2:
323 if (end < (const void *)(ptr.p16u + 1))
324 return 0;
325 if(ptrType & DW_EH_PE_signed)
326 value = get_unaligned(ptr.p16s++);
327 else
328 value = get_unaligned(ptr.p16u++);
329 break;
330 case DW_EH_PE_data4:
331#ifdef CONFIG_64BIT
332 if (end < (const void *)(ptr.p32u + 1))
333 return 0;
334 if(ptrType & DW_EH_PE_signed)
335 value = get_unaligned(ptr.p32s++);
336 else
337 value = get_unaligned(ptr.p32u++);
338 break;
339 case DW_EH_PE_data8:
340 BUILD_BUG_ON(sizeof(u64) != sizeof(value));
341#else
342 BUILD_BUG_ON(sizeof(u32) != sizeof(value));
343#endif
344 case DW_EH_PE_native:
345 if (end < (const void *)(ptr.pul + 1))
346 return 0;
347 value = get_unaligned(ptr.pul++);
348 break;
349 case DW_EH_PE_leb128:
350 BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
351 value = ptrType & DW_EH_PE_signed
352 ? get_sleb128(&ptr.p8, end)
353 : get_uleb128(&ptr.p8, end);
354 if ((const void *)ptr.p8 > end)
355 return 0;
356 break;
357 default:
358 return 0;
359 }
360 switch(ptrType & DW_EH_PE_ADJUST) {
361 case DW_EH_PE_abs:
362 break;
363 case DW_EH_PE_pcrel:
364 value += (unsigned long)*pLoc;
365 break;
366 default:
367 return 0;
368 }
369 if ((ptrType & DW_EH_PE_indirect)
370 && __get_user(value, (unsigned long *)value))
371 return 0;
372 *pLoc = ptr.p8;
373
374 return value;
375}
376
377static signed fde_pointer_type(const u32 *cie)
378{
379 const u8 *ptr = (const u8 *)(cie + 2);
380 unsigned version = *ptr;
381
382 if (version != 1)
383 return -1; /* unsupported */
384 if (*++ptr) {
385 const char *aug;
386 const u8 *end = (const u8 *)(cie + 1) + *cie;
387 uleb128_t len;
388
389 /* check if augmentation size is first (and thus present) */
390 if (*ptr != 'z')
391 return -1;
392 /* check if augmentation string is nul-terminated */
393 if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
394 return -1;
395 ++ptr; /* skip terminator */
396 get_uleb128(&ptr, end); /* skip code alignment */
397 get_sleb128(&ptr, end); /* skip data alignment */
398 /* skip return address column */
399 version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
400 len = get_uleb128(&ptr, end); /* augmentation length */
401 if (ptr + len < ptr || ptr + len > end)
402 return -1;
403 end = ptr + len;
404 while (*++aug) {
405 if (ptr >= end)
406 return -1;
407 switch(*aug) {
408 case 'L':
409 ++ptr;
410 break;
411 case 'P': {
412 signed ptrType = *ptr++;
413
414 if (!read_pointer(&ptr, end, ptrType) || ptr > end)
415 return -1;
416 }
417 break;
418 case 'R':
419 return *ptr;
420 default:
421 return -1;
422 }
423 }
424 }
425 return DW_EH_PE_native|DW_EH_PE_abs;
426}
427
428static int advance_loc(unsigned long delta, struct unwind_state *state)
429{
430 state->loc += delta * state->codeAlign;
431
432 return delta > 0;
433}
434
435static void set_rule(uleb128_t reg,
436 enum item_location where,
437 uleb128_t value,
438 struct unwind_state *state)
439{
440 if (reg < ARRAY_SIZE(state->regs)) {
441 state->regs[reg].where = where;
442 state->regs[reg].value = value;
443 }
444}
445
446static int processCFI(const u8 *start,
447 const u8 *end,
448 unsigned long targetLoc,
449 signed ptrType,
450 struct unwind_state *state)
451{
452 union {
453 const u8 *p8;
454 const u16 *p16;
455 const u32 *p32;
456 } ptr;
457 int result = 1;
458
459 if (start != state->cieStart) {
460 state->loc = state->org;
461 result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
462 if (targetLoc == 0 && state->label == NULL)
463 return result;
464 }
465 for (ptr.p8 = start; result && ptr.p8 < end; ) {
466 switch(*ptr.p8 >> 6) {
467 uleb128_t value;
468
469 case 0:
470 switch(*ptr.p8++) {
471 case DW_CFA_nop:
472 break;
473 case DW_CFA_set_loc:
474 if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0)
475 result = 0;
476 break;
477 case DW_CFA_advance_loc1:
478 result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
479 break;
480 case DW_CFA_advance_loc2:
481 result = ptr.p8 <= end + 2
482 && advance_loc(*ptr.p16++, state);
483 break;
484 case DW_CFA_advance_loc4:
485 result = ptr.p8 <= end + 4
486 && advance_loc(*ptr.p32++, state);
487 break;
488 case DW_CFA_offset_extended:
489 value = get_uleb128(&ptr.p8, end);
490 set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
491 break;
492 case DW_CFA_val_offset:
493 value = get_uleb128(&ptr.p8, end);
494 set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
495 break;
496 case DW_CFA_offset_extended_sf:
497 value = get_uleb128(&ptr.p8, end);
498 set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
499 break;
500 case DW_CFA_val_offset_sf:
501 value = get_uleb128(&ptr.p8, end);
502 set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
503 break;
504 case DW_CFA_restore_extended:
505 case DW_CFA_undefined:
506 case DW_CFA_same_value:
507 set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
508 break;
509 case DW_CFA_register:
510 value = get_uleb128(&ptr.p8, end);
511 set_rule(value,
512 Register,
513 get_uleb128(&ptr.p8, end), state);
514 break;
515 case DW_CFA_remember_state:
516 if (ptr.p8 == state->label) {
517 state->label = NULL;
518 return 1;
519 }
520 if (state->stackDepth >= MAX_STACK_DEPTH)
521 return 0;
522 state->stack[state->stackDepth++] = ptr.p8;
523 break;
524 case DW_CFA_restore_state:
525 if (state->stackDepth) {
526 const uleb128_t loc = state->loc;
527 const u8 *label = state->label;
528
529 state->label = state->stack[state->stackDepth - 1];
530 memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
531 memset(state->regs, 0, sizeof(state->regs));
532 state->stackDepth = 0;
533 result = processCFI(start, end, 0, ptrType, state);
534 state->loc = loc;
535 state->label = label;
536 } else
537 return 0;
538 break;
539 case DW_CFA_def_cfa:
540 state->cfa.reg = get_uleb128(&ptr.p8, end);
541 /*nobreak*/
542 case DW_CFA_def_cfa_offset:
543 state->cfa.offs = get_uleb128(&ptr.p8, end);
544 break;
545 case DW_CFA_def_cfa_sf:
546 state->cfa.reg = get_uleb128(&ptr.p8, end);
547 /*nobreak*/
548 case DW_CFA_def_cfa_offset_sf:
549 state->cfa.offs = get_sleb128(&ptr.p8, end)
550 * state->dataAlign;
551 break;
552 case DW_CFA_def_cfa_register:
553 state->cfa.reg = get_uleb128(&ptr.p8, end);
554 break;
555 /*todo case DW_CFA_def_cfa_expression: */
556 /*todo case DW_CFA_expression: */
557 /*todo case DW_CFA_val_expression: */
558 case DW_CFA_GNU_args_size:
559 get_uleb128(&ptr.p8, end);
560 break;
561 case DW_CFA_GNU_negative_offset_extended:
562 value = get_uleb128(&ptr.p8, end);
563 set_rule(value,
564 Memory,
565 (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
566 break;
567 case DW_CFA_GNU_window_save:
568 default:
569 result = 0;
570 break;
571 }
572 break;
573 case 1:
574 result = advance_loc(*ptr.p8++ & 0x3f, state);
575 break;
576 case 2:
577 value = *ptr.p8++ & 0x3f;
578 set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
579 break;
580 case 3:
581 set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
582 break;
583 }
584 if (ptr.p8 > end)
585 result = 0;
586 if (result && targetLoc != 0 && targetLoc < state->loc)
587 return 1;
588 }
589
590 return result
591 && ptr.p8 == end
592 && (targetLoc == 0
593 || (/*todo While in theory this should apply, gcc in practice omits
594 everything past the function prolog, and hence the location
595 never reaches the end of the function.
596 targetLoc < state->loc &&*/ state->label == NULL));
597}
598
599/* Unwind to the previous frame. Returns 0 if successful, negative
600 * number in case of an error. */
601int unwind(struct unwind_frame_info *frame)
602{
603#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
604 const u32 *fde = NULL, *cie = NULL;
605 const u8 *ptr = NULL, *end = NULL;
606 unsigned long startLoc = 0, endLoc = 0, cfa;
607 unsigned i;
608 signed ptrType = -1;
609 uleb128_t retAddrReg = 0;
610 struct unwind_table *table;
611 struct unwind_state state;
612
613 if (UNW_PC(frame) == 0)
614 return -EINVAL;
615 if ((table = find_table(UNW_PC(frame))) != NULL
616 && !(table->size & (sizeof(*fde) - 1))) {
617 unsigned long tableSize = table->size;
618
619 for (fde = table->address;
620 tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
621 tableSize -= sizeof(*fde) + *fde,
622 fde += 1 + *fde / sizeof(*fde)) {
623 if (!*fde || (*fde & (sizeof(*fde) - 1)))
624 break;
625 if (!fde[1])
626 continue; /* this is a CIE */
627 if ((fde[1] & (sizeof(*fde) - 1))
628 || fde[1] > (unsigned long)(fde + 1)
629 - (unsigned long)table->address)
630 continue; /* this is not a valid FDE */
631 cie = fde + 1 - fde[1] / sizeof(*fde);
632 if (*cie <= sizeof(*cie) + 4
633 || *cie >= fde[1] - sizeof(*fde)
634 || (*cie & (sizeof(*cie) - 1))
635 || cie[1]
636 || (ptrType = fde_pointer_type(cie)) < 0) {
637 cie = NULL; /* this is not a (valid) CIE */
638 continue;
639 }
640 ptr = (const u8 *)(fde + 2);
641 startLoc = read_pointer(&ptr,
642 (const u8 *)(fde + 1) + *fde,
643 ptrType);
644 endLoc = startLoc
645 + read_pointer(&ptr,
646 (const u8 *)(fde + 1) + *fde,
647 ptrType & DW_EH_PE_indirect
648 ? ptrType
649 : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
650 if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc)
651 break;
652 cie = NULL;
653 }
654 }
655 if (cie != NULL) {
656 memset(&state, 0, sizeof(state));
657 state.cieEnd = ptr; /* keep here temporarily */
658 ptr = (const u8 *)(cie + 2);
659 end = (const u8 *)(cie + 1) + *cie;
660 if ((state.version = *ptr) != 1)
661 cie = NULL; /* unsupported version */
662 else if (*++ptr) {
663 /* check if augmentation size is first (and thus present) */
664 if (*ptr == 'z') {
665 /* check for ignorable (or already handled)
666 * nul-terminated augmentation string */
667 while (++ptr < end && *ptr)
668 if (strchr("LPR", *ptr) == NULL)
669 break;
670 }
671 if (ptr >= end || *ptr)
672 cie = NULL;
673 }
674 ++ptr;
675 }
676 if (cie != NULL) {
677	/* get code alignment factor */
678 state.codeAlign = get_uleb128(&ptr, end);
679	/* get data alignment factor */
680 state.dataAlign = get_sleb128(&ptr, end);
681 if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
682 cie = NULL;
683 else {
684 retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
685 /* skip augmentation */
686 if (((const char *)(cie + 2))[1] == 'z')
687 ptr += get_uleb128(&ptr, end);
688 if (ptr > end
689 || retAddrReg >= ARRAY_SIZE(reg_info)
690 || REG_INVALID(retAddrReg)
691 || reg_info[retAddrReg].width != sizeof(unsigned long))
692 cie = NULL;
693 }
694 }
695 if (cie != NULL) {
696 state.cieStart = ptr;
697 ptr = state.cieEnd;
698 state.cieEnd = end;
699 end = (const u8 *)(fde + 1) + *fde;
700 /* skip augmentation */
701 if (((const char *)(cie + 2))[1] == 'z') {
702 uleb128_t augSize = get_uleb128(&ptr, end);
703
704 if ((ptr += augSize) > end)
705 fde = NULL;
706 }
707 }
708 if (cie == NULL || fde == NULL) {
709#ifdef CONFIG_FRAME_POINTER
710 unsigned long top, bottom;
711#endif
712
713#ifdef CONFIG_FRAME_POINTER
714 top = STACK_TOP(frame->task);
715 bottom = STACK_BOTTOM(frame->task);
716# if FRAME_RETADDR_OFFSET < 0
717 if (UNW_SP(frame) < top
718 && UNW_FP(frame) <= UNW_SP(frame)
719 && bottom < UNW_FP(frame)
720# else
721 if (UNW_SP(frame) > top
722 && UNW_FP(frame) >= UNW_SP(frame)
723 && bottom > UNW_FP(frame)
724# endif
725 && !((UNW_SP(frame) | UNW_FP(frame))
726 & (sizeof(unsigned long) - 1))) {
727 unsigned long link;
728
729 if (!__get_user(link,
730 (unsigned long *)(UNW_FP(frame)
731 + FRAME_LINK_OFFSET))
732# if FRAME_RETADDR_OFFSET < 0
733 && link > bottom && link < UNW_FP(frame)
734# else
735 && link > UNW_FP(frame) && link < bottom
736# endif
737 && !(link & (sizeof(link) - 1))
738 && !__get_user(UNW_PC(frame),
739 (unsigned long *)(UNW_FP(frame)
740 + FRAME_RETADDR_OFFSET))) {
741 UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
742# if FRAME_RETADDR_OFFSET < 0
743 -
744# else
745 +
746# endif
747 sizeof(UNW_PC(frame));
748 UNW_FP(frame) = link;
749 return 0;
750 }
751 }
752#endif
753 return -ENXIO;
754 }
755 state.org = startLoc;
756 memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
757 /* process instructions */
758 if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state)
759 || state.loc > endLoc
760 || state.regs[retAddrReg].where == Nowhere
761 || state.cfa.reg >= ARRAY_SIZE(reg_info)
762 || reg_info[state.cfa.reg].width != sizeof(unsigned long)
763 || state.cfa.offs % sizeof(unsigned long))
764 return -EIO;
765 /* update frame */
766 cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
767 startLoc = min((unsigned long)UNW_SP(frame), cfa);
768 endLoc = max((unsigned long)UNW_SP(frame), cfa);
769 if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
770 startLoc = min(STACK_LIMIT(cfa), cfa);
771 endLoc = max(STACK_LIMIT(cfa), cfa);
772 }
773#ifndef CONFIG_64BIT
774# define CASES CASE(8); CASE(16); CASE(32)
775#else
776# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
777#endif
778 for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
779 if (REG_INVALID(i)) {
780 if (state.regs[i].where == Nowhere)
781 continue;
782 return -EIO;
783 }
784 switch(state.regs[i].where) {
785 default:
786 break;
787 case Register:
788 if (state.regs[i].value >= ARRAY_SIZE(reg_info)
789 || REG_INVALID(state.regs[i].value)
790 || reg_info[i].width > reg_info[state.regs[i].value].width)
791 return -EIO;
792 switch(reg_info[state.regs[i].value].width) {
793#define CASE(n) \
794 case sizeof(u##n): \
795 state.regs[i].value = FRAME_REG(state.regs[i].value, \
796 const u##n); \
797 break
798 CASES;
799#undef CASE
800 default:
801 return -EIO;
802 }
803 break;
804 }
805 }
806 for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
807 if (REG_INVALID(i))
808 continue;
809 switch(state.regs[i].where) {
810 case Nowhere:
811 if (reg_info[i].width != sizeof(UNW_SP(frame))
812 || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
813 != &UNW_SP(frame))
814 continue;
815 UNW_SP(frame) = cfa;
816 break;
817 case Register:
818 switch(reg_info[i].width) {
819#define CASE(n) case sizeof(u##n): \
820 FRAME_REG(i, u##n) = state.regs[i].value; \
821 break
822 CASES;
823#undef CASE
824 default:
825 return -EIO;
826 }
827 break;
828 case Value:
829 if (reg_info[i].width != sizeof(unsigned long))
830 return -EIO;
831 FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
832 * state.dataAlign;
833 break;
834 case Memory: {
835 unsigned long addr = cfa + state.regs[i].value
836 * state.dataAlign;
837
838 if ((state.regs[i].value * state.dataAlign)
839 % sizeof(unsigned long)
840 || addr < startLoc
841 || addr + sizeof(unsigned long) < addr
842 || addr + sizeof(unsigned long) > endLoc)
843 return -EIO;
844 switch(reg_info[i].width) {
845#define CASE(n) case sizeof(u##n): \
846 __get_user(FRAME_REG(i, u##n), (u##n *)addr); \
847 break
848 CASES;
849#undef CASE
850 default:
851 return -EIO;
852 }
853 }
854 break;
855 }
856 }
857
858 return 0;
859#undef CASES
860#undef FRAME_REG
861}
862EXPORT_SYMBOL(unwind);
863
864int unwind_init_frame_info(struct unwind_frame_info *info,
865 struct task_struct *tsk,
866 /*const*/ struct pt_regs *regs)
867{
868 info->task = tsk;
869 arch_unw_init_frame_info(info, regs);
870
871 return 0;
872}
873EXPORT_SYMBOL(unwind_init_frame_info);
874
875/*
876 * Prepare to unwind a blocked task.
877 */
878int unwind_init_blocked(struct unwind_frame_info *info,
879 struct task_struct *tsk)
880{
881 info->task = tsk;
882 arch_unw_init_blocked(info);
883
884 return 0;
885}
886EXPORT_SYMBOL(unwind_init_blocked);
887
888/*
889 * Prepare to unwind the currently running thread.
890 */
891int unwind_init_running(struct unwind_frame_info *info,
892 asmlinkage int (*callback)(struct unwind_frame_info *,
893 void *arg),
894 void *arg)
895{
896 info->task = current;
897
898 return arch_unwind_init_running(info, callback, arg);
899}
900EXPORT_SYMBOL(unwind_init_running);
901
902/*
903 * Unwind until the return pointer is in user-land (or until an error
904 * occurs). Returns 0 if successful, negative number in case of
905 * error.
906 */
907int unwind_to_user(struct unwind_frame_info *info)
908{
909 while (!arch_unw_user_mode(info)) {
910 int err = unwind(info);
911
912 if (err < 0)
913 return err;
914 }
915
916 return 0;
917}
918EXPORT_SYMBOL(unwind_to_user);
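[Editor's note, not part of the patch] The exported entry points above form the whole public surface: initialise a frame for a task (from a pt_regs snapshot, for a blocked task, or for the running thread), then call unwind() repeatedly until it fails or the frame reaches user mode. A minimal sketch of a hypothetical caller follows; dump_task_backtrace() is illustrative only, while struct unwind_frame_info, unwind_init_blocked(), unwind() and UNW_PC() are the interfaces introduced in this file (UNW_PC() itself comes from the arch's asm/unwind.h).

/* Hypothetical consumer of the new API (not in this patch): print the
 * return addresses of a blocked task, mirroring the loop in
 * unwind_to_user() above. */
#include <linux/unwind.h>
#include <linux/kernel.h>
#include <linux/sched.h>

static void dump_task_backtrace(struct task_struct *tsk)
{
	struct unwind_frame_info info;

	if (unwind_init_blocked(&info, tsk) != 0)
		return;
	/* Each successful unwind() steps to the caller's frame. */
	while (unwind(&info) == 0 && UNW_PC(&info))
		printk(KERN_DEBUG " [<%p>]\n", (void *)UNW_PC(&info));
}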
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 740c5abceb..565cf7a1fe 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -428,22 +428,34 @@ int schedule_delayed_work_on(int cpu,
428 return ret; 428 return ret;
429} 429}
430 430
431int schedule_on_each_cpu(void (*func) (void *info), void *info) 431/**
432 * schedule_on_each_cpu - call a function on each online CPU from keventd
433 * @func: the function to call
434 * @info: a pointer to pass to func()
435 *
436 * Returns zero on success.
437 * Returns a negative errno on failure.
438 *
439 * Appears to be racy against CPU hotplug.
440 *
441 * schedule_on_each_cpu() is very slow.
442 */
443int schedule_on_each_cpu(void (*func)(void *info), void *info)
432{ 444{
433 int cpu; 445 int cpu;
434 struct work_struct *work; 446 struct work_struct *works;
435 447
436 work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL); 448 works = alloc_percpu(struct work_struct);
437 449 if (!works)
438 if (!work)
439 return -ENOMEM; 450 return -ENOMEM;
451
440 for_each_online_cpu(cpu) { 452 for_each_online_cpu(cpu) {
441 INIT_WORK(work + cpu, func, info); 453 INIT_WORK(per_cpu_ptr(works, cpu), func, info);
442 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), 454 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
443 work + cpu); 455 per_cpu_ptr(works, cpu));
444 } 456 }
445 flush_workqueue(keventd_wq); 457 flush_workqueue(keventd_wq);
446 kfree(work); 458 free_percpu(works);
447 return 0; 459 return 0;
448} 460}
449 461
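[Editor's note, not part of the patch] For reference, a hedged usage sketch of the kernel-doc'd interface in the hunk above: refresh_stats() and refresh_all_cpus() are made-up names, and only the schedule_on_each_cpu() signature shown above is assumed.

/* Hypothetical caller (not in this patch): run a callback once on every
 * online CPU via keventd and wait for all of them to complete. */
#include <linux/workqueue.h>

static void refresh_stats(void *info)
{
	/* executes in keventd context, once on each online CPU */
}

static int refresh_all_cpus(void)
{
	/* 0 on success, a negative errno if the per-CPU allocation failed */
	return schedule_on_each_cpu(refresh_stats, NULL);
}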
@@ -578,6 +590,8 @@ static int workqueue_cpu_callback(struct notifier_block *nfb,
578 590
579 case CPU_UP_CANCELED: 591 case CPU_UP_CANCELED:
580 list_for_each_entry(wq, &workqueues, list) { 592 list_for_each_entry(wq, &workqueues, list) {
593 if (!per_cpu_ptr(wq->cpu_wq, hotcpu)->thread)
594 continue;
581 /* Unbind so it can run. */ 595 /* Unbind so it can run. */
582 kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, 596 kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread,
583 any_online_cpu(cpu_online_map)); 597 any_online_cpu(cpu_online_map));