Diffstat (limited to 'kernel')
 kernel/Makefile         |   5
 kernel/acct.c           |  30
 kernel/cpuset.c         |   8
 kernel/dma.c            |  10
 kernel/exit.c           |  13
 kernel/fork.c           |  82
 kernel/futex.c          |   2
 kernel/kallsyms.c       | 124
 kernel/kmod.c           |  62
 kernel/kprobes.c        |  53
 kernel/latency.c        | 279
 kernel/lockdep.c        |   6
 kernel/module.c         |  40
 kernel/nsproxy.c        | 139
 kernel/panic.c          |   1
 kernel/pid.c            | 111
 kernel/power/snapshot.c |  10
 kernel/resource.c       |  83
 kernel/sched.c          | 326
 kernel/signal.c         |  65
 kernel/spinlock.c       |   4
 kernel/sys.c            | 110
 kernel/sys_ni.c         |   5
 kernel/sysctl.c         | 363
 kernel/taskstats.c      |  10
 kernel/time.c           | 173
 kernel/time/Makefile    |   2
 kernel/time/ntp.c       | 350
 kernel/timer.c          | 230
 kernel/tsacct.c         | 124
 kernel/utsname.c        |  95
 31 files changed, 2034 insertions(+), 881 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index d62ec66c1a..d948ca12ac 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o
+	    hrtimer.o rwsem.o latency.o nsproxy.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
@@ -48,8 +48,9 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
-obj-$(CONFIG_TASKSTATS) += taskstats.o
+obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/acct.c b/kernel/acct.c
index f4330acead..0aad5ca36a 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -602,33 +602,3 @@ void acct_process(void)
 	do_acct_process(file);
 	fput(file);
 }
-
-
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
-{
-	if (likely(tsk->mm)) {
-		long delta =
-			cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
-
-		if (delta == 0)
-			return;
-		tsk->acct_stimexpd = tsk->stime;
-		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
-		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
-	}
-}
-
-/**
- * acct_clear_integrals - clear the mm integral fields in task_struct
- * @tsk: task_struct whose accounting fields are cleared
- */
-void acct_clear_integrals(struct task_struct *tsk)
-{
-	tsk->acct_stimexpd = 0;
-	tsk->acct_rss_mem1 = 0;
-	tsk->acct_vm_mem1 = 0;
-}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8c3c400cce..9d850ae13b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -377,7 +377,7 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
 		inode->i_op = &simple_dir_inode_operations;
 		inode->i_fop = &simple_dir_operations;
 		/* directories start off with i_nlink == 2 (for "." entry) */
-		inode->i_nlink++;
+		inc_nlink(inode);
 	} else {
 		return -ENOMEM;
 	}
@@ -1565,7 +1565,7 @@ static int cpuset_create_file(struct dentry *dentry, int mode)
 		inode->i_fop = &simple_dir_operations;
 
 		/* start off with i_nlink == 2 (for "." entry) */
-		inode->i_nlink++;
+		inc_nlink(inode);
 	} else if (S_ISREG(mode)) {
 		inode->i_size = 0;
 		inode->i_fop = &cpuset_file_operations;
@@ -1598,7 +1598,7 @@ static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
 	error = cpuset_create_file(dentry, S_IFDIR | mode);
 	if (!error) {
 		dentry->d_fsdata = cs;
-		parent->d_inode->i_nlink++;
+		inc_nlink(parent->d_inode);
 		cs->dentry = dentry;
 	}
 	dput(dentry);
@@ -2033,7 +2033,7 @@ int __init cpuset_init(void)
 	}
 	root = cpuset_mount->mnt_sb->s_root;
 	root->d_fsdata = &top_cpuset;
-	root->d_inode->i_nlink++;
+	inc_nlink(root->d_inode);
 	top_cpuset.dentry = root;
 	root->d_inode->i_op = &cpuset_dir_inode_operations;
 	number_of_cpusets = 1;
diff --git a/kernel/dma.c b/kernel/dma.c
index aef0a45b78..2020644c93 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -62,6 +62,11 @@ static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
 };
 
 
+/**
+ * request_dma - request and reserve a system DMA channel
+ * @dmanr: DMA channel number
+ * @device_id: reserving device ID string, used in /proc/dma
+ */
 int request_dma(unsigned int dmanr, const char * device_id)
 {
 	if (dmanr >= MAX_DMA_CHANNELS)
@@ -76,7 +81,10 @@ int request_dma(unsigned int dmanr, const char * device_id)
 	return 0;
 } /* request_dma */
 
-
+/**
+ * free_dma - free a reserved system DMA channel
+ * @dmanr: DMA channel number
+ */
 void free_dma(unsigned int dmanr)
 {
 	if (dmanr >= MAX_DMA_CHANNELS) {
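The kernel-doc blocks added above document the classic ISA DMA channel lifecycle. For reference, a driver pairs the two calls roughly like this (a minimal sketch; the channel number and ID string are hypothetical, not from this commit):

	/* Hypothetical probe/remove pair for a device hard-wired to channel 3. */
	static int mydev_probe(void)
	{
		int err;

		err = request_dma(3, "mydev");	/* "mydev" appears in /proc/dma */
		if (err)
			return err;		/* -EINVAL or -EBUSY */
		return 0;
	}

	static void mydev_remove(void)
	{
		free_dma(3);			/* release the channel again */
	}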
diff --git a/kernel/exit.c b/kernel/exit.c
index 2e4c13cba9..f250a5e3e2 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -18,8 +18,10 @@
 #include <linux/security.h>
 #include <linux/cpu.h>
 #include <linux/acct.h>
+#include <linux/tsacct_kern.h>
 #include <linux/file.h>
 #include <linux/binfmts.h>
+#include <linux/nsproxy.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
 #include <linux/mount.h>
@@ -38,6 +40,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h>	/* for audit_free() */
 #include <linux/resource.h>
+#include <linux/blkdev.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -395,9 +398,11 @@ void daemonize(const char *name, ...)
 	fs = init_task.fs;
 	current->fs = fs;
 	atomic_inc(&fs->count);
-	exit_namespace(current);
-	current->namespace = init_task.namespace;
-	get_namespace(current->namespace);
+
+	exit_task_namespaces(current);
+	current->nsproxy = init_task.nsproxy;
+	get_task_namespaces(current);
+
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
@@ -915,7 +920,6 @@ fastcall NORET_TYPE void do_exit(long code)
 	exit_sem(tsk);
 	__exit_files(tsk);
 	__exit_fs(tsk);
-	exit_namespace(tsk);
 	exit_thread();
 	cpuset_exit(tsk);
 	exit_keys(tsk);
@@ -930,6 +934,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	tsk->exit_code = code;
 	proc_exit_connector(tsk);
 	exit_notify(tsk);
+	exit_task_namespaces(tsk);
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
 	tsk->mempolicy = NULL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1c999f3e0b..7dc6140baa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,6 +27,7 @@
 #include <linux/binfmts.h>
 #include <linux/mman.h>
 #include <linux/fs.h>
+#include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
@@ -42,6 +43,7 @@
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
+#include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
@@ -1115,11 +1117,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_signal;
 	if ((retval = copy_keys(clone_flags, p)))
 		goto bad_fork_cleanup_mm;
-	if ((retval = copy_namespace(clone_flags, p)))
+	if ((retval = copy_namespaces(clone_flags, p)))
 		goto bad_fork_cleanup_keys;
 	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
-		goto bad_fork_cleanup_namespace;
+		goto bad_fork_cleanup_namespaces;
 
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
@@ -1211,7 +1213,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_cleanup_namespace;
+		goto bad_fork_cleanup_namespaces;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
@@ -1259,8 +1261,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	proc_fork_connector(p);
 	return p;
 
-bad_fork_cleanup_namespace:
-	exit_namespace(p);
+bad_fork_cleanup_namespaces:
+	exit_task_namespaces(p);
 bad_fork_cleanup_keys:
 	exit_keys(p);
 bad_fork_cleanup_mm:
@@ -1513,10 +1515,9 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
  */
 static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
 {
-	struct namespace *ns = current->namespace;
+	struct namespace *ns = current->nsproxy->namespace;
 
-	if ((unshare_flags & CLONE_NEWNS) &&
-	    (ns && atomic_read(&ns->count) > 1)) {
+	if ((unshare_flags & CLONE_NEWNS) && ns) {
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
@@ -1588,6 +1589,16 @@ static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **n
 	return 0;
 }
 
+#ifndef CONFIG_IPC_NS
+static inline int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns)
+{
+	if (flags & CLONE_NEWIPC)
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 /*
  * unshare allows a process to 'unshare' part of the process
  * context which was originally shared using clone.  copy_*
@@ -1605,13 +1616,17 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
+	struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+	struct uts_namespace *uts, *new_uts = NULL;
+	struct ipc_namespace *ipc, *new_ipc = NULL;
 
 	check_unshare_flags(&unshare_flags);
 
 	/* Return -EINVAL for all unsupported flags */
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+				CLONE_NEWUTS|CLONE_NEWIPC))
 		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
@@ -1628,11 +1643,30 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 		goto bad_unshare_cleanup_vm;
 	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
 		goto bad_unshare_cleanup_fd;
+	if ((err = unshare_utsname(unshare_flags, &new_uts)))
+		goto bad_unshare_cleanup_semundo;
+	if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
+		goto bad_unshare_cleanup_uts;
+
+	if (new_ns || new_uts || new_ipc) {
+		old_nsproxy = current->nsproxy;
+		new_nsproxy = dup_namespaces(old_nsproxy);
+		if (!new_nsproxy) {
+			err = -ENOMEM;
+			goto bad_unshare_cleanup_ipc;
+		}
+	}
 
-	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist ||
+				new_uts || new_ipc) {
 
 		task_lock(current);
 
+		if (new_nsproxy) {
+			current->nsproxy = new_nsproxy;
+			new_nsproxy = old_nsproxy;
+		}
+
 		if (new_fs) {
 			fs = current->fs;
 			current->fs = new_fs;
@@ -1640,8 +1674,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 		}
 
 		if (new_ns) {
-			ns = current->namespace;
-			current->namespace = new_ns;
+			ns = current->nsproxy->namespace;
+			current->nsproxy->namespace = new_ns;
 			new_ns = ns;
 		}
 
@@ -1666,9 +1700,33 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 			new_fd = fd;
 		}
 
+		if (new_uts) {
+			uts = current->nsproxy->uts_ns;
+			current->nsproxy->uts_ns = new_uts;
+			new_uts = uts;
+		}
+
+		if (new_ipc) {
+			ipc = current->nsproxy->ipc_ns;
+			current->nsproxy->ipc_ns = new_ipc;
+			new_ipc = ipc;
+		}
+
 		task_unlock(current);
 	}
 
+	if (new_nsproxy)
+		put_nsproxy(new_nsproxy);
+
+bad_unshare_cleanup_ipc:
+	if (new_ipc)
+		put_ipc_ns(new_ipc);
+
+bad_unshare_cleanup_uts:
+	if (new_uts)
+		put_uts_ns(new_uts);
+
+bad_unshare_cleanup_semundo:
 bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
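With sys_unshare() now accepting CLONE_NEWUTS and CLONE_NEWIPC, a process can detach its UTS namespace without forking. A minimal userspace sketch of what this enables (assumes a kernel built with CONFIG_UTS_NS, CAP_SYS_ADMIN, and a libc that exposes unshare(2) and the new flag):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[64];

		/* Detach from the shared UTS namespace. */
		if (unshare(CLONE_NEWUTS) < 0) {
			perror("unshare");
			return 1;
		}
		/* Only this process (and future children) see the new name. */
		if (sethostname("sandbox", strlen("sandbox")) < 0) {
			perror("sethostname");
			return 1;
		}
		gethostname(buf, sizeof(buf));
		printf("hostname in new UTS namespace: %s\n", buf);
		return 0;
	}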
diff --git a/kernel/futex.c b/kernel/futex.c
index 4b6770e980..4aaf91951a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1527,7 +1527,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
 	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
 
 	if (signal) {
-		err = f_setown(filp, current->pid, 1);
+		err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
 		if (err < 0) {
 			goto error;
 		}
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index ab16a5a4cf..eeac3e313b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -69,6 +69,15 @@ static inline int is_kernel(unsigned long addr)
 	return in_gate_area_no_task(addr);
 }
 
+static int is_ksym_addr(unsigned long addr)
+{
+	if (all_var)
+		return is_kernel(addr);
+
+	return is_kernel_text(addr) || is_kernel_inittext(addr) ||
+		is_kernel_extratext(addr);
+}
+
 /* expand a compressed symbol data into the resulting uncompressed string,
    given the offset to where the symbol is in the compressed stream */
 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
@@ -154,7 +163,73 @@ unsigned long kallsyms_lookup_name(const char *name)
 	}
 	return module_kallsyms_lookup_name(name);
 }
-EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
+
+static unsigned long get_symbol_pos(unsigned long addr,
+				    unsigned long *symbolsize,
+				    unsigned long *offset)
+{
+	unsigned long symbol_start = 0, symbol_end = 0;
+	unsigned long i, low, high, mid;
+
+	/* This kernel should never have been booted. */
+	BUG_ON(!kallsyms_addresses);
+
+	/* do a binary search on the sorted kallsyms_addresses array */
+	low = 0;
+	high = kallsyms_num_syms;
+
+	while (high - low > 1) {
+		mid = (low + high) / 2;
+		if (kallsyms_addresses[mid] <= addr)
+			low = mid;
+		else
+			high = mid;
+	}
+
+	/*
+	 * search for the first aliased symbol. Aliased
+	 * symbols are symbols with the same address
+	 */
+	while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
+		--low;
+
+	symbol_start = kallsyms_addresses[low];
+
+	/* Search for next non-aliased symbol */
+	for (i = low + 1; i < kallsyms_num_syms; i++) {
+		if (kallsyms_addresses[i] > symbol_start) {
+			symbol_end = kallsyms_addresses[i];
+			break;
+		}
+	}
+
+	/* if we found no next symbol, we use the end of the section */
+	if (!symbol_end) {
+		if (is_kernel_inittext(addr))
+			symbol_end = (unsigned long)_einittext;
+		else if (all_var)
+			symbol_end = (unsigned long)_end;
+		else
+			symbol_end = (unsigned long)_etext;
+	}
+
+	*symbolsize = symbol_end - symbol_start;
+	*offset = addr - symbol_start;
+
+	return low;
+}
+
+/*
+ * Lookup an address but don't bother to find any names.
+ */
+int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
+				unsigned long *offset)
+{
+	if (is_ksym_addr(addr))
+		return !!get_symbol_pos(addr, symbolsize, offset);
+
+	return !!module_address_lookup(addr, symbolsize, offset, NULL);
+}
 
 /*
  * Lookup an address
@@ -168,57 +243,18 @@ const char *kallsyms_lookup(unsigned long addr,
 			    unsigned long *offset,
 			    char **modname, char *namebuf)
 {
-	unsigned long i, low, high, mid;
 	const char *msym;
 
-	/* This kernel should never had been booted. */
-	BUG_ON(!kallsyms_addresses);
-
 	namebuf[KSYM_NAME_LEN] = 0;
 	namebuf[0] = 0;
 
-	if ((all_var && is_kernel(addr)) ||
-	    (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr) ||
-				is_kernel_extratext(addr)))) {
-		unsigned long symbol_end = 0;
-
-		/* do a binary search on the sorted kallsyms_addresses array */
-		low = 0;
-		high = kallsyms_num_syms;
-
-		while (high-low > 1) {
-			mid = (low + high) / 2;
-			if (kallsyms_addresses[mid] <= addr) low = mid;
-			else high = mid;
-		}
-
-		/* search for the first aliased symbol. Aliased symbols are
-		   symbols with the same address */
-		while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
-			--low;
+	if (is_ksym_addr(addr)) {
+		unsigned long pos;
 
+		pos = get_symbol_pos(addr, symbolsize, offset);
 		/* Grab name */
-		kallsyms_expand_symbol(get_symbol_offset(low), namebuf);
-
-		/* Search for next non-aliased symbol */
-		for (i = low + 1; i < kallsyms_num_syms; i++) {
-			if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
-				symbol_end = kallsyms_addresses[i];
-				break;
-			}
-		}
-
-		/* if we found no next symbol, we use the end of the section */
-		if (!symbol_end) {
-			if (is_kernel_inittext(addr))
-				symbol_end = (unsigned long)_einittext;
-			else
-				symbol_end = all_var ? (unsigned long)_end : (unsigned long)_etext;
-		}
-
-		*symbolsize = symbol_end - kallsyms_addresses[low];
+		kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
 		*modname = NULL;
-		*offset = addr - kallsyms_addresses[low];
 		return namebuf;
 	}
 
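The heart of the new get_symbol_pos() is a lower-bound binary search followed by a backward scan over aliases (distinct symbols sharing one start address). A self-contained userspace sketch of that technique, with a made-up address table:

	#include <stdio.h>

	/* Sorted "symbol start addresses"; 0x2000 appears twice,
	 * mimicking two aliased symbols at one address. */
	static unsigned long addrs[] = { 0x1000, 0x2000, 0x2000, 0x3000 };
	static const int nsyms = sizeof(addrs) / sizeof(addrs[0]);

	/* Index of the first symbol containing addr, as get_symbol_pos()
	 * computes it: lower-bound search, then step back over aliases. */
	static int symbol_pos(unsigned long addr)
	{
		int low = 0, high = nsyms, mid;

		while (high - low > 1) {
			mid = (low + high) / 2;
			if (addrs[mid] <= addr)
				low = mid;
			else
				high = mid;
		}
		while (low && addrs[low - 1] == addrs[low])
			--low;
		return low;
	}

	int main(void)
	{
		/* Prints: 0x2500 falls in symbol #1 (start 0x2000) */
		printf("0x2500 falls in symbol #%d (start 0x%lx)\n",
		       symbol_pos(0x2500), addrs[symbol_pos(0x2500)]);
		return 0;
	}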
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 842f8015d7..bb4e29d924 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -18,8 +18,6 @@
 	call_usermodehelper wait flag, and remove exec_usermodehelper.
 	Rusty Russell <rusty@rustcorp.com.au>  Jan 2003
 */
-#define __KERNEL_SYSCALLS__
-
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/syscalls.h>
@@ -35,6 +33,7 @@
 #include <linux/mount.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/resource.h>
 #include <asm/uaccess.h>
 
 extern int max_threads;
@@ -122,6 +121,7 @@ struct subprocess_info {
 	struct key *ring;
 	int wait;
 	int retval;
+	struct file *stdin;
 };
 
 /*
@@ -145,12 +145,30 @@ static int ____call_usermodehelper(void *data)
 
 	key_put(old_session);
 
+	/* Install input pipe when needed */
+	if (sub_info->stdin) {
+		struct files_struct *f = current->files;
+		struct fdtable *fdt;
+		/* no races because files should be private here */
+		sys_close(0);
+		fd_install(0, sub_info->stdin);
+		spin_lock(&f->file_lock);
+		fdt = files_fdtable(f);
+		FD_SET(0, fdt->open_fds);
+		FD_CLR(0, fdt->close_on_exec);
+		spin_unlock(&f->file_lock);
+
+		/* and disallow core files too */
+		current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0};
+	}
+
 	/* We can run anywhere, unlike our parent keventd(). */
 	set_cpus_allowed(current, CPU_MASK_ALL);
 
 	retval = -EPERM;
 	if (current->fs->root)
-		retval = execve(sub_info->path, sub_info->argv,sub_info->envp);
+		retval = kernel_execve(sub_info->path,
+				sub_info->argv, sub_info->envp);
 
 	/* Exec failed? */
 	sub_info->retval = retval;
@@ -268,6 +286,44 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
 }
 EXPORT_SYMBOL(call_usermodehelper_keys);
 
+int call_usermodehelper_pipe(char *path, char **argv, char **envp,
+			     struct file **filp)
+{
+	DECLARE_COMPLETION(done);
+	struct subprocess_info sub_info = {
+		.complete	= &done,
+		.path		= path,
+		.argv		= argv,
+		.envp		= envp,
+		.retval		= 0,
+	};
+	struct file *f;
+	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
+
+	if (!khelper_wq)
+		return -EBUSY;
+
+	if (path[0] == '\0')
+		return 0;
+
+	f = create_write_pipe();
+	if (!f)
+		return -ENOMEM;
+	*filp = f;
+
+	f = create_read_pipe(f);
+	if (!f) {
+		free_write_pipe(*filp);
+		return -ENOMEM;
+	}
+	sub_info.stdin = f;
+
+	queue_work(khelper_wq, &work);
+	wait_for_completion(&done);
+	return sub_info.retval;
+}
+EXPORT_SYMBOL(call_usermodehelper_pipe);
+
 void __init usermodehelper_init(void)
 {
 	khelper_wq = create_singlethread_workqueue("khelper");
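call_usermodehelper_pipe() hands the caller the write side of a pipe that becomes the helper's stdin; the helper is already running when the call returns, so the caller streams data and then drops its reference. A hedged sketch of the calling convention (the helper path and payload are hypothetical, and the set_fs() dance shown is the era-typical way to feed a kernel buffer through f_op->write):

	/* Sketch only: launch a helper and stream a kernel buffer into its stdin. */
	static int run_helper_with_input(const char *data, size_t len)
	{
		char *argv[] = { "/sbin/my_helper", NULL };
		char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };
		struct file *pipe_w;
		mm_segment_t old_fs;
		loff_t pos = 0;
		int ret;

		ret = call_usermodehelper_pipe("/sbin/my_helper", argv, envp, &pipe_w);
		if (ret)
			return ret;

		/* The buffer lives in kernel space, so widen the address
		 * limit around the write, as in-kernel callers must. */
		old_fs = get_fs();
		set_fs(KERNEL_DS);
		pipe_w->f_op->write(pipe_w, (const char __user *)data, len, &pos);
		set_fs(old_fs);

		fput(pipe_w);	/* closing the write side sends EOF to the helper */
		return 0;
	}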
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3f57dfdc8f..610c837ad9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -37,6 +37,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/kallsyms.h>
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
@@ -45,6 +46,16 @@
 #define KPROBE_HASH_BITS 6
 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
 
+
+/*
+ * Some oddball architectures like 64bit powerpc have function descriptors
+ * so this must be overridable.
+ */
+#ifndef kprobe_lookup_name
+#define kprobe_lookup_name(name, addr) \
+	addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
+#endif
+
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 static atomic_t kprobe_count;
@@ -308,7 +319,8 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri)
 }
 
 /* Called with kretprobe_lock held */
-void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
+void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
+				struct hlist_head *head)
 {
 	/* remove rp inst off the rprobe_inst_table */
 	hlist_del(&ri->hlist);
@@ -320,7 +332,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
 		hlist_add_head(&ri->uflist, &ri->rp->free_instances);
 	} else
 		/* Unregistering */
-		kfree(ri);
+		hlist_add_head(&ri->hlist, head);
 }
 
 struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
@@ -336,18 +348,24 @@ struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
  */
 void __kprobes kprobe_flush_task(struct task_struct *tk)
 {
 	struct kretprobe_instance *ri;
-	struct hlist_head *head;
+	struct hlist_head *head, empty_rp;
 	struct hlist_node *node, *tmp;
 	unsigned long flags = 0;
 
+	INIT_HLIST_HEAD(&empty_rp);
 	spin_lock_irqsave(&kretprobe_lock, flags);
 	head = kretprobe_inst_table_head(tk);
 	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 		if (ri->task == tk)
-			recycle_rp_inst(ri);
+			recycle_rp_inst(ri, &empty_rp);
 	}
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
+
+	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
 }
 
 static inline void free_rp_inst(struct kretprobe *rp)
@@ -447,6 +465,21 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 	struct kprobe *old_p;
 	struct module *probed_mod;
 
+	/*
+	 * If we have a symbol_name argument look it up,
+	 * and add it to the address.  That way the addr
+	 * field can either be global or relative to a symbol.
+	 */
+	if (p->symbol_name) {
+		if (p->addr)
+			return -EINVAL;
+		kprobe_lookup_name(p->symbol_name, p->addr);
+	}
+
+	if (!p->addr)
+		return -EINVAL;
+	p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
+
 	if ((!kernel_text_address((unsigned long) p->addr)) ||
 	    in_kprobes_functions((unsigned long) p->addr))
 		return -EINVAL;
@@ -488,7 +521,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 			(ARCH_INACTIVE_KPROBE_COUNT + 1))
 		register_page_fault_notifier(&kprobe_page_fault_nb);
 
 	arch_arm_kprobe(p);
 
 out:
 	mutex_unlock(&kprobe_mutex);
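With the new symbol_name field, a probe can be registered without resolving the address by hand. A minimal module sketch (the probed symbol is chosen for illustration; error handling trimmed):

	#include <linux/module.h>
	#include <linux/kprobes.h>

	/* Fires on entry to the probed function. */
	static int my_pre(struct kprobe *p, struct pt_regs *regs)
	{
		printk(KERN_INFO "hit %s at %p\n", p->symbol_name, p->addr);
		return 0;
	}

	/* .addr is left NULL; __register_kprobe() fills it in via
	 * kprobe_lookup_name(), honoring function descriptors on ppc64. */
	static struct kprobe kp = {
		.symbol_name	= "do_fork",
		.pre_handler	= my_pre,
	};

	static int __init probe_init(void)
	{
		return register_kprobe(&kp);
	}

	static void __exit probe_exit(void)
	{
		unregister_kprobe(&kp);
	}

	module_init(probe_init);
	module_exit(probe_exit);
	MODULE_LICENSE("GPL");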
diff --git a/kernel/latency.c b/kernel/latency.c
new file mode 100644
index 0000000000..258f2555ab
--- /dev/null
+++ b/kernel/latency.c
@@ -0,0 +1,279 @@
+/*
+ * latency.c: Explicit system-wide latency-expectation infrastructure
+ *
+ * The purpose of this infrastructure is to allow device drivers to set
+ * the latency constraints they have and to collect and summarize these
+ * expectations globally. The cumulative result can then be used by
+ * power management and similar users to make decisions that have
+ * tradeoffs with a latency component.
+ *
+ * An example user of this is the x86 C-states; each higher C state saves
+ * more power, but has a higher exit latency. For the idle loop power
+ * code to make a good decision which C-state to use, information about
+ * acceptable latencies is required.
+ *
+ * An example announcer of latency is an audio driver that knows it
+ * will get an interrupt when the hardware has 200 usec of samples
+ * left in the DMA buffer; in that case the driver can set a latency
+ * constraint of, say, 150 usec.
+ *
+ * Multiple drivers can each announce their maximum accepted latency;
+ * to keep these apart, a string based identifier is used.
+ *
+ *
+ * (C) Copyright 2006 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/latency.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <asm/atomic.h>
+
+struct latency_info {
+	struct list_head list;
+	int usecs;
+	char *identifier;
+};
+
+/*
+ * locking rule: all modifications to current_max_latency and
+ * latency_list need to be done while holding the latency_lock.
+ * latency_lock needs to be taken _irqsave.
+ */
+static atomic_t current_max_latency;
+static DEFINE_SPINLOCK(latency_lock);
+
+static LIST_HEAD(latency_list);
+static BLOCKING_NOTIFIER_HEAD(latency_notifier);
+
+/*
+ * This function returns the maximum latency allowed, which
+ * happens to be the minimum of all maximum latencies on the
+ * list.
+ */
+static int __find_max_latency(void)
+{
+	int min = INFINITE_LATENCY;
+	struct latency_info *info;
+
+	list_for_each_entry(info, &latency_list, list) {
+		if (info->usecs < min)
+			min = info->usecs;
+	}
+	return min;
+}
+
+/**
+ * set_acceptable_latency - sets the maximum latency acceptable
+ * @identifier: string that identifies this driver
+ * @usecs: maximum acceptable latency for this driver
+ *
+ * This function informs the kernel that this device (driver)
+ * can accept at most usecs latency. This setting is used for
+ * power management and similar tradeoffs.
+ *
+ * This function sleeps and can only be called from process
+ * context.
+ * Calling this function with an existing identifier is valid
+ * and will cause the existing latency setting to be changed.
+ */
+void set_acceptable_latency(char *identifier, int usecs)
+{
+	struct latency_info *info, *iter;
+	unsigned long flags;
+	int found_old = 0;
+
+	info = kzalloc(sizeof(struct latency_info), GFP_KERNEL);
+	if (!info)
+		return;
+	info->usecs = usecs;
+	info->identifier = kstrdup(identifier, GFP_KERNEL);
+	if (!info->identifier)
+		goto free_info;
+
+	spin_lock_irqsave(&latency_lock, flags);
+	list_for_each_entry(iter, &latency_list, list) {
+		if (strcmp(iter->identifier, identifier) == 0) {
+			found_old = 1;
+			iter->usecs = usecs;
+			break;
+		}
+	}
+	if (!found_old)
+		list_add(&info->list, &latency_list);
+
+	if (usecs < atomic_read(&current_max_latency))
+		atomic_set(&current_max_latency, usecs);
+
+	spin_unlock_irqrestore(&latency_lock, flags);
+
+	blocking_notifier_call_chain(&latency_notifier,
+		atomic_read(&current_max_latency), NULL);
+
+	/*
+	 * if we inserted the new one, we're done; otherwise there was
+	 * an existing one so we need to free the redundant data
+	 */
+	if (!found_old)
+		return;
+
+	kfree(info->identifier);
+free_info:
+	kfree(info);
+}
+EXPORT_SYMBOL_GPL(set_acceptable_latency);
+
+/**
+ * modify_acceptable_latency - changes the maximum latency acceptable
+ * @identifier: string that identifies this driver
+ * @usecs: maximum acceptable latency for this driver
+ *
+ * This function informs the kernel that this device (driver)
+ * can accept at most usecs latency. This setting is used for
+ * power management and similar tradeoffs.
+ *
+ * This function does not sleep and can be called in any context.
+ * Trying to use a non-existing identifier is silently ignored.
+ *
+ * Due to the atomic nature of this function, the modified latency
+ * value will only be used for future decisions; past decisions
+ * can still lead to longer latencies in the near future.
+ */
+void modify_acceptable_latency(char *identifier, int usecs)
+{
+	struct latency_info *iter;
+	unsigned long flags;
+
+	spin_lock_irqsave(&latency_lock, flags);
+	list_for_each_entry(iter, &latency_list, list) {
+		if (strcmp(iter->identifier, identifier) == 0) {
+			iter->usecs = usecs;
+			break;
+		}
+	}
+	if (usecs < atomic_read(&current_max_latency))
+		atomic_set(&current_max_latency, usecs);
+	spin_unlock_irqrestore(&latency_lock, flags);
+}
+EXPORT_SYMBOL_GPL(modify_acceptable_latency);
+
+/**
+ * remove_acceptable_latency - removes the maximum latency acceptable
+ * @identifier: string that identifies this driver
+ *
+ * This function removes a previously set maximum latency setting
+ * for the driver and frees up any resources associated with the
+ * bookkeeping needed for this.
+ *
+ * This function does not sleep and can be called in any context.
+ * Trying to use a non-existing identifier is silently ignored.
+ */
+void remove_acceptable_latency(char *identifier)
+{
+	unsigned long flags;
+	int newmax = 0;
+	struct latency_info *iter, *temp;
+
+	spin_lock_irqsave(&latency_lock, flags);
+
+	list_for_each_entry_safe(iter, temp, &latency_list, list) {
+		if (strcmp(iter->identifier, identifier) == 0) {
+			list_del(&iter->list);
+			newmax = iter->usecs;
+			kfree(iter->identifier);
+			kfree(iter);
+			break;
+		}
+	}
+
+	/* If we just deleted the system wide value, we need to
+	 * recalculate with a full search
+	 */
+	if (newmax == atomic_read(&current_max_latency)) {
+		newmax = __find_max_latency();
+		atomic_set(&current_max_latency, newmax);
+	}
+	spin_unlock_irqrestore(&latency_lock, flags);
+}
+EXPORT_SYMBOL_GPL(remove_acceptable_latency);
+
+/**
+ * system_latency_constraint - queries the system wide latency maximum
+ *
+ * This function returns the system wide maximum latency in
+ * microseconds.
+ *
+ * This function does not sleep and can be called in any context.
+ */
+int system_latency_constraint(void)
+{
+	return atomic_read(&current_max_latency);
+}
+EXPORT_SYMBOL_GPL(system_latency_constraint);
+
+/**
+ * synchronize_acceptable_latency - recalculates all latency decisions
+ *
+ * This function will cause a callback to various kernel pieces that
+ * will make those pieces rethink their latency decisions. This implies
+ * that if there are overlong latencies in hardware state already, those
+ * latencies get taken right now. When this call completes no overlong
+ * latency decisions should be active anymore.
+ *
+ * Typical use case of this is after a modify_acceptable_latency() call,
+ * which in itself is non-blocking and non-synchronizing.
+ *
+ * This function blocks and should not be called with locks held.
+ */
+
+void synchronize_acceptable_latency(void)
+{
+	blocking_notifier_call_chain(&latency_notifier,
+		atomic_read(&current_max_latency), NULL);
+}
+EXPORT_SYMBOL_GPL(synchronize_acceptable_latency);
+
+/*
+ * Latency notifier: this notifier gets called when a non-atomic new
+ * latency value gets set. The expectation of the caller of the
+ * non-atomic set is that when the call returns, future latencies
+ * are within bounds, so the functions on the notifier list are
+ * expected to take the overlong latencies immediately, inside the
+ * callback, and not make an overlong latency decision anymore.
+ *
+ * The callback gets called when the new latency value is made
+ * active so system_latency_constraint() returns the new latency.
+ */
+int register_latency_notifier(struct notifier_block * nb)
+{
+	return blocking_notifier_chain_register(&latency_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(register_latency_notifier);
+
+int unregister_latency_notifier(struct notifier_block * nb)
+{
+	return blocking_notifier_chain_unregister(&latency_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_latency_notifier);
+
+static __init int latency_init(void)
+{
+	atomic_set(&current_max_latency, INFINITE_LATENCY);
+	/*
+	 * by default we don't want latencies longer than 2 ticks,
+	 * since that would cause lost ticks
+	 */
+	set_acceptable_latency("kernel", 2*1000000/HZ);
+	return 0;
+}
+
+module_init(latency_init);
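Tying the pieces together, the audio-driver scenario from the file's header comment maps onto the new API roughly like this (driver name and numbers are illustrative, not from this commit):

	/* Illustrative only: an audio driver that refills its DMA buffer
	 * from an interrupt and cannot tolerate wakeup latencies longer
	 * than the slack left in that buffer. */
	static void mydrv_start_playback(void)
	{
		/* ~200 usec of samples remain when the IRQ fires; leave margin. */
		set_acceptable_latency("mydrv-audio", 150);
	}

	static void mydrv_period_changed(int new_slack_usecs)
	{
		/* safe from interrupt context: the non-sleeping variant */
		modify_acceptable_latency("mydrv-audio", new_slack_usecs);
	}

	static void mydrv_stop_playback(void)
	{
		remove_acceptable_latency("mydrv-audio");
	}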
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e596525669..4c05534610 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -518,9 +518,9 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth)
 
 static void print_kernel_version(void)
 {
-	printk("%s %.*s\n", system_utsname.release,
-		(int)strcspn(system_utsname.version, " "),
-		system_utsname.version);
+	printk("%s %.*s\n", init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version);
 }
 
 /*
diff --git a/kernel/module.c b/kernel/module.c
index 05625d5dc7..7f60e782de 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -851,6 +851,7 @@ static int check_version(Elf_Shdr *sechdrs,
 		printk("%s: no version for \"%s\" found: kernel tainted.\n",
 		       mod->name, symname);
 		add_taint(TAINT_FORCED_MODULE);
+		mod->taints |= TAINT_FORCED_MODULE;
 	}
 	return 1;
 }
@@ -1339,6 +1340,7 @@ static void set_license(struct module *mod, const char *license)
 		printk(KERN_WARNING "%s: module license '%s' taints kernel.\n",
 		       mod->name, license);
 		add_taint(TAINT_PROPRIETARY_MODULE);
+		mod->taints |= TAINT_PROPRIETARY_MODULE;
 	}
 }
 
@@ -1618,6 +1620,7 @@ static struct module *load_module(void __user *umod,
 	/* This is allowed: modprobe --force will invalidate it. */
 	if (!modmagic) {
 		add_taint(TAINT_FORCED_MODULE);
+		mod->taints |= TAINT_FORCED_MODULE;
 		printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
 		       mod->name);
 	} else if (!same_magic(modmagic, vermagic)) {
@@ -1711,10 +1714,14 @@ static struct module *load_module(void __user *umod,
 	/* Set up license info based on the info section */
 	set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
 
-	if (strcmp(mod->name, "ndiswrapper") == 0)
+	if (strcmp(mod->name, "ndiswrapper") == 0) {
 		add_taint(TAINT_PROPRIETARY_MODULE);
-	if (strcmp(mod->name, "driverloader") == 0)
+		mod->taints |= TAINT_PROPRIETARY_MODULE;
+	}
+	if (strcmp(mod->name, "driverloader") == 0) {
 		add_taint(TAINT_PROPRIETARY_MODULE);
+		mod->taints |= TAINT_PROPRIETARY_MODULE;
+	}
 
 	/* Set up MODINFO_ATTR fields */
 	setup_modinfo(mod, sechdrs, infoindex);
@@ -1760,6 +1767,7 @@ static struct module *load_module(void __user *umod,
 		printk(KERN_WARNING "%s: No versions for exported symbols."
 		       " Tainting kernel.\n", mod->name);
 		add_taint(TAINT_FORCED_MODULE);
+		mod->taints |= TAINT_FORCED_MODULE;
 	}
 #endif
 
@@ -2032,7 +2040,8 @@ const char *module_address_lookup(unsigned long addr,
 	list_for_each_entry(mod, &modules, list) {
 		if (within(addr, mod->module_init, mod->init_size)
 		    || within(addr, mod->module_core, mod->core_size)) {
-			*modname = mod->name;
+			if (modname)
+				*modname = mod->name;
 			return get_ksymbol(mod, addr, size, offset);
 		}
 	}
@@ -2226,14 +2235,37 @@ struct module *module_text_address(unsigned long addr)
 	return mod;
 }
 
+static char *taint_flags(unsigned int taints, char *buf)
+{
+	*buf = '\0';
+	if (taints) {
+		int bx;
+
+		buf[0] = '(';
+		bx = 1;
+		if (taints & TAINT_PROPRIETARY_MODULE)
+			buf[bx++] = 'P';
+		if (taints & TAINT_FORCED_MODULE)
+			buf[bx++] = 'F';
+		/*
+		 * TAINT_FORCED_RMMOD: could be added.
+		 * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
+		 * apply to modules.
+		 */
+		buf[bx++] = ')';
+		buf[bx] = '\0';	/* terminate: the printk("%s") below needs it */
+	}
+	return buf;
+}
+
 /* Don't grab lock, we're oopsing. */
 void print_modules(void)
 {
 	struct module *mod;
+	char buf[8];
 
 	printk("Modules linked in:");
 	list_for_each_entry(mod, &modules, list)
-		printk(" %s", mod->name);
+		printk(" %s%s", mod->name, taint_flags(mod->taints, buf));
 	printk("\n");
 }
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c new file mode 100644 index 0000000000..6ebdb82a0c --- /dev/null +++ b/kernel/nsproxy.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006 IBM Corporation | ||
3 | * | ||
4 | * Author: Serge Hallyn <serue@us.ibm.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation, version 2 of the | ||
9 | * License. | ||
10 | * | ||
11 | * Jun 2006 - namespaces support | ||
12 | * OpenVZ, SWsoft Inc. | ||
13 | * Pavel Emelianov <xemul@openvz.org> | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/version.h> | ||
18 | #include <linux/nsproxy.h> | ||
19 | #include <linux/init_task.h> | ||
20 | #include <linux/namespace.h> | ||
21 | #include <linux/utsname.h> | ||
22 | |||
23 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); | ||
24 | |||
25 | static inline void get_nsproxy(struct nsproxy *ns) | ||
26 | { | ||
27 | atomic_inc(&ns->count); | ||
28 | } | ||
29 | |||
30 | void get_task_namespaces(struct task_struct *tsk) | ||
31 | { | ||
32 | struct nsproxy *ns = tsk->nsproxy; | ||
33 | if (ns) { | ||
34 | get_nsproxy(ns); | ||
35 | } | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * creates a copy of "orig" with refcount 1. | ||
40 | * This does not grab references to the contained namespaces, | ||
41 | * so that needs to be done by dup_namespaces. | ||
42 | */ | ||
43 | static inline struct nsproxy *clone_namespaces(struct nsproxy *orig) | ||
44 | { | ||
45 | struct nsproxy *ns; | ||
46 | |||
47 | ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL); | ||
48 | if (ns) { | ||
49 | memcpy(ns, orig, sizeof(struct nsproxy)); | ||
50 | atomic_set(&ns->count, 1); | ||
51 | } | ||
52 | return ns; | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * copies the nsproxy, setting refcount to 1, and grabbing a | ||
57 | * reference to all contained namespaces. Called from | ||
58 | * sys_unshare() | ||
59 | */ | ||
60 | struct nsproxy *dup_namespaces(struct nsproxy *orig) | ||
61 | { | ||
62 | struct nsproxy *ns = clone_namespaces(orig); | ||
63 | |||
64 | if (ns) { | ||
65 | if (ns->namespace) | ||
66 | get_namespace(ns->namespace); | ||
67 | if (ns->uts_ns) | ||
68 | get_uts_ns(ns->uts_ns); | ||
69 | if (ns->ipc_ns) | ||
70 | get_ipc_ns(ns->ipc_ns); | ||
71 | } | ||
72 | |||
73 | return ns; | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * called from clone. This now handles copy for nsproxy and all | ||
78 | * namespaces therein. | ||
79 | */ | ||
80 | int copy_namespaces(int flags, struct task_struct *tsk) | ||
81 | { | ||
82 | struct nsproxy *old_ns = tsk->nsproxy; | ||
83 | struct nsproxy *new_ns; | ||
84 | int err = 0; | ||
85 | |||
86 | if (!old_ns) | ||
87 | return 0; | ||
88 | |||
89 | get_nsproxy(old_ns); | ||
90 | |||
91 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) | ||
92 | return 0; | ||
93 | |||
94 | new_ns = clone_namespaces(old_ns); | ||
95 | if (!new_ns) { | ||
96 | err = -ENOMEM; | ||
97 | goto out; | ||
98 | } | ||
99 | |||
100 | tsk->nsproxy = new_ns; | ||
101 | |||
102 | err = copy_namespace(flags, tsk); | ||
103 | if (err) | ||
104 | goto out_ns; | ||
105 | |||
106 | err = copy_utsname(flags, tsk); | ||
107 | if (err) | ||
108 | goto out_uts; | ||
109 | |||
110 | err = copy_ipcs(flags, tsk); | ||
111 | if (err) | ||
112 | goto out_ipc; | ||
113 | |||
114 | out: | ||
115 | put_nsproxy(old_ns); | ||
116 | return err; | ||
117 | |||
118 | out_ipc: | ||
119 | if (new_ns->uts_ns) | ||
120 | put_uts_ns(new_ns->uts_ns); | ||
121 | out_uts: | ||
122 | if (new_ns->namespace) | ||
123 | put_namespace(new_ns->namespace); | ||
124 | out_ns: | ||
125 | tsk->nsproxy = old_ns; | ||
126 | kfree(new_ns); | ||
127 | goto out; | ||
128 | } | ||
129 | |||
130 | void free_nsproxy(struct nsproxy *ns) | ||
131 | { | ||
132 | if (ns->namespace) | ||
133 | put_namespace(ns->namespace); | ||
134 | if (ns->uts_ns) | ||
135 | put_uts_ns(ns->uts_ns); | ||
136 | if (ns->ipc_ns) | ||
137 | put_ipc_ns(ns->ipc_ns); | ||
138 | kfree(ns); | ||
139 | } | ||
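What the new file buys is visible from user space: once sys_unshare() grows CLONE_NEWUTS handling (dup_namespaces() above is the helper it calls), a task can take a private uts namespace while every other member of its nsproxy stays shared. A minimal demonstration, assuming root (CAP_SYS_ADMIN) and a CONFIG_UTS_NS kernel:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/utsname.h>

	int main(void)
	{
		struct utsname u;

		/* Under the hood the kernel dups our nsproxy and hands
		 * us a private uts_ns; the other members stay shared. */
		if (unshare(CLONE_NEWUTS) < 0) {
			perror("unshare");
			return 1;
		}
		/* Only our namespace's copy of the hostname changes... */
		if (sethostname("sandbox", 7) < 0)
			perror("sethostname");
		uname(&u);
		printf("hostname here: %s\n", u.nodename); /* "sandbox" */
		/* ...the rest of the system keeps its old hostname. */
		return 0;
	}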
diff --git a/kernel/panic.c b/kernel/panic.c index 6ceb664fb5..525e365f72 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/debug_locks.h> | 21 | #include <linux/debug_locks.h> |
22 | 22 | ||
23 | int panic_on_oops; | 23 | int panic_on_oops; |
24 | int panic_on_unrecovered_nmi; | ||
25 | int tainted; | 24 | int tainted; |
26 | static int pause_on_oops; | 25 | static int pause_on_oops; |
27 | static int pause_on_oops_flag; | 26 | static int pause_on_oops_flag; |
diff --git a/kernel/pid.c b/kernel/pid.c index 8387e8c681..b914392085 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/hash.h> | 28 | #include <linux/hash.h> |
29 | #include <linux/pspace.h> | ||
29 | 30 | ||
30 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) | 31 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) |
31 | static struct hlist_head *pid_hash; | 32 | static struct hlist_head *pid_hash; |
@@ -33,17 +34,20 @@ static int pidhash_shift; | |||
33 | static kmem_cache_t *pid_cachep; | 34 | static kmem_cache_t *pid_cachep; |
34 | 35 | ||
35 | int pid_max = PID_MAX_DEFAULT; | 36 | int pid_max = PID_MAX_DEFAULT; |
36 | int last_pid; | ||
37 | 37 | ||
38 | #define RESERVED_PIDS 300 | 38 | #define RESERVED_PIDS 300 |
39 | 39 | ||
40 | int pid_max_min = RESERVED_PIDS + 1; | 40 | int pid_max_min = RESERVED_PIDS + 1; |
41 | int pid_max_max = PID_MAX_LIMIT; | 41 | int pid_max_max = PID_MAX_LIMIT; |
42 | 42 | ||
43 | #define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) | ||
44 | #define BITS_PER_PAGE (PAGE_SIZE*8) | 43 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
45 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) | 44 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) |
46 | #define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off)) | 45 | |
46 | static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off) | ||
47 | { | ||
48 | return (map - pspace->pidmap)*BITS_PER_PAGE + off; | ||
49 | } | ||
50 | |||
47 | #define find_next_offset(map, off) \ | 51 | #define find_next_offset(map, off) \ |
48 | find_next_zero_bit((map)->page, BITS_PER_PAGE, off) | 52 | find_next_zero_bit((map)->page, BITS_PER_PAGE, off) |
49 | 53 | ||
@@ -53,13 +57,12 @@ int pid_max_max = PID_MAX_LIMIT; | |||
53 | * value does not cause lots of bitmaps to be allocated, but | 57 | * value does not cause lots of bitmaps to be allocated, but |
54 | * the scheme scales to up to 4 million PIDs, runtime. | 58 | * the scheme scales to up to 4 million PIDs, runtime. |
55 | */ | 59 | */ |
56 | typedef struct pidmap { | 60 | struct pspace init_pspace = { |
57 | atomic_t nr_free; | 61 | .pidmap = { |
58 | void *page; | 62 | [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } |
59 | } pidmap_t; | 63 | }, |
60 | 64 | .last_pid = 0 | |
61 | static pidmap_t pidmap_array[PIDMAP_ENTRIES] = | 65 | }; |
62 | { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; | ||
63 | 66 | ||
64 | /* | 67 | /* |
65 | * Note: disable interrupts while the pidmap_lock is held as an | 68 | * Note: disable interrupts while the pidmap_lock is held as an |
@@ -74,40 +77,41 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] = | |||
74 | * irq handlers that take it we can leave the interrupts enabled. | 77 | * irq handlers that take it we can leave the interrupts enabled. |
75 | * For now it is easier to be safe than to prove it can't happen. | 78 | * For now it is easier to be safe than to prove it can't happen. |
76 | */ | 79 | */ |
80 | |||
77 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); | 81 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); |
78 | 82 | ||
79 | static fastcall void free_pidmap(int pid) | 83 | static fastcall void free_pidmap(struct pspace *pspace, int pid) |
80 | { | 84 | { |
81 | pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; | 85 | struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE; |
82 | int offset = pid & BITS_PER_PAGE_MASK; | 86 | int offset = pid & BITS_PER_PAGE_MASK; |
83 | 87 | ||
84 | clear_bit(offset, map->page); | 88 | clear_bit(offset, map->page); |
85 | atomic_inc(&map->nr_free); | 89 | atomic_inc(&map->nr_free); |
86 | } | 90 | } |
87 | 91 | ||
88 | static int alloc_pidmap(void) | 92 | static int alloc_pidmap(struct pspace *pspace) |
89 | { | 93 | { |
90 | int i, offset, max_scan, pid, last = last_pid; | 94 | int i, offset, max_scan, pid, last = pspace->last_pid; |
91 | pidmap_t *map; | 95 | struct pidmap *map; |
92 | 96 | ||
93 | pid = last + 1; | 97 | pid = last + 1; |
94 | if (pid >= pid_max) | 98 | if (pid >= pid_max) |
95 | pid = RESERVED_PIDS; | 99 | pid = RESERVED_PIDS; |
96 | offset = pid & BITS_PER_PAGE_MASK; | 100 | offset = pid & BITS_PER_PAGE_MASK; |
97 | map = &pidmap_array[pid/BITS_PER_PAGE]; | 101 | map = &pspace->pidmap[pid/BITS_PER_PAGE]; |
98 | max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; | 102 | max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; |
99 | for (i = 0; i <= max_scan; ++i) { | 103 | for (i = 0; i <= max_scan; ++i) { |
100 | if (unlikely(!map->page)) { | 104 | if (unlikely(!map->page)) { |
101 | unsigned long page = get_zeroed_page(GFP_KERNEL); | 105 | void *page = kzalloc(PAGE_SIZE, GFP_KERNEL); |
102 | /* | 106 | /* |
103 | * Free the page if someone raced with us | 107 | * Free the page if someone raced with us |
104 | * installing it: | 108 | * installing it: |
105 | */ | 109 | */ |
106 | spin_lock_irq(&pidmap_lock); | 110 | spin_lock_irq(&pidmap_lock); |
107 | if (map->page) | 111 | if (map->page) |
108 | free_page(page); | 112 | kfree(page); |
109 | else | 113 | else |
110 | map->page = (void *)page; | 114 | map->page = page; |
111 | spin_unlock_irq(&pidmap_lock); | 115 | spin_unlock_irq(&pidmap_lock); |
112 | if (unlikely(!map->page)) | 116 | if (unlikely(!map->page)) |
113 | break; | 117 | break; |
@@ -116,11 +120,11 @@ static int alloc_pidmap(void) | |||
116 | do { | 120 | do { |
117 | if (!test_and_set_bit(offset, map->page)) { | 121 | if (!test_and_set_bit(offset, map->page)) { |
118 | atomic_dec(&map->nr_free); | 122 | atomic_dec(&map->nr_free); |
119 | last_pid = pid; | 123 | pspace->last_pid = pid; |
120 | return pid; | 124 | return pid; |
121 | } | 125 | } |
122 | offset = find_next_offset(map, offset); | 126 | offset = find_next_offset(map, offset); |
123 | pid = mk_pid(map, offset); | 127 | pid = mk_pid(pspace, map, offset); |
124 | /* | 128 | /* |
125 | * find_next_offset() found a bit, the pid from it | 129 | * find_next_offset() found a bit, the pid from it |
126 | * is in-bounds, and if we fell back to the last | 130 | * is in-bounds, and if we fell back to the last |
@@ -131,16 +135,34 @@ static int alloc_pidmap(void) | |||
131 | (i != max_scan || pid < last || | 135 | (i != max_scan || pid < last || |
132 | !((last+1) & BITS_PER_PAGE_MASK))); | 136 | !((last+1) & BITS_PER_PAGE_MASK))); |
133 | } | 137 | } |
134 | if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) { | 138 | if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) { |
135 | ++map; | 139 | ++map; |
136 | offset = 0; | 140 | offset = 0; |
137 | } else { | 141 | } else { |
138 | map = &pidmap_array[0]; | 142 | map = &pspace->pidmap[0]; |
139 | offset = RESERVED_PIDS; | 143 | offset = RESERVED_PIDS; |
140 | if (unlikely(last == offset)) | 144 | if (unlikely(last == offset)) |
141 | break; | 145 | break; |
142 | } | 146 | } |
143 | pid = mk_pid(map, offset); | 147 | pid = mk_pid(pspace, map, offset); |
148 | } | ||
149 | return -1; | ||
150 | } | ||
151 | |||
152 | static int next_pidmap(struct pspace *pspace, int last) | ||
153 | { | ||
154 | int offset; | ||
155 | struct pidmap *map, *end; | ||
156 | |||
157 | offset = (last + 1) & BITS_PER_PAGE_MASK; | ||
158 | map = &pspace->pidmap[(last + 1)/BITS_PER_PAGE]; | ||
159 | end = &pspace->pidmap[PIDMAP_ENTRIES]; | ||
160 | for (; map < end; map++, offset = 0) { | ||
161 | if (unlikely(!map->page)) | ||
162 | continue; | ||
163 | offset = find_next_bit((map)->page, BITS_PER_PAGE, offset); | ||
164 | if (offset < BITS_PER_PAGE) | ||
165 | return mk_pid(pspace, map, offset); | ||
144 | } | 166 | } |
145 | return -1; | 167 | return -1; |
146 | } | 168 | } |
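The pid <-> (map, offset) split that free_pidmap(), alloc_pidmap() and next_pidmap() all rely on is plain integer arithmetic over the bitmap pages. A stand-alone illustration, assuming 4 KiB pages so that BITS_PER_PAGE is 32768:

	#include <stdio.h>

	#define PAGE_SIZE		4096
	#define BITS_PER_PAGE		(PAGE_SIZE * 8)
	#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)

	int main(void)
	{
		int pid = 40000;
		int map_idx = pid / BITS_PER_PAGE;	/* bitmap page 1 */
		int offset = pid & BITS_PER_PAGE_MASK;	/* bit 7232 within it */

		/* mk_pid() is exactly the inverse of the split above: */
		printf("%d\n", map_idx * BITS_PER_PAGE + offset); /* 40000 */
		return 0;
	}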
@@ -153,6 +175,7 @@ fastcall void put_pid(struct pid *pid) | |||
153 | atomic_dec_and_test(&pid->count)) | 175 | atomic_dec_and_test(&pid->count)) |
154 | kmem_cache_free(pid_cachep, pid); | 176 | kmem_cache_free(pid_cachep, pid); |
155 | } | 177 | } |
178 | EXPORT_SYMBOL_GPL(put_pid); | ||
156 | 179 | ||
157 | static void delayed_put_pid(struct rcu_head *rhp) | 180 | static void delayed_put_pid(struct rcu_head *rhp) |
158 | { | 181 | { |
@@ -169,7 +192,7 @@ fastcall void free_pid(struct pid *pid) | |||
169 | hlist_del_rcu(&pid->pid_chain); | 192 | hlist_del_rcu(&pid->pid_chain); |
170 | spin_unlock_irqrestore(&pidmap_lock, flags); | 193 | spin_unlock_irqrestore(&pidmap_lock, flags); |
171 | 194 | ||
172 | free_pidmap(pid->nr); | 195 | free_pidmap(&init_pspace, pid->nr); |
173 | call_rcu(&pid->rcu, delayed_put_pid); | 196 | call_rcu(&pid->rcu, delayed_put_pid); |
174 | } | 197 | } |
175 | 198 | ||
@@ -183,7 +206,7 @@ struct pid *alloc_pid(void) | |||
183 | if (!pid) | 206 | if (!pid) |
184 | goto out; | 207 | goto out; |
185 | 208 | ||
186 | nr = alloc_pidmap(); | 209 | nr = alloc_pidmap(&init_pspace); |
187 | if (nr < 0) | 210 | if (nr < 0) |
188 | goto out_free; | 211 | goto out_free; |
189 | 212 | ||
@@ -217,6 +240,7 @@ struct pid * fastcall find_pid(int nr) | |||
217 | } | 240 | } |
218 | return NULL; | 241 | return NULL; |
219 | } | 242 | } |
243 | EXPORT_SYMBOL_GPL(find_pid); | ||
220 | 244 | ||
221 | int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) | 245 | int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) |
222 | { | 246 | { |
@@ -280,6 +304,15 @@ struct task_struct *find_task_by_pid_type(int type, int nr) | |||
280 | 304 | ||
281 | EXPORT_SYMBOL(find_task_by_pid_type); | 305 | EXPORT_SYMBOL(find_task_by_pid_type); |
282 | 306 | ||
307 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | ||
308 | { | ||
309 | struct pid *pid; | ||
310 | rcu_read_lock(); | ||
311 | pid = get_pid(task->pids[type].pid); | ||
312 | rcu_read_unlock(); | ||
313 | return pid; | ||
314 | } | ||
315 | |||
283 | struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type) | 316 | struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type) |
284 | { | 317 | { |
285 | struct task_struct *result; | 318 | struct task_struct *result; |
@@ -303,6 +336,26 @@ struct pid *find_get_pid(pid_t nr) | |||
303 | } | 336 | } |
304 | 337 | ||
305 | /* | 338 | /* |
339 | * Used by proc to find the first pid that is greater than or equal to nr. | ||
340 | * | ||
341 | * If there is a pid at nr this function is exactly the same as find_pid. | ||
342 | */ | ||
343 | struct pid *find_ge_pid(int nr) | ||
344 | { | ||
345 | struct pid *pid; | ||
346 | |||
347 | do { | ||
348 | pid = find_pid(nr); | ||
349 | if (pid) | ||
350 | break; | ||
351 | nr = next_pidmap(&init_pspace, nr); | ||
352 | } while (nr > 0); | ||
353 | |||
354 | return pid; | ||
355 | } | ||
356 | EXPORT_SYMBOL_GPL(find_get_pid); | ||
357 | |||
358 | /* | ||
306 | * The pid hash table is scaled according to the amount of memory in the | 359 | * The pid hash table is scaled according to the amount of memory in the |
307 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or | 360 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or |
308 | * more. | 361 | * more. |
@@ -329,10 +382,10 @@ void __init pidhash_init(void) | |||
329 | 382 | ||
330 | void __init pidmap_init(void) | 383 | void __init pidmap_init(void) |
331 | { | 384 | { |
332 | pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); | 385 | init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); |
333 | /* Reserve PID 0. We never call free_pidmap(0) */ | 386 | /* Reserve PID 0. We never call free_pidmap(0) */ |
334 | set_bit(0, pidmap_array->page); | 387 | set_bit(0, init_pspace.pidmap[0].page); |
335 | atomic_dec(&pidmap_array->nr_free); | 388 | atomic_dec(&init_pspace.pidmap[0].nr_free); |
336 | 389 | ||
337 | pid_cachep = kmem_cache_create("pid", sizeof(struct pid), | 390 | pid_cachep = kmem_cache_create("pid", sizeof(struct pid), |
338 | __alignof__(struct pid), | 391 | __alignof__(struct pid), |
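find_ge_pid() turns these bitmaps into an ordered iterator: look up nr directly, and if nothing lives there, hop to the next allocated bit with next_pidmap(). The same walk, modelled over a toy one-page bitmap in user-space C (illustrative only, not kernel code):

	#include <stdio.h>

	/* Toy next_pidmap(): next set bit strictly after 'last'. */
	static int next_used(const unsigned char *map, int nbits, int last)
	{
		int i;

		for (i = last + 1; i < nbits; i++)
			if (map[i / 8] & (1u << (i % 8)))
				return i;
		return -1;
	}

	int main(void)
	{
		unsigned char map[16] = { 0 };
		int nr;

		map[0] |= 1 << 1;	/* "pid" 1 in use */
		map[5] |= 1 << 2;	/* "pid" 42 in use */

		/* The find_ge_pid()-style walk: prints 1, then 42. */
		for (nr = 0; (nr = next_used(map, 128, nr)) > 0; )
			printf("pid %d\n", nr);
		return 0;
	}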
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 1b84313cba..99f9b7d177 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -906,7 +906,7 @@ static void init_header(struct swsusp_info *info) | |||
906 | memset(info, 0, sizeof(struct swsusp_info)); | 906 | memset(info, 0, sizeof(struct swsusp_info)); |
907 | info->version_code = LINUX_VERSION_CODE; | 907 | info->version_code = LINUX_VERSION_CODE; |
908 | info->num_physpages = num_physpages; | 908 | info->num_physpages = num_physpages; |
909 | memcpy(&info->uts, &system_utsname, sizeof(system_utsname)); | 909 | memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); |
910 | info->cpus = num_online_cpus(); | 910 | info->cpus = num_online_cpus(); |
911 | info->image_pages = nr_copy_pages; | 911 | info->image_pages = nr_copy_pages; |
912 | info->pages = nr_copy_pages + nr_meta_pages + 1; | 912 | info->pages = nr_copy_pages + nr_meta_pages + 1; |
@@ -1050,13 +1050,13 @@ static inline int check_header(struct swsusp_info *info) | |||
1050 | reason = "kernel version"; | 1050 | reason = "kernel version"; |
1051 | if (info->num_physpages != num_physpages) | 1051 | if (info->num_physpages != num_physpages) |
1052 | reason = "memory size"; | 1052 | reason = "memory size"; |
1053 | if (strcmp(info->uts.sysname,system_utsname.sysname)) | 1053 | if (strcmp(info->uts.sysname,init_utsname()->sysname)) |
1054 | reason = "system type"; | 1054 | reason = "system type"; |
1055 | if (strcmp(info->uts.release,system_utsname.release)) | 1055 | if (strcmp(info->uts.release,init_utsname()->release)) |
1056 | reason = "kernel release"; | 1056 | reason = "kernel release"; |
1057 | if (strcmp(info->uts.version,system_utsname.version)) | 1057 | if (strcmp(info->uts.version,init_utsname()->version)) |
1058 | reason = "version"; | 1058 | reason = "version"; |
1059 | if (strcmp(info->uts.machine,system_utsname.machine)) | 1059 | if (strcmp(info->uts.machine,init_utsname()->machine)) |
1060 | reason = "machine"; | 1060 | reason = "machine"; |
1061 | if (reason) { | 1061 | if (reason) { |
1062 | printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason); | 1062 | printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason); |
diff --git a/kernel/resource.c b/kernel/resource.c index 9db38a1a75..6de60c1214 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -193,6 +193,13 @@ static int __release_resource(struct resource *old) | |||
193 | return -EINVAL; | 193 | return -EINVAL; |
194 | } | 194 | } |
195 | 195 | ||
196 | /** | ||
197 | * request_resource - request and reserve an I/O or memory resource | ||
198 | * @root: root resource descriptor | ||
199 | * @new: resource descriptor desired by caller | ||
200 | * | ||
201 | * Returns 0 for success, negative error code on error. | ||
202 | */ | ||
196 | int request_resource(struct resource *root, struct resource *new) | 203 | int request_resource(struct resource *root, struct resource *new) |
197 | { | 204 | { |
198 | struct resource *conflict; | 205 | struct resource *conflict; |
@@ -205,6 +212,15 @@ int request_resource(struct resource *root, struct resource *new) | |||
205 | 212 | ||
206 | EXPORT_SYMBOL(request_resource); | 213 | EXPORT_SYMBOL(request_resource); |
207 | 214 | ||
215 | /** | ||
216 | * ____request_resource - reserve a resource, with resource conflict returned | ||
217 | * @root: root resource descriptor | ||
218 | * @new: resource descriptor desired by caller | ||
219 | * | ||
220 | * Returns: | ||
221 | * On success, NULL is returned. | ||
222 | * On error, a pointer to the conflicting resource is returned. | ||
223 | */ | ||
208 | struct resource *____request_resource(struct resource *root, struct resource *new) | 224 | struct resource *____request_resource(struct resource *root, struct resource *new) |
209 | { | 225 | { |
210 | struct resource *conflict; | 226 | struct resource *conflict; |
@@ -217,6 +233,10 @@ struct resource *____request_resource(struct resource *root, struct resource *ne | |||
217 | 233 | ||
218 | EXPORT_SYMBOL(____request_resource); | 234 | EXPORT_SYMBOL(____request_resource); |
219 | 235 | ||
236 | /** | ||
237 | * release_resource - release a previously reserved resource | ||
238 | * @old: resource pointer | ||
239 | */ | ||
220 | int release_resource(struct resource *old) | 240 | int release_resource(struct resource *old) |
221 | { | 241 | { |
222 | int retval; | 242 | int retval; |
@@ -315,8 +335,16 @@ static int find_resource(struct resource *root, struct resource *new, | |||
315 | return -EBUSY; | 335 | return -EBUSY; |
316 | } | 336 | } |
317 | 337 | ||
318 | /* | 338 | /** |
319 | * Allocate empty slot in the resource tree given range and alignment. | 339 | * allocate_resource - allocate empty slot in the resource tree given range & alignment |
340 | * @root: root resource descriptor | ||
341 | * @new: resource descriptor desired by caller | ||
342 | * @size: requested resource region size | ||
343 | * @min: minimum size to allocate | ||
344 | * @max: maximum size to allocate | ||
345 | * @align: alignment requested, in bytes | ||
346 | * @alignf: alignment function, optional, called if not NULL | ||
347 | * @alignf_data: arbitrary data to pass to the @alignf function | ||
320 | */ | 348 | */ |
321 | int allocate_resource(struct resource *root, struct resource *new, | 349 | int allocate_resource(struct resource *root, struct resource *new, |
322 | resource_size_t size, resource_size_t min, | 350 | resource_size_t size, resource_size_t min, |
@@ -407,10 +435,15 @@ int insert_resource(struct resource *parent, struct resource *new) | |||
407 | return result; | 435 | return result; |
408 | } | 436 | } |
409 | 437 | ||
410 | /* | 438 | /** |
439 | * adjust_resource - modify a resource's start and size | ||
440 | * @res: resource to modify | ||
441 | * @start: new start value | ||
442 | * @size: new size | ||
443 | * | ||
411 | * Given an existing resource, change its start and size to match the | 444 | * Given an existing resource, change its start and size to match the |
412 | * arguments. Returns -EBUSY if it can't fit. Existing children of | 445 | * arguments. Returns 0 on success, -EBUSY if it can't fit. |
413 | * the resource are assumed to be immutable. | 446 | * Existing children of the resource are assumed to be immutable. |
414 | */ | 447 | */ |
415 | int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) | 448 | int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) |
416 | { | 449 | { |
@@ -456,11 +489,19 @@ EXPORT_SYMBOL(adjust_resource); | |||
456 | * Note how this, unlike the above, knows about | 489 | * Note how this, unlike the above, knows about |
457 | * the IO flag meanings (busy etc). | 490 | * the IO flag meanings (busy etc). |
458 | * | 491 | * |
459 | * Request-region creates a new busy region. | 492 | * request_region creates a new busy region. |
460 | * | 493 | * |
461 | * Check-region returns non-zero if the area is already busy | 494 | * check_region returns non-zero if the area is already busy. |
462 | * | 495 | * |
463 | * Release-region releases a matching busy region. | 496 | * release_region releases a matching busy region. |
497 | */ | ||
498 | |||
499 | /** | ||
500 | * __request_region - create a new busy resource region | ||
501 | * @parent: parent resource descriptor | ||
502 | * @start: resource start address | ||
503 | * @n: resource region size | ||
504 | * @name: reserving caller's ID string | ||
464 | */ | 505 | */ |
465 | struct resource * __request_region(struct resource *parent, | 506 | struct resource * __request_region(struct resource *parent, |
466 | resource_size_t start, resource_size_t n, | 507 | resource_size_t start, resource_size_t n, |
@@ -497,9 +538,23 @@ struct resource * __request_region(struct resource *parent, | |||
497 | } | 538 | } |
498 | return res; | 539 | return res; |
499 | } | 540 | } |
500 | |||
501 | EXPORT_SYMBOL(__request_region); | 541 | EXPORT_SYMBOL(__request_region); |
502 | 542 | ||
543 | /** | ||
544 | * __check_region - check if a resource region is busy or free | ||
545 | * @parent: parent resource descriptor | ||
546 | * @start: resource start address | ||
547 | * @n: resource region size | ||
548 | * | ||
549 | * Returns 0 if the region is free at the moment it is checked, | ||
550 | * returns %-EBUSY if the region is busy. | ||
551 | * | ||
552 | * NOTE: | ||
553 | * This function is deprecated because its use is racy. | ||
554 | * Even if it returns 0, a subsequent call to request_region() | ||
555 | * may fail because another driver etc. just allocated the region. | ||
556 | * Do NOT use it. It will be removed from the kernel. | ||
557 | */ | ||
503 | int __check_region(struct resource *parent, resource_size_t start, | 558 | int __check_region(struct resource *parent, resource_size_t start, |
504 | resource_size_t n) | 559 | resource_size_t n) |
505 | { | 560 | { |
@@ -513,9 +568,16 @@ int __check_region(struct resource *parent, resource_size_t start, | |||
513 | kfree(res); | 568 | kfree(res); |
514 | return 0; | 569 | return 0; |
515 | } | 570 | } |
516 | |||
517 | EXPORT_SYMBOL(__check_region); | 571 | EXPORT_SYMBOL(__check_region); |
518 | 572 | ||
573 | /** | ||
574 | * __release_region - release a previously reserved resource region | ||
575 | * @parent: parent resource descriptor | ||
576 | * @start: resource start address | ||
577 | * @n: resource region size | ||
578 | * | ||
579 | * The described resource region must match a currently busy region. | ||
580 | */ | ||
519 | void __release_region(struct resource *parent, resource_size_t start, | 581 | void __release_region(struct resource *parent, resource_size_t start, |
520 | resource_size_t n) | 582 | resource_size_t n) |
521 | { | 583 | { |
@@ -553,7 +615,6 @@ void __release_region(struct resource *parent, resource_size_t start, | |||
553 | "<%016llx-%016llx>\n", (unsigned long long)start, | 615 | "<%016llx-%016llx>\n", (unsigned long long)start, |
554 | (unsigned long long)end); | 616 | (unsigned long long)end); |
555 | } | 617 | } |
556 | |||
557 | EXPORT_SYMBOL(__release_region); | 618 | EXPORT_SYMBOL(__release_region); |
558 | 619 | ||
559 | /* | 620 | /* |
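Taken together, the newly documented region API is a claim/use/release pattern. A sketch of a typical driver-side caller (invented device name and port range, purely for illustration):

	#include <linux/ioport.h>
	#include <linux/errno.h>

	#define MYDEV_BASE	0x300	/* hypothetical I/O port base */
	#define MYDEV_LEN	8

	static int mydev_probe(void)
	{
		/* Claim the range up front; NULL means it is already busy. */
		if (!request_region(MYDEV_BASE, MYDEV_LEN, "mydev"))
			return -EBUSY;
		/* ... program the device ... */
		return 0;
	}

	static void mydev_remove(void)
	{
		/* Must describe the earlier busy region exactly. */
		release_region(MYDEV_BASE, MYDEV_LEN);
	}

Note the absence of check_region(): per the deprecation note above, check-then-request is racy, so drivers simply try request_region() and handle failure.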
diff --git a/kernel/sched.c b/kernel/sched.c index 74f169ac07..53608a59d6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -49,7 +49,7 @@ | |||
49 | #include <linux/seq_file.h> | 49 | #include <linux/seq_file.h> |
50 | #include <linux/syscalls.h> | 50 | #include <linux/syscalls.h> |
51 | #include <linux/times.h> | 51 | #include <linux/times.h> |
52 | #include <linux/acct.h> | 52 | #include <linux/tsacct_kern.h> |
53 | #include <linux/kprobes.h> | 53 | #include <linux/kprobes.h> |
54 | #include <linux/delayacct.h> | 54 | #include <linux/delayacct.h> |
55 | #include <asm/tlb.h> | 55 | #include <asm/tlb.h> |
@@ -1232,7 +1232,7 @@ nextgroup: | |||
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | /* | 1234 | /* |
1235 | * find_idlest_queue - find the idlest runqueue among the cpus in group. | 1235 | * find_idlest_cpu - find the idlest cpu among the cpus in group. |
1236 | */ | 1236 | */ |
1237 | static int | 1237 | static int |
1238 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | 1238 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) |
@@ -1286,21 +1286,29 @@ static int sched_balance_self(int cpu, int flag) | |||
1286 | while (sd) { | 1286 | while (sd) { |
1287 | cpumask_t span; | 1287 | cpumask_t span; |
1288 | struct sched_group *group; | 1288 | struct sched_group *group; |
1289 | int new_cpu; | 1289 | int new_cpu, weight; |
1290 | int weight; | 1290 | |
1291 | if (!(sd->flags & flag)) { | ||
1292 | sd = sd->child; | ||
1293 | continue; | ||
1294 | } | ||
1291 | 1295 | ||
1292 | span = sd->span; | 1296 | span = sd->span; |
1293 | group = find_idlest_group(sd, t, cpu); | 1297 | group = find_idlest_group(sd, t, cpu); |
1294 | if (!group) | 1298 | if (!group) { |
1295 | goto nextlevel; | 1299 | sd = sd->child; |
1300 | continue; | ||
1301 | } | ||
1296 | 1302 | ||
1297 | new_cpu = find_idlest_cpu(group, t, cpu); | 1303 | new_cpu = find_idlest_cpu(group, t, cpu); |
1298 | if (new_cpu == -1 || new_cpu == cpu) | 1304 | if (new_cpu == -1 || new_cpu == cpu) { |
1299 | goto nextlevel; | 1305 | /* Now try balancing at a lower domain level of cpu */ |
1306 | sd = sd->child; | ||
1307 | continue; | ||
1308 | } | ||
1300 | 1309 | ||
1301 | /* Now try balancing at a lower domain level */ | 1310 | /* Now try balancing at a lower domain level of new_cpu */ |
1302 | cpu = new_cpu; | 1311 | cpu = new_cpu; |
1303 | nextlevel: | ||
1304 | sd = NULL; | 1312 | sd = NULL; |
1305 | weight = cpus_weight(span); | 1313 | weight = cpus_weight(span); |
1306 | for_each_domain(cpu, tmp) { | 1314 | for_each_domain(cpu, tmp) { |
@@ -2533,8 +2541,14 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
2533 | struct rq *busiest; | 2541 | struct rq *busiest; |
2534 | cpumask_t cpus = CPU_MASK_ALL; | 2542 | cpumask_t cpus = CPU_MASK_ALL; |
2535 | 2543 | ||
2544 | /* | ||
2545 | * When power savings policy is enabled for the parent domain, an idle | ||
2546 | * sibling can pick up load irrespective of busy siblings. In this case, | ||
2547 | * let the state of the idle sibling percolate up as IDLE, instead of | ||
2548 | * portraying it as NOT_IDLE. | ||
2549 | */ | ||
2536 | if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && | 2550 | if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && |
2537 | !sched_smt_power_savings) | 2551 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
2538 | sd_idle = 1; | 2552 | sd_idle = 1; |
2539 | 2553 | ||
2540 | schedstat_inc(sd, lb_cnt[idle]); | 2554 | schedstat_inc(sd, lb_cnt[idle]); |
@@ -2630,7 +2644,7 @@ redo: | |||
2630 | } | 2644 | } |
2631 | 2645 | ||
2632 | if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 2646 | if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
2633 | !sched_smt_power_savings) | 2647 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
2634 | return -1; | 2648 | return -1; |
2635 | return nr_moved; | 2649 | return nr_moved; |
2636 | 2650 | ||
@@ -2646,7 +2660,7 @@ out_one_pinned: | |||
2646 | sd->balance_interval *= 2; | 2660 | sd->balance_interval *= 2; |
2647 | 2661 | ||
2648 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 2662 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
2649 | !sched_smt_power_savings) | 2663 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
2650 | return -1; | 2664 | return -1; |
2651 | return 0; | 2665 | return 0; |
2652 | } | 2666 | } |
@@ -2668,7 +2682,14 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
2668 | int sd_idle = 0; | 2682 | int sd_idle = 0; |
2669 | cpumask_t cpus = CPU_MASK_ALL; | 2683 | cpumask_t cpus = CPU_MASK_ALL; |
2670 | 2684 | ||
2671 | if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) | 2685 | /* |
2686 | * When power savings policy is enabled for the parent domain, an idle | ||
2687 | * sibling can pick up load irrespective of busy siblings. In this case, | ||
2688 | * let the state of the idle sibling percolate up as IDLE, instead of | ||
2689 | * portraying it as NOT_IDLE. | ||
2690 | */ | ||
2691 | if (sd->flags & SD_SHARE_CPUPOWER && | ||
2692 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
2672 | sd_idle = 1; | 2693 | sd_idle = 1; |
2673 | 2694 | ||
2674 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2695 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); |
@@ -2709,7 +2730,8 @@ redo: | |||
2709 | 2730 | ||
2710 | if (!nr_moved) { | 2731 | if (!nr_moved) { |
2711 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); | 2732 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); |
2712 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) | 2733 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
2734 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
2713 | return -1; | 2735 | return -1; |
2714 | } else | 2736 | } else |
2715 | sd->nr_balance_failed = 0; | 2737 | sd->nr_balance_failed = 0; |
@@ -2719,7 +2741,7 @@ redo: | |||
2719 | out_balanced: | 2741 | out_balanced: |
2720 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); | 2742 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); |
2721 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 2743 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
2722 | !sched_smt_power_savings) | 2744 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
2723 | return -1; | 2745 | return -1; |
2724 | sd->nr_balance_failed = 0; | 2746 | sd->nr_balance_failed = 0; |
2725 | 2747 | ||
@@ -4384,7 +4406,10 @@ EXPORT_SYMBOL(cpu_present_map); | |||
4384 | 4406 | ||
4385 | #ifndef CONFIG_SMP | 4407 | #ifndef CONFIG_SMP |
4386 | cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; | 4408 | cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; |
4409 | EXPORT_SYMBOL(cpu_online_map); | ||
4410 | |||
4387 | cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; | 4411 | cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; |
4412 | EXPORT_SYMBOL(cpu_possible_map); | ||
4388 | #endif | 4413 | #endif |
4389 | 4414 | ||
4390 | long sched_getaffinity(pid_t pid, cpumask_t *mask) | 4415 | long sched_getaffinity(pid_t pid, cpumask_t *mask) |
@@ -4814,7 +4839,7 @@ void show_state(void) | |||
4814 | * NOTE: this function does not set the idle thread's NEED_RESCHED | 4839 | * NOTE: this function does not set the idle thread's NEED_RESCHED |
4815 | * flag, to make booting more robust. | 4840 | * flag, to make booting more robust. |
4816 | */ | 4841 | */ |
4817 | void __devinit init_idle(struct task_struct *idle, int cpu) | 4842 | void __cpuinit init_idle(struct task_struct *idle, int cpu) |
4818 | { | 4843 | { |
4819 | struct rq *rq = cpu_rq(cpu); | 4844 | struct rq *rq = cpu_rq(cpu); |
4820 | unsigned long flags; | 4845 | unsigned long flags; |
@@ -5389,7 +5414,9 @@ static int sd_degenerate(struct sched_domain *sd) | |||
5389 | if (sd->flags & (SD_LOAD_BALANCE | | 5414 | if (sd->flags & (SD_LOAD_BALANCE | |
5390 | SD_BALANCE_NEWIDLE | | 5415 | SD_BALANCE_NEWIDLE | |
5391 | SD_BALANCE_FORK | | 5416 | SD_BALANCE_FORK | |
5392 | SD_BALANCE_EXEC)) { | 5417 | SD_BALANCE_EXEC | |
5418 | SD_SHARE_CPUPOWER | | ||
5419 | SD_SHARE_PKG_RESOURCES)) { | ||
5393 | if (sd->groups != sd->groups->next) | 5420 | if (sd->groups != sd->groups->next) |
5394 | return 0; | 5421 | return 0; |
5395 | } | 5422 | } |
@@ -5423,7 +5450,9 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
5423 | pflags &= ~(SD_LOAD_BALANCE | | 5450 | pflags &= ~(SD_LOAD_BALANCE | |
5424 | SD_BALANCE_NEWIDLE | | 5451 | SD_BALANCE_NEWIDLE | |
5425 | SD_BALANCE_FORK | | 5452 | SD_BALANCE_FORK | |
5426 | SD_BALANCE_EXEC); | 5453 | SD_BALANCE_EXEC | |
5454 | SD_SHARE_CPUPOWER | | ||
5455 | SD_SHARE_PKG_RESOURCES); | ||
5427 | } | 5456 | } |
5428 | if (~cflags & pflags) | 5457 | if (~cflags & pflags) |
5429 | return 0; | 5458 | return 0; |
@@ -5445,12 +5474,18 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu) | |||
5445 | struct sched_domain *parent = tmp->parent; | 5474 | struct sched_domain *parent = tmp->parent; |
5446 | if (!parent) | 5475 | if (!parent) |
5447 | break; | 5476 | break; |
5448 | if (sd_parent_degenerate(tmp, parent)) | 5477 | if (sd_parent_degenerate(tmp, parent)) { |
5449 | tmp->parent = parent->parent; | 5478 | tmp->parent = parent->parent; |
5479 | if (parent->parent) | ||
5480 | parent->parent->child = tmp; | ||
5481 | } | ||
5450 | } | 5482 | } |
5451 | 5483 | ||
5452 | if (sd && sd_degenerate(sd)) | 5484 | if (sd && sd_degenerate(sd)) { |
5453 | sd = sd->parent; | 5485 | sd = sd->parent; |
5486 | if (sd) | ||
5487 | sd->child = NULL; | ||
5488 | } | ||
5454 | 5489 | ||
5455 | sched_domain_debug(sd, cpu); | 5490 | sched_domain_debug(sd, cpu); |
5456 | 5491 | ||
@@ -5458,7 +5493,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu) | |||
5458 | } | 5493 | } |
5459 | 5494 | ||
5460 | /* cpus with isolated domains */ | 5495 | /* cpus with isolated domains */ |
5461 | static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; | 5496 | static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE; |
5462 | 5497 | ||
5463 | /* Setup the mask of cpus configured for isolated domains */ | 5498 | /* Setup the mask of cpus configured for isolated domains */ |
5464 | static int __init isolated_cpu_setup(char *str) | 5499 | static int __init isolated_cpu_setup(char *str) |
@@ -5486,15 +5521,17 @@ __setup ("isolcpus=", isolated_cpu_setup); | |||
5486 | * covered by the given span, and will set each group's ->cpumask correctly, | 5521 | * covered by the given span, and will set each group's ->cpumask correctly, |
5487 | * and ->cpu_power to 0. | 5522 | * and ->cpu_power to 0. |
5488 | */ | 5523 | */ |
5489 | static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, | 5524 | static void |
5490 | int (*group_fn)(int cpu)) | 5525 | init_sched_build_groups(struct sched_group groups[], cpumask_t span, |
5526 | const cpumask_t *cpu_map, | ||
5527 | int (*group_fn)(int cpu, const cpumask_t *cpu_map)) | ||
5491 | { | 5528 | { |
5492 | struct sched_group *first = NULL, *last = NULL; | 5529 | struct sched_group *first = NULL, *last = NULL; |
5493 | cpumask_t covered = CPU_MASK_NONE; | 5530 | cpumask_t covered = CPU_MASK_NONE; |
5494 | int i; | 5531 | int i; |
5495 | 5532 | ||
5496 | for_each_cpu_mask(i, span) { | 5533 | for_each_cpu_mask(i, span) { |
5497 | int group = group_fn(i); | 5534 | int group = group_fn(i, cpu_map); |
5498 | struct sched_group *sg = &groups[group]; | 5535 | struct sched_group *sg = &groups[group]; |
5499 | int j; | 5536 | int j; |
5500 | 5537 | ||
@@ -5505,7 +5542,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, | |||
5505 | sg->cpu_power = 0; | 5542 | sg->cpu_power = 0; |
5506 | 5543 | ||
5507 | for_each_cpu_mask(j, span) { | 5544 | for_each_cpu_mask(j, span) { |
5508 | if (group_fn(j) != group) | 5545 | if (group_fn(j, cpu_map) != group) |
5509 | continue; | 5546 | continue; |
5510 | 5547 | ||
5511 | cpu_set(j, covered); | 5548 | cpu_set(j, covered); |
@@ -5972,13 +6009,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map) | |||
5972 | #endif | 6009 | #endif |
5973 | ); | 6010 | ); |
5974 | if (system_state == SYSTEM_BOOTING) { | 6011 | if (system_state == SYSTEM_BOOTING) { |
5975 | printk("migration_cost="); | 6012 | if (num_online_cpus() > 1) { |
5976 | for (distance = 0; distance <= max_distance; distance++) { | 6013 | printk("migration_cost="); |
5977 | if (distance) | 6014 | for (distance = 0; distance <= max_distance; distance++) { |
5978 | printk(","); | 6015 | if (distance) |
5979 | printk("%ld", (long)migration_cost[distance] / 1000); | 6016 | printk(","); |
6017 | printk("%ld", (long)migration_cost[distance] / 1000); | ||
6018 | } | ||
6019 | printk("\n"); | ||
5980 | } | 6020 | } |
5981 | printk("\n"); | ||
5982 | } | 6021 | } |
5983 | j1 = jiffies; | 6022 | j1 = jiffies; |
5984 | if (migration_debug) | 6023 | if (migration_debug) |
@@ -6081,7 +6120,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | |||
6081 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | 6120 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); |
6082 | static struct sched_group sched_group_cpus[NR_CPUS]; | 6121 | static struct sched_group sched_group_cpus[NR_CPUS]; |
6083 | 6122 | ||
6084 | static int cpu_to_cpu_group(int cpu) | 6123 | static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) |
6085 | { | 6124 | { |
6086 | return cpu; | 6125 | return cpu; |
6087 | } | 6126 | } |
@@ -6092,31 +6131,36 @@ static int cpu_to_cpu_group(int cpu) | |||
6092 | */ | 6131 | */ |
6093 | #ifdef CONFIG_SCHED_MC | 6132 | #ifdef CONFIG_SCHED_MC |
6094 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | 6133 | static DEFINE_PER_CPU(struct sched_domain, core_domains); |
6095 | static struct sched_group *sched_group_core_bycpu[NR_CPUS]; | 6134 | static struct sched_group sched_group_core[NR_CPUS]; |
6096 | #endif | 6135 | #endif |
6097 | 6136 | ||
6098 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6137 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
6099 | static int cpu_to_core_group(int cpu) | 6138 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) |
6100 | { | 6139 | { |
6101 | return first_cpu(cpu_sibling_map[cpu]); | 6140 | cpumask_t mask = cpu_sibling_map[cpu]; |
6141 | cpus_and(mask, mask, *cpu_map); | ||
6142 | return first_cpu(mask); | ||
6102 | } | 6143 | } |
6103 | #elif defined(CONFIG_SCHED_MC) | 6144 | #elif defined(CONFIG_SCHED_MC) |
6104 | static int cpu_to_core_group(int cpu) | 6145 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) |
6105 | { | 6146 | { |
6106 | return cpu; | 6147 | return cpu; |
6107 | } | 6148 | } |
6108 | #endif | 6149 | #endif |
6109 | 6150 | ||
6110 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | 6151 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); |
6111 | static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; | 6152 | static struct sched_group sched_group_phys[NR_CPUS]; |
6112 | 6153 | ||
6113 | static int cpu_to_phys_group(int cpu) | 6154 | static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map) |
6114 | { | 6155 | { |
6115 | #ifdef CONFIG_SCHED_MC | 6156 | #ifdef CONFIG_SCHED_MC |
6116 | cpumask_t mask = cpu_coregroup_map(cpu); | 6157 | cpumask_t mask = cpu_coregroup_map(cpu); |
6158 | cpus_and(mask, mask, *cpu_map); | ||
6117 | return first_cpu(mask); | 6159 | return first_cpu(mask); |
6118 | #elif defined(CONFIG_SCHED_SMT) | 6160 | #elif defined(CONFIG_SCHED_SMT) |
6119 | return first_cpu(cpu_sibling_map[cpu]); | 6161 | cpumask_t mask = cpu_sibling_map[cpu]; |
6162 | cpus_and(mask, mask, *cpu_map); | ||
6163 | return first_cpu(mask); | ||
6120 | #else | 6164 | #else |
6121 | return cpu; | 6165 | return cpu; |
6122 | #endif | 6166 | #endif |
@@ -6134,7 +6178,7 @@ static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; | |||
6134 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); | 6178 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); |
6135 | static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; | 6179 | static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; |
6136 | 6180 | ||
6137 | static int cpu_to_allnodes_group(int cpu) | 6181 | static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map) |
6138 | { | 6182 | { |
6139 | return cpu_to_node(cpu); | 6183 | return cpu_to_node(cpu); |
6140 | } | 6184 | } |
@@ -6166,12 +6210,11 @@ next_sg: | |||
6166 | } | 6210 | } |
6167 | #endif | 6211 | #endif |
6168 | 6212 | ||
6213 | #ifdef CONFIG_NUMA | ||
6169 | /* Free memory allocated for various sched_group structures */ | 6214 | /* Free memory allocated for various sched_group structures */ |
6170 | static void free_sched_groups(const cpumask_t *cpu_map) | 6215 | static void free_sched_groups(const cpumask_t *cpu_map) |
6171 | { | 6216 | { |
6172 | int cpu; | 6217 | int cpu, i; |
6173 | #ifdef CONFIG_NUMA | ||
6174 | int i; | ||
6175 | 6218 | ||
6176 | for_each_cpu_mask(cpu, *cpu_map) { | 6219 | for_each_cpu_mask(cpu, *cpu_map) { |
6177 | struct sched_group *sched_group_allnodes | 6220 | struct sched_group *sched_group_allnodes |
@@ -6208,19 +6251,63 @@ next_sg: | |||
6208 | kfree(sched_group_nodes); | 6251 | kfree(sched_group_nodes); |
6209 | sched_group_nodes_bycpu[cpu] = NULL; | 6252 | sched_group_nodes_bycpu[cpu] = NULL; |
6210 | } | 6253 | } |
6254 | } | ||
6255 | #else | ||
6256 | static void free_sched_groups(const cpumask_t *cpu_map) | ||
6257 | { | ||
6258 | } | ||
6211 | #endif | 6259 | #endif |
6212 | for_each_cpu_mask(cpu, *cpu_map) { | 6260 | |
6213 | if (sched_group_phys_bycpu[cpu]) { | 6261 | /* |
6214 | kfree(sched_group_phys_bycpu[cpu]); | 6262 | * Initialize sched groups cpu_power. |
6215 | sched_group_phys_bycpu[cpu] = NULL; | 6263 | * |
6216 | } | 6264 | * cpu_power indicates the capacity of a sched group, which is used while |
6217 | #ifdef CONFIG_SCHED_MC | 6265 | * distributing the load between different sched groups in a sched domain. |
6218 | if (sched_group_core_bycpu[cpu]) { | 6266 | * Typically cpu_power for all the groups in a sched domain will be the same |
6219 | kfree(sched_group_core_bycpu[cpu]); | 6267 | * unless there are asymmetries in the topology. If there are asymmetries, the |
6220 | sched_group_core_bycpu[cpu] = NULL; | 6268 | * group having more cpu_power will pick up more load compared to the group |
6221 | } | 6269 | * having less cpu_power. |
6222 | #endif | 6270 | * |
6271 | * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents | ||
6272 | * the maximum number of tasks a group can handle in the presence of other idle | ||
6273 | * or lightly loaded groups in the same sched domain. | ||
6274 | */ | ||
6275 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) | ||
6276 | { | ||
6277 | struct sched_domain *child; | ||
6278 | struct sched_group *group; | ||
6279 | |||
6280 | WARN_ON(!sd || !sd->groups); | ||
6281 | |||
6282 | if (cpu != first_cpu(sd->groups->cpumask)) | ||
6283 | return; | ||
6284 | |||
6285 | child = sd->child; | ||
6286 | |||
6287 | /* | ||
6288 | * For perf policy, if the groups in the child domain share resources | ||
6289 | * (for example cores sharing some portions of the cache hierarchy | ||
6290 | * or SMT), then set this domain's group cpu_power such that each group | ||
6291 | * can handle only one task when there are other idle groups in the | ||
6292 | * same sched domain. | ||
6293 | */ | ||
6294 | if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) && | ||
6295 | (child->flags & | ||
6296 | (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) { | ||
6297 | sd->groups->cpu_power = SCHED_LOAD_SCALE; | ||
6298 | return; | ||
6223 | } | 6299 | } |
6300 | |||
6301 | sd->groups->cpu_power = 0; | ||
6302 | |||
6303 | /* | ||
6304 | * add the cpu_power of each child group to this group's cpu_power | ||
6305 | */ | ||
6306 | group = child->groups; | ||
6307 | do { | ||
6308 | sd->groups->cpu_power += group->cpu_power; | ||
6309 | group = group->next; | ||
6310 | } while (group != child->groups); | ||
6224 | } | 6311 | } |
6225 | 6312 | ||
6226 | /* | 6313 | /* |
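A worked instance of the aggregation rule above, for one physical package with two cores of two SMT siblings each under the perf policy (SCHED_LOAD_SCALE assumed to be 128, as in kernels of this vintage):

	#include <stdio.h>

	#define SCHED_LOAD_SCALE 128

	int main(void)
	{
		/* SMT siblings share CPU power, so each core-level group
		 * is capped at one task's worth of capacity. */
		int core_group = SCHED_LOAD_SCALE;

		/* The package-level group sums its child (core) groups. */
		int phys_group = 2 * core_group;

		printf("core=%d phys=%d\n", core_group, phys_group); /* 128 256 */
		return 0;
	}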
@@ -6230,10 +6317,7 @@ next_sg: | |||
6230 | static int build_sched_domains(const cpumask_t *cpu_map) | 6317 | static int build_sched_domains(const cpumask_t *cpu_map) |
6231 | { | 6318 | { |
6232 | int i; | 6319 | int i; |
6233 | struct sched_group *sched_group_phys = NULL; | 6320 | struct sched_domain *sd; |
6234 | #ifdef CONFIG_SCHED_MC | ||
6235 | struct sched_group *sched_group_core = NULL; | ||
6236 | #endif | ||
6237 | #ifdef CONFIG_NUMA | 6321 | #ifdef CONFIG_NUMA |
6238 | struct sched_group **sched_group_nodes = NULL; | 6322 | struct sched_group **sched_group_nodes = NULL; |
6239 | struct sched_group *sched_group_allnodes = NULL; | 6323 | struct sched_group *sched_group_allnodes = NULL; |
@@ -6265,9 +6349,10 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6265 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { | 6349 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { |
6266 | if (!sched_group_allnodes) { | 6350 | if (!sched_group_allnodes) { |
6267 | sched_group_allnodes | 6351 | sched_group_allnodes |
6268 | = kmalloc(sizeof(struct sched_group) | 6352 | = kmalloc_node(sizeof(struct sched_group) |
6269 | * MAX_NUMNODES, | 6353 | * MAX_NUMNODES, |
6270 | GFP_KERNEL); | 6354 | GFP_KERNEL, |
6355 | cpu_to_node(i)); | ||
6271 | if (!sched_group_allnodes) { | 6356 | if (!sched_group_allnodes) { |
6272 | printk(KERN_WARNING | 6357 | printk(KERN_WARNING |
6273 | "Can not alloc allnodes sched group\n"); | 6358 | "Can not alloc allnodes sched group\n"); |
@@ -6279,7 +6364,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6279 | sd = &per_cpu(allnodes_domains, i); | 6364 | sd = &per_cpu(allnodes_domains, i); |
6280 | *sd = SD_ALLNODES_INIT; | 6365 | *sd = SD_ALLNODES_INIT; |
6281 | sd->span = *cpu_map; | 6366 | sd->span = *cpu_map; |
6282 | group = cpu_to_allnodes_group(i); | 6367 | group = cpu_to_allnodes_group(i, cpu_map); |
6283 | sd->groups = &sched_group_allnodes[group]; | 6368 | sd->groups = &sched_group_allnodes[group]; |
6284 | p = sd; | 6369 | p = sd; |
6285 | } else | 6370 | } else |
@@ -6289,60 +6374,42 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6289 | *sd = SD_NODE_INIT; | 6374 | *sd = SD_NODE_INIT; |
6290 | sd->span = sched_domain_node_span(cpu_to_node(i)); | 6375 | sd->span = sched_domain_node_span(cpu_to_node(i)); |
6291 | sd->parent = p; | 6376 | sd->parent = p; |
6377 | if (p) | ||
6378 | p->child = sd; | ||
6292 | cpus_and(sd->span, sd->span, *cpu_map); | 6379 | cpus_and(sd->span, sd->span, *cpu_map); |
6293 | #endif | 6380 | #endif |
6294 | 6381 | ||
6295 | if (!sched_group_phys) { | ||
6296 | sched_group_phys | ||
6297 | = kmalloc(sizeof(struct sched_group) * NR_CPUS, | ||
6298 | GFP_KERNEL); | ||
6299 | if (!sched_group_phys) { | ||
6300 | printk (KERN_WARNING "Can not alloc phys sched" | ||
6301 | "group\n"); | ||
6302 | goto error; | ||
6303 | } | ||
6304 | sched_group_phys_bycpu[i] = sched_group_phys; | ||
6305 | } | ||
6306 | |||
6307 | p = sd; | 6382 | p = sd; |
6308 | sd = &per_cpu(phys_domains, i); | 6383 | sd = &per_cpu(phys_domains, i); |
6309 | group = cpu_to_phys_group(i); | 6384 | group = cpu_to_phys_group(i, cpu_map); |
6310 | *sd = SD_CPU_INIT; | 6385 | *sd = SD_CPU_INIT; |
6311 | sd->span = nodemask; | 6386 | sd->span = nodemask; |
6312 | sd->parent = p; | 6387 | sd->parent = p; |
6388 | if (p) | ||
6389 | p->child = sd; | ||
6313 | sd->groups = &sched_group_phys[group]; | 6390 | sd->groups = &sched_group_phys[group]; |
6314 | 6391 | ||
6315 | #ifdef CONFIG_SCHED_MC | 6392 | #ifdef CONFIG_SCHED_MC |
6316 | if (!sched_group_core) { | ||
6317 | sched_group_core | ||
6318 | = kmalloc(sizeof(struct sched_group) * NR_CPUS, | ||
6319 | GFP_KERNEL); | ||
6320 | if (!sched_group_core) { | ||
6321 | printk (KERN_WARNING "Can not alloc core sched" | ||
6322 | "group\n"); | ||
6323 | goto error; | ||
6324 | } | ||
6325 | sched_group_core_bycpu[i] = sched_group_core; | ||
6326 | } | ||
6327 | |||
6328 | p = sd; | 6393 | p = sd; |
6329 | sd = &per_cpu(core_domains, i); | 6394 | sd = &per_cpu(core_domains, i); |
6330 | group = cpu_to_core_group(i); | 6395 | group = cpu_to_core_group(i, cpu_map); |
6331 | *sd = SD_MC_INIT; | 6396 | *sd = SD_MC_INIT; |
6332 | sd->span = cpu_coregroup_map(i); | 6397 | sd->span = cpu_coregroup_map(i); |
6333 | cpus_and(sd->span, sd->span, *cpu_map); | 6398 | cpus_and(sd->span, sd->span, *cpu_map); |
6334 | sd->parent = p; | 6399 | sd->parent = p; |
6400 | p->child = sd; | ||
6335 | sd->groups = &sched_group_core[group]; | 6401 | sd->groups = &sched_group_core[group]; |
6336 | #endif | 6402 | #endif |
6337 | 6403 | ||
6338 | #ifdef CONFIG_SCHED_SMT | 6404 | #ifdef CONFIG_SCHED_SMT |
6339 | p = sd; | 6405 | p = sd; |
6340 | sd = &per_cpu(cpu_domains, i); | 6406 | sd = &per_cpu(cpu_domains, i); |
6341 | group = cpu_to_cpu_group(i); | 6407 | group = cpu_to_cpu_group(i, cpu_map); |
6342 | *sd = SD_SIBLING_INIT; | 6408 | *sd = SD_SIBLING_INIT; |
6343 | sd->span = cpu_sibling_map[i]; | 6409 | sd->span = cpu_sibling_map[i]; |
6344 | cpus_and(sd->span, sd->span, *cpu_map); | 6410 | cpus_and(sd->span, sd->span, *cpu_map); |
6345 | sd->parent = p; | 6411 | sd->parent = p; |
6412 | p->child = sd; | ||
6346 | sd->groups = &sched_group_cpus[group]; | 6413 | sd->groups = &sched_group_cpus[group]; |
6347 | #endif | 6414 | #endif |
6348 | } | 6415 | } |
@@ -6356,7 +6423,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6356 | continue; | 6423 | continue; |
6357 | 6424 | ||
6358 | init_sched_build_groups(sched_group_cpus, this_sibling_map, | 6425 | init_sched_build_groups(sched_group_cpus, this_sibling_map, |
6359 | &cpu_to_cpu_group); | 6426 | cpu_map, &cpu_to_cpu_group); |
6360 | } | 6427 | } |
6361 | #endif | 6428 | #endif |
6362 | 6429 | ||
@@ -6368,7 +6435,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6368 | if (i != first_cpu(this_core_map)) | 6435 | if (i != first_cpu(this_core_map)) |
6369 | continue; | 6436 | continue; |
6370 | init_sched_build_groups(sched_group_core, this_core_map, | 6437 | init_sched_build_groups(sched_group_core, this_core_map, |
6371 | &cpu_to_core_group); | 6438 | cpu_map, &cpu_to_core_group); |
6372 | } | 6439 | } |
6373 | #endif | 6440 | #endif |
6374 | 6441 | ||
@@ -6382,14 +6449,14 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6382 | continue; | 6449 | continue; |
6383 | 6450 | ||
6384 | init_sched_build_groups(sched_group_phys, nodemask, | 6451 | init_sched_build_groups(sched_group_phys, nodemask, |
6385 | &cpu_to_phys_group); | 6452 | cpu_map, &cpu_to_phys_group); |
6386 | } | 6453 | } |
6387 | 6454 | ||
6388 | #ifdef CONFIG_NUMA | 6455 | #ifdef CONFIG_NUMA |
6389 | /* Set up node groups */ | 6456 | /* Set up node groups */ |
6390 | if (sched_group_allnodes) | 6457 | if (sched_group_allnodes) |
6391 | init_sched_build_groups(sched_group_allnodes, *cpu_map, | 6458 | init_sched_build_groups(sched_group_allnodes, *cpu_map, |
6392 | &cpu_to_allnodes_group); | 6459 | cpu_map, &cpu_to_allnodes_group); |
6393 | 6460 | ||
6394 | for (i = 0; i < MAX_NUMNODES; i++) { | 6461 | for (i = 0; i < MAX_NUMNODES; i++) { |
6395 | /* Set up node groups */ | 6462 | /* Set up node groups */ |
@@ -6461,72 +6528,20 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6461 | /* Calculate CPU power for physical packages and nodes */ | 6528 | /* Calculate CPU power for physical packages and nodes */ |
6462 | #ifdef CONFIG_SCHED_SMT | 6529 | #ifdef CONFIG_SCHED_SMT |
6463 | for_each_cpu_mask(i, *cpu_map) { | 6530 | for_each_cpu_mask(i, *cpu_map) { |
6464 | struct sched_domain *sd; | ||
6465 | sd = &per_cpu(cpu_domains, i); | 6531 | sd = &per_cpu(cpu_domains, i); |
6466 | sd->groups->cpu_power = SCHED_LOAD_SCALE; | 6532 | init_sched_groups_power(i, sd); |
6467 | } | 6533 | } |
6468 | #endif | 6534 | #endif |
6469 | #ifdef CONFIG_SCHED_MC | 6535 | #ifdef CONFIG_SCHED_MC |
6470 | for_each_cpu_mask(i, *cpu_map) { | 6536 | for_each_cpu_mask(i, *cpu_map) { |
6471 | int power; | ||
6472 | struct sched_domain *sd; | ||
6473 | sd = &per_cpu(core_domains, i); | 6537 | sd = &per_cpu(core_domains, i); |
6474 | if (sched_smt_power_savings) | 6538 | init_sched_groups_power(i, sd); |
6475 | power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask); | ||
6476 | else | ||
6477 | power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) | ||
6478 | * SCHED_LOAD_SCALE / 10; | ||
6479 | sd->groups->cpu_power = power; | ||
6480 | } | 6539 | } |
6481 | #endif | 6540 | #endif |
6482 | 6541 | ||
6483 | for_each_cpu_mask(i, *cpu_map) { | 6542 | for_each_cpu_mask(i, *cpu_map) { |
6484 | struct sched_domain *sd; | ||
6485 | #ifdef CONFIG_SCHED_MC | ||
6486 | sd = &per_cpu(phys_domains, i); | 6543 | sd = &per_cpu(phys_domains, i); |
6487 | if (i != first_cpu(sd->groups->cpumask)) | 6544 | init_sched_groups_power(i, sd); |
6488 | continue; | ||
6489 | |||
6490 | sd->groups->cpu_power = 0; | ||
6491 | if (sched_mc_power_savings || sched_smt_power_savings) { | ||
6492 | int j; | ||
6493 | |||
6494 | for_each_cpu_mask(j, sd->groups->cpumask) { | ||
6495 | struct sched_domain *sd1; | ||
6496 | sd1 = &per_cpu(core_domains, j); | ||
6497 | /* | ||
6498 | * for each core we will add once | ||
6499 | * to the group in physical domain | ||
6500 | */ | ||
6501 | if (j != first_cpu(sd1->groups->cpumask)) | ||
6502 | continue; | ||
6503 | |||
6504 | if (sched_smt_power_savings) | ||
6505 | sd->groups->cpu_power += sd1->groups->cpu_power; | ||
6506 | else | ||
6507 | sd->groups->cpu_power += SCHED_LOAD_SCALE; | ||
6508 | } | ||
6509 | } else | ||
6510 | /* | ||
6511 | * This has to be < 2 * SCHED_LOAD_SCALE | ||
6512 | * Lets keep it SCHED_LOAD_SCALE, so that | ||
6513 | * while calculating NUMA group's cpu_power | ||
6514 | * we can simply do | ||
6515 | * numa_group->cpu_power += phys_group->cpu_power; | ||
6516 | * | ||
6517 | * See "only add power once for each physical pkg" | ||
6518 | * comment below | ||
6519 | */ | ||
6520 | sd->groups->cpu_power = SCHED_LOAD_SCALE; | ||
6521 | #else | ||
6522 | int power; | ||
6523 | sd = &per_cpu(phys_domains, i); | ||
6524 | if (sched_smt_power_savings) | ||
6525 | power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask); | ||
6526 | else | ||
6527 | power = SCHED_LOAD_SCALE; | ||
6528 | sd->groups->cpu_power = power; | ||
6529 | #endif | ||
6530 | } | 6545 | } |
6531 | 6546 | ||
6532 | #ifdef CONFIG_NUMA | 6547 | #ifdef CONFIG_NUMA |
@@ -6534,7 +6549,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6534 | init_numa_sched_groups_power(sched_group_nodes[i]); | 6549 | init_numa_sched_groups_power(sched_group_nodes[i]); |
6535 | 6550 | ||
6536 | if (sched_group_allnodes) { | 6551 | if (sched_group_allnodes) { |
6537 | int group = cpu_to_allnodes_group(first_cpu(*cpu_map)); | 6552 | int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map); |
6538 | struct sched_group *sg = &sched_group_allnodes[group]; | 6553 | struct sched_group *sg = &sched_group_allnodes[group]; |
6539 | 6554 | ||
6540 | init_numa_sched_groups_power(sg); | 6555 | init_numa_sched_groups_power(sg); |
@@ -6560,9 +6575,11 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6560 | 6575 | ||
6561 | return 0; | 6576 | return 0; |
6562 | 6577 | ||
6578 | #ifdef CONFIG_NUMA | ||
6563 | error: | 6579 | error: |
6564 | free_sched_groups(cpu_map); | 6580 | free_sched_groups(cpu_map); |
6565 | return -ENOMEM; | 6581 | return -ENOMEM; |
6582 | #endif | ||
6566 | } | 6583 | } |
6567 | /* | 6584 | /* |
6568 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 6585 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
@@ -6744,11 +6761,20 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
6744 | 6761 | ||
6745 | void __init sched_init_smp(void) | 6762 | void __init sched_init_smp(void) |
6746 | { | 6763 | { |
6764 | cpumask_t non_isolated_cpus; | ||
6765 | |||
6747 | lock_cpu_hotplug(); | 6766 | lock_cpu_hotplug(); |
6748 | arch_init_sched_domains(&cpu_online_map); | 6767 | arch_init_sched_domains(&cpu_online_map); |
6768 | cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map); | ||
6769 | if (cpus_empty(non_isolated_cpus)) | ||
6770 | cpu_set(smp_processor_id(), non_isolated_cpus); | ||
6749 | unlock_cpu_hotplug(); | 6771 | unlock_cpu_hotplug(); |
6750 | /* XXX: Theoretical race here - CPU may be hotplugged now */ | 6772 | /* XXX: Theoretical race here - CPU may be hotplugged now */ |
6751 | hotcpu_notifier(update_sched_domains, 0); | 6773 | hotcpu_notifier(update_sched_domains, 0); |
6774 | |||
6775 | /* Move init over to a non-isolated CPU */ | ||
6776 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) | ||
6777 | BUG(); | ||
6752 | } | 6778 | } |
6753 | #else | 6779 | #else |
6754 | void __init sched_init_smp(void) | 6780 | void __init sched_init_smp(void) |
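What the isolation bookkeeping above buys: CPUs isolated with the isolcpus= boot parameter are excluded from load balancing, and init is moved onto the remaining set, yet a task can still be placed on an isolated CPU explicitly. A hedged userspace sketch of that placement (error handling elided):

    #define _GNU_SOURCE
    #include <sched.h>

    /* Pin the calling task onto one CPU, e.g. a CPU isolated with
     * isolcpus=3; the balancer ignores it, explicit affinity does not. */
    static int pin_to_cpu(int cpu)
    {
            cpu_set_t set;

            CPU_ZERO(&set);
            CPU_SET(cpu, &set);
            return sched_setaffinity(0, sizeof(set), &set); /* 0 = current task */
    }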
diff --git a/kernel/signal.c b/kernel/signal.c index fb5da6d19f..7ed8d5304b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1055,28 +1055,44 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | |||
1055 | } | 1055 | } |
1056 | 1056 | ||
1057 | /* | 1057 | /* |
1058 | * kill_pg_info() sends a signal to a process group: this is what the tty | 1058 | * kill_pgrp_info() sends a signal to a process group: this is what the tty |
1059 | * control characters do (^C, ^Z etc) | 1059 | * control characters do (^C, ^Z etc) |
1060 | */ | 1060 | */ |
1061 | 1061 | ||
1062 | int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) | 1062 | int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) |
1063 | { | 1063 | { |
1064 | struct task_struct *p = NULL; | 1064 | struct task_struct *p = NULL; |
1065 | int retval, success; | 1065 | int retval, success; |
1066 | 1066 | ||
1067 | if (pgrp <= 0) | ||
1068 | return -EINVAL; | ||
1069 | |||
1070 | success = 0; | 1067 | success = 0; |
1071 | retval = -ESRCH; | 1068 | retval = -ESRCH; |
1072 | do_each_task_pid(pgrp, PIDTYPE_PGID, p) { | 1069 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { |
1073 | int err = group_send_sig_info(sig, info, p); | 1070 | int err = group_send_sig_info(sig, info, p); |
1074 | success |= !err; | 1071 | success |= !err; |
1075 | retval = err; | 1072 | retval = err; |
1076 | } while_each_task_pid(pgrp, PIDTYPE_PGID, p); | 1073 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); |
1077 | return success ? 0 : retval; | 1074 | return success ? 0 : retval; |
1078 | } | 1075 | } |
1079 | 1076 | ||
1077 | int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) | ||
1078 | { | ||
1079 | int retval; | ||
1080 | |||
1081 | read_lock(&tasklist_lock); | ||
1082 | retval = __kill_pgrp_info(sig, info, pgrp); | ||
1083 | read_unlock(&tasklist_lock); | ||
1084 | |||
1085 | return retval; | ||
1086 | } | ||
1087 | |||
1088 | int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) | ||
1089 | { | ||
1090 | if (pgrp <= 0) | ||
1091 | return -EINVAL; | ||
1092 | |||
1093 | return __kill_pgrp_info(sig, info, find_pid(pgrp)); | ||
1094 | } | ||
1095 | |||
1080 | int | 1096 | int |
1081 | kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) | 1097 | kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) |
1082 | { | 1098 | { |
@@ -1089,8 +1105,7 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) | |||
1089 | return retval; | 1105 | return retval; |
1090 | } | 1106 | } |
1091 | 1107 | ||
1092 | int | 1108 | int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) |
1093 | kill_proc_info(int sig, struct siginfo *info, pid_t pid) | ||
1094 | { | 1109 | { |
1095 | int error; | 1110 | int error; |
1096 | int acquired_tasklist_lock = 0; | 1111 | int acquired_tasklist_lock = 0; |
@@ -1101,7 +1116,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) | |||
1101 | read_lock(&tasklist_lock); | 1116 | read_lock(&tasklist_lock); |
1102 | acquired_tasklist_lock = 1; | 1117 | acquired_tasklist_lock = 1; |
1103 | } | 1118 | } |
1104 | p = find_task_by_pid(pid); | 1119 | p = pid_task(pid, PIDTYPE_PID); |
1105 | error = -ESRCH; | 1120 | error = -ESRCH; |
1106 | if (p) | 1121 | if (p) |
1107 | error = group_send_sig_info(sig, info, p); | 1122 | error = group_send_sig_info(sig, info, p); |
@@ -1111,8 +1126,18 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) | |||
1111 | return error; | 1126 | return error; |
1112 | } | 1127 | } |
1113 | 1128 | ||
1114 | /* like kill_proc_info(), but doesn't use uid/euid of "current" */ | 1129 | int |
1115 | int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, | 1130 | kill_proc_info(int sig, struct siginfo *info, pid_t pid) |
1131 | { | ||
1132 | int error; | ||
1133 | rcu_read_lock(); | ||
1134 | error = kill_pid_info(sig, info, find_pid(pid)); | ||
1135 | rcu_read_unlock(); | ||
1136 | return error; | ||
1137 | } | ||
1138 | |||
1139 | /* like kill_pid_info(), but doesn't use uid/euid of "current" */ | ||
1140 | int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, | ||
1116 | uid_t uid, uid_t euid, u32 secid) | 1141 | uid_t uid, uid_t euid, u32 secid) |
1117 | { | 1142 | { |
1118 | int ret = -EINVAL; | 1143 | int ret = -EINVAL; |
@@ -1122,7 +1147,7 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, | |||
1122 | return ret; | 1147 | return ret; |
1123 | 1148 | ||
1124 | read_lock(&tasklist_lock); | 1149 | read_lock(&tasklist_lock); |
1125 | p = find_task_by_pid(pid); | 1150 | p = pid_task(pid, PIDTYPE_PID); |
1126 | if (!p) { | 1151 | if (!p) { |
1127 | ret = -ESRCH; | 1152 | ret = -ESRCH; |
1128 | goto out_unlock; | 1153 | goto out_unlock; |
@@ -1146,7 +1171,7 @@ out_unlock: | |||
1146 | read_unlock(&tasklist_lock); | 1171 | read_unlock(&tasklist_lock); |
1147 | return ret; | 1172 | return ret; |
1148 | } | 1173 | } |
1149 | EXPORT_SYMBOL_GPL(kill_proc_info_as_uid); | 1174 | EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); |
1150 | 1175 | ||
1151 | /* | 1176 | /* |
1152 | * kill_something_info() interprets pid in interesting ways just like kill(2). | 1177 | * kill_something_info() interprets pid in interesting ways just like kill(2). |
@@ -1264,6 +1289,18 @@ force_sigsegv(int sig, struct task_struct *p) | |||
1264 | return 0; | 1289 | return 0; |
1265 | } | 1290 | } |
1266 | 1291 | ||
1292 | int kill_pgrp(struct pid *pid, int sig, int priv) | ||
1293 | { | ||
1294 | return kill_pgrp_info(sig, __si_special(priv), pid); | ||
1295 | } | ||
1296 | EXPORT_SYMBOL(kill_pgrp); | ||
1297 | |||
1298 | int kill_pid(struct pid *pid, int sig, int priv) | ||
1299 | { | ||
1300 | return kill_pid_info(sig, __si_special(priv), pid); | ||
1301 | } | ||
1302 | EXPORT_SYMBOL(kill_pid); | ||
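kill_pgrp() and kill_pid() are the exported, struct-pid-flavoured replacements for kill_pg()/kill_proc(); the tty layer is the obvious consumer. A hedged usage sketch:

    /* Sketch: deliver SIGINT to a foreground process group the way the
     * ^C path would. 'pgrp' is a counted struct pid reference held by
     * the caller; priv = 1 marks the signal as kernel-originated,
     * bypassing the usual permission checks. */
    static void intr_foreground(struct pid *pgrp)
    {
            if (pgrp)
                    kill_pgrp(pgrp, SIGINT, 1);
    }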
1303 | |||
1267 | int | 1304 | int |
1268 | kill_pg(pid_t pgrp, int sig, int priv) | 1305 | kill_pg(pid_t pgrp, int sig, int priv) |
1269 | { | 1306 | { |
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index d48143eafb..476c374151 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
@@ -215,7 +215,7 @@ void __lockfunc _##op##_lock(locktype##_t *lock) \ | |||
215 | if (!(lock)->break_lock) \ | 215 | if (!(lock)->break_lock) \ |
216 | (lock)->break_lock = 1; \ | 216 | (lock)->break_lock = 1; \ |
217 | while (!op##_can_lock(lock) && (lock)->break_lock) \ | 217 | while (!op##_can_lock(lock) && (lock)->break_lock) \ |
218 | cpu_relax(); \ | 218 | _raw_##op##_relax(&lock->raw_lock); \ |
219 | } \ | 219 | } \ |
220 | (lock)->break_lock = 0; \ | 220 | (lock)->break_lock = 0; \ |
221 | } \ | 221 | } \ |
@@ -237,7 +237,7 @@ unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \ | |||
237 | if (!(lock)->break_lock) \ | 237 | if (!(lock)->break_lock) \ |
238 | (lock)->break_lock = 1; \ | 238 | (lock)->break_lock = 1; \ |
239 | while (!op##_can_lock(lock) && (lock)->break_lock) \ | 239 | while (!op##_can_lock(lock) && (lock)->break_lock) \ |
240 | cpu_relax(); \ | 240 | _raw_##op##_relax(&lock->raw_lock); \ |
241 | } \ | 241 | } \ |
242 | (lock)->break_lock = 0; \ | 242 | (lock)->break_lock = 0; \ |
243 | return flags; \ | 243 | return flags; \ |
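Replacing cpu_relax() with a per-lock-type hook lets an architecture spin more politely while waiting (ia64, for instance, can watch the lock word instead of blindly busy-looping). A plausible no-op fallback, assumed rather than quoted from the headers:

    /* Assumed generic fallbacks: with no smarter arch back-off, each
     * relax hook degrades to plain cpu_relax() on the raw lock. */
    #define _raw_spin_relax(raw_lock)       cpu_relax()
    #define _raw_read_relax(raw_lock)       cpu_relax()
    #define _raw_write_relax(raw_lock)      cpu_relax()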
diff --git a/kernel/sys.c b/kernel/sys.c index 8647061c08..2314867ae3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -92,7 +92,8 @@ EXPORT_SYMBOL(fs_overflowgid); | |||
92 | */ | 92 | */ |
93 | 93 | ||
94 | int C_A_D = 1; | 94 | int C_A_D = 1; |
95 | int cad_pid = 1; | 95 | struct pid *cad_pid; |
96 | EXPORT_SYMBOL(cad_pid); | ||
96 | 97 | ||
97 | /* | 98 | /* |
98 | * Notifier list for kernel code which wants to be called | 99 | * Notifier list for kernel code which wants to be called |
@@ -221,7 +222,7 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); | |||
221 | * of the last notifier function called. | 222 | * of the last notifier function called. |
222 | */ | 223 | */ |
223 | 224 | ||
224 | int atomic_notifier_call_chain(struct atomic_notifier_head *nh, | 225 | int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, |
225 | unsigned long val, void *v) | 226 | unsigned long val, void *v) |
226 | { | 227 | { |
227 | int ret; | 228 | int ret; |
@@ -607,11 +608,10 @@ static void kernel_restart_prepare(char *cmd) | |||
607 | void kernel_restart(char *cmd) | 608 | void kernel_restart(char *cmd) |
608 | { | 609 | { |
609 | kernel_restart_prepare(cmd); | 610 | kernel_restart_prepare(cmd); |
610 | if (!cmd) { | 611 | if (!cmd) |
611 | printk(KERN_EMERG "Restarting system.\n"); | 612 | printk(KERN_EMERG "Restarting system.\n"); |
612 | } else { | 613 | else |
613 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); | 614 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); |
614 | } | ||
615 | machine_restart(cmd); | 615 | machine_restart(cmd); |
616 | } | 616 | } |
617 | EXPORT_SYMBOL_GPL(kernel_restart); | 617 | EXPORT_SYMBOL_GPL(kernel_restart); |
@@ -627,9 +627,8 @@ static void kernel_kexec(void) | |||
627 | #ifdef CONFIG_KEXEC | 627 | #ifdef CONFIG_KEXEC |
628 | struct kimage *image; | 628 | struct kimage *image; |
629 | image = xchg(&kexec_image, NULL); | 629 | image = xchg(&kexec_image, NULL); |
630 | if (!image) { | 630 | if (!image) |
631 | return; | 631 | return; |
632 | } | ||
633 | kernel_restart_prepare(NULL); | 632 | kernel_restart_prepare(NULL); |
634 | printk(KERN_EMERG "Starting new kernel\n"); | 633 | printk(KERN_EMERG "Starting new kernel\n"); |
635 | machine_shutdown(); | 634 | machine_shutdown(); |
@@ -775,10 +774,9 @@ void ctrl_alt_del(void) | |||
775 | if (C_A_D) | 774 | if (C_A_D) |
776 | schedule_work(&cad_work); | 775 | schedule_work(&cad_work); |
777 | else | 776 | else |
778 | kill_proc(cad_pid, SIGINT, 1); | 777 | kill_cad_pid(SIGINT, 1); |
779 | } | 778 | } |
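With cad_pid now a struct pid pointer, the call site uses a helper instead of kill_proc(). Its definition is not in this diff; presumably it is a thin wrapper over the kill_pid() helper added in kernel/signal.c, along these lines:

    /* Assumed definition of kill_cad_pid() (lives in a header, not here). */
    static inline int kill_cad_pid(int sig, int priv)
    {
            return kill_pid(cad_pid, sig, priv);
    }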
780 | 779 | ||
781 | |||
782 | /* | 780 | /* |
783 | * Unprivileged users may change the real gid to the effective gid | 781 | * Unprivileged users may change the real gid to the effective gid |
784 | * or vice versa. (BSD-style) | 782 | * or vice versa. (BSD-style) |
@@ -823,12 +821,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) | |||
823 | (current->sgid == egid) || | 821 | (current->sgid == egid) || |
824 | capable(CAP_SETGID)) | 822 | capable(CAP_SETGID)) |
825 | new_egid = egid; | 823 | new_egid = egid; |
826 | else { | 824 | else |
827 | return -EPERM; | 825 | return -EPERM; |
828 | } | ||
829 | } | 826 | } |
830 | if (new_egid != old_egid) | 827 | if (new_egid != old_egid) { |
831 | { | ||
832 | current->mm->dumpable = suid_dumpable; | 828 | current->mm->dumpable = suid_dumpable; |
833 | smp_wmb(); | 829 | smp_wmb(); |
834 | } | 830 | } |
@@ -857,19 +853,14 @@ asmlinkage long sys_setgid(gid_t gid) | |||
857 | if (retval) | 853 | if (retval) |
858 | return retval; | 854 | return retval; |
859 | 855 | ||
860 | if (capable(CAP_SETGID)) | 856 | if (capable(CAP_SETGID)) { |
861 | { | 857 | if (old_egid != gid) { |
862 | if(old_egid != gid) | ||
863 | { | ||
864 | current->mm->dumpable = suid_dumpable; | 858 | current->mm->dumpable = suid_dumpable; |
865 | smp_wmb(); | 859 | smp_wmb(); |
866 | } | 860 | } |
867 | current->gid = current->egid = current->sgid = current->fsgid = gid; | 861 | current->gid = current->egid = current->sgid = current->fsgid = gid; |
868 | } | 862 | } else if ((gid == current->gid) || (gid == current->sgid)) { |
869 | else if ((gid == current->gid) || (gid == current->sgid)) | 863 | if (old_egid != gid) { |
870 | { | ||
871 | if(old_egid != gid) | ||
872 | { | ||
873 | current->mm->dumpable = suid_dumpable; | 864 | current->mm->dumpable = suid_dumpable; |
874 | smp_wmb(); | 865 | smp_wmb(); |
875 | } | 866 | } |
@@ -900,8 +891,7 @@ static int set_user(uid_t new_ruid, int dumpclear) | |||
900 | 891 | ||
901 | switch_uid(new_user); | 892 | switch_uid(new_user); |
902 | 893 | ||
903 | if(dumpclear) | 894 | if (dumpclear) { |
904 | { | ||
905 | current->mm->dumpable = suid_dumpable; | 895 | current->mm->dumpable = suid_dumpable; |
906 | smp_wmb(); | 896 | smp_wmb(); |
907 | } | 897 | } |
@@ -957,8 +947,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) | |||
957 | if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) | 947 | if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) |
958 | return -EAGAIN; | 948 | return -EAGAIN; |
959 | 949 | ||
960 | if (new_euid != old_euid) | 950 | if (new_euid != old_euid) { |
961 | { | ||
962 | current->mm->dumpable = suid_dumpable; | 951 | current->mm->dumpable = suid_dumpable; |
963 | smp_wmb(); | 952 | smp_wmb(); |
964 | } | 953 | } |
@@ -1008,8 +997,7 @@ asmlinkage long sys_setuid(uid_t uid) | |||
1008 | } else if ((uid != current->uid) && (uid != new_suid)) | 997 | } else if ((uid != current->uid) && (uid != new_suid)) |
1009 | return -EPERM; | 998 | return -EPERM; |
1010 | 999 | ||
1011 | if (old_euid != uid) | 1000 | if (old_euid != uid) { |
1012 | { | ||
1013 | current->mm->dumpable = suid_dumpable; | 1001 | current->mm->dumpable = suid_dumpable; |
1014 | smp_wmb(); | 1002 | smp_wmb(); |
1015 | } | 1003 | } |
@@ -1054,8 +1042,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) | |||
1054 | return -EAGAIN; | 1042 | return -EAGAIN; |
1055 | } | 1043 | } |
1056 | if (euid != (uid_t) -1) { | 1044 | if (euid != (uid_t) -1) { |
1057 | if (euid != current->euid) | 1045 | if (euid != current->euid) { |
1058 | { | ||
1059 | current->mm->dumpable = suid_dumpable; | 1046 | current->mm->dumpable = suid_dumpable; |
1060 | smp_wmb(); | 1047 | smp_wmb(); |
1061 | } | 1048 | } |
@@ -1105,8 +1092,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) | |||
1105 | return -EPERM; | 1092 | return -EPERM; |
1106 | } | 1093 | } |
1107 | if (egid != (gid_t) -1) { | 1094 | if (egid != (gid_t) -1) { |
1108 | if (egid != current->egid) | 1095 | if (egid != current->egid) { |
1109 | { | ||
1110 | current->mm->dumpable = suid_dumpable; | 1096 | current->mm->dumpable = suid_dumpable; |
1111 | smp_wmb(); | 1097 | smp_wmb(); |
1112 | } | 1098 | } |
@@ -1151,10 +1137,8 @@ asmlinkage long sys_setfsuid(uid_t uid) | |||
1151 | 1137 | ||
1152 | if (uid == current->uid || uid == current->euid || | 1138 | if (uid == current->uid || uid == current->euid || |
1153 | uid == current->suid || uid == current->fsuid || | 1139 | uid == current->suid || uid == current->fsuid || |
1154 | capable(CAP_SETUID)) | 1140 | capable(CAP_SETUID)) { |
1155 | { | 1141 | if (uid != old_fsuid) { |
1156 | if (uid != old_fsuid) | ||
1157 | { | ||
1158 | current->mm->dumpable = suid_dumpable; | 1142 | current->mm->dumpable = suid_dumpable; |
1159 | smp_wmb(); | 1143 | smp_wmb(); |
1160 | } | 1144 | } |
@@ -1182,10 +1166,8 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
1182 | 1166 | ||
1183 | if (gid == current->gid || gid == current->egid || | 1167 | if (gid == current->gid || gid == current->egid || |
1184 | gid == current->sgid || gid == current->fsgid || | 1168 | gid == current->sgid || gid == current->fsgid || |
1185 | capable(CAP_SETGID)) | 1169 | capable(CAP_SETGID)) { |
1186 | { | 1170 | if (gid != old_fsgid) { |
1187 | if (gid != old_fsgid) | ||
1188 | { | ||
1189 | current->mm->dumpable = suid_dumpable; | 1171 | current->mm->dumpable = suid_dumpable; |
1190 | smp_wmb(); | 1172 | smp_wmb(); |
1191 | } | 1173 | } |
@@ -1321,9 +1303,9 @@ out: | |||
1321 | 1303 | ||
1322 | asmlinkage long sys_getpgid(pid_t pid) | 1304 | asmlinkage long sys_getpgid(pid_t pid) |
1323 | { | 1305 | { |
1324 | if (!pid) { | 1306 | if (!pid) |
1325 | return process_group(current); | 1307 | return process_group(current); |
1326 | } else { | 1308 | else { |
1327 | int retval; | 1309 | int retval; |
1328 | struct task_struct *p; | 1310 | struct task_struct *p; |
1329 | 1311 | ||
@@ -1353,9 +1335,9 @@ asmlinkage long sys_getpgrp(void) | |||
1353 | 1335 | ||
1354 | asmlinkage long sys_getsid(pid_t pid) | 1336 | asmlinkage long sys_getsid(pid_t pid) |
1355 | { | 1337 | { |
1356 | if (!pid) { | 1338 | if (!pid) |
1357 | return current->signal->session; | 1339 | return current->signal->session; |
1358 | } else { | 1340 | else { |
1359 | int retval; | 1341 | int retval; |
1360 | struct task_struct *p; | 1342 | struct task_struct *p; |
1361 | 1343 | ||
@@ -1363,7 +1345,7 @@ asmlinkage long sys_getsid(pid_t pid) | |||
1363 | p = find_task_by_pid(pid); | 1345 | p = find_task_by_pid(pid); |
1364 | 1346 | ||
1365 | retval = -ESRCH; | 1347 | retval = -ESRCH; |
1366 | if(p) { | 1348 | if (p) { |
1367 | retval = security_task_getsid(p); | 1349 | retval = security_task_getsid(p); |
1368 | if (!retval) | 1350 | if (!retval) |
1369 | retval = p->signal->session; | 1351 | retval = p->signal->session; |
@@ -1431,9 +1413,9 @@ struct group_info *groups_alloc(int gidsetsize) | |||
1431 | group_info->nblocks = nblocks; | 1413 | group_info->nblocks = nblocks; |
1432 | atomic_set(&group_info->usage, 1); | 1414 | atomic_set(&group_info->usage, 1); |
1433 | 1415 | ||
1434 | if (gidsetsize <= NGROUPS_SMALL) { | 1416 | if (gidsetsize <= NGROUPS_SMALL) |
1435 | group_info->blocks[0] = group_info->small_block; | 1417 | group_info->blocks[0] = group_info->small_block; |
1436 | } else { | 1418 | else { |
1437 | for (i = 0; i < nblocks; i++) { | 1419 | for (i = 0; i < nblocks; i++) { |
1438 | gid_t *b; | 1420 | gid_t *b; |
1439 | b = (void *)__get_free_page(GFP_USER); | 1421 | b = (void *)__get_free_page(GFP_USER); |
@@ -1489,7 +1471,7 @@ static int groups_to_user(gid_t __user *grouplist, | |||
1489 | /* fill a group_info from a user-space array - it must be allocated already */ | 1471 | /* fill a group_info from a user-space array - it must be allocated already */ |
1490 | static int groups_from_user(struct group_info *group_info, | 1472 | static int groups_from_user(struct group_info *group_info, |
1491 | gid_t __user *grouplist) | 1473 | gid_t __user *grouplist) |
1492 | { | 1474 | { |
1493 | int i; | 1475 | int i; |
1494 | int count = group_info->ngroups; | 1476 | int count = group_info->ngroups; |
1495 | 1477 | ||
@@ -1647,9 +1629,8 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) | |||
1647 | int in_group_p(gid_t grp) | 1629 | int in_group_p(gid_t grp) |
1648 | { | 1630 | { |
1649 | int retval = 1; | 1631 | int retval = 1; |
1650 | if (grp != current->fsgid) { | 1632 | if (grp != current->fsgid) |
1651 | retval = groups_search(current->group_info, grp); | 1633 | retval = groups_search(current->group_info, grp); |
1652 | } | ||
1653 | return retval; | 1634 | return retval; |
1654 | } | 1635 | } |
1655 | 1636 | ||
@@ -1658,9 +1639,8 @@ EXPORT_SYMBOL(in_group_p); | |||
1658 | int in_egroup_p(gid_t grp) | 1639 | int in_egroup_p(gid_t grp) |
1659 | { | 1640 | { |
1660 | int retval = 1; | 1641 | int retval = 1; |
1661 | if (grp != current->egid) { | 1642 | if (grp != current->egid) |
1662 | retval = groups_search(current->group_info, grp); | 1643 | retval = groups_search(current->group_info, grp); |
1663 | } | ||
1664 | return retval; | 1644 | return retval; |
1665 | } | 1645 | } |
1666 | 1646 | ||
@@ -1675,7 +1655,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name) | |||
1675 | int errno = 0; | 1655 | int errno = 0; |
1676 | 1656 | ||
1677 | down_read(&uts_sem); | 1657 | down_read(&uts_sem); |
1678 | if (copy_to_user(name,&system_utsname,sizeof *name)) | 1658 | if (copy_to_user(name, utsname(), sizeof *name)) |
1679 | errno = -EFAULT; | 1659 | errno = -EFAULT; |
1680 | up_read(&uts_sem); | 1660 | up_read(&uts_sem); |
1681 | return errno; | 1661 | return errno; |
@@ -1693,8 +1673,8 @@ asmlinkage long sys_sethostname(char __user *name, int len) | |||
1693 | down_write(&uts_sem); | 1673 | down_write(&uts_sem); |
1694 | errno = -EFAULT; | 1674 | errno = -EFAULT; |
1695 | if (!copy_from_user(tmp, name, len)) { | 1675 | if (!copy_from_user(tmp, name, len)) { |
1696 | memcpy(system_utsname.nodename, tmp, len); | 1676 | memcpy(utsname()->nodename, tmp, len); |
1697 | system_utsname.nodename[len] = 0; | 1677 | utsname()->nodename[len] = 0; |
1698 | errno = 0; | 1678 | errno = 0; |
1699 | } | 1679 | } |
1700 | up_write(&uts_sem); | 1680 | up_write(&uts_sem); |
@@ -1710,11 +1690,11 @@ asmlinkage long sys_gethostname(char __user *name, int len) | |||
1710 | if (len < 0) | 1690 | if (len < 0) |
1711 | return -EINVAL; | 1691 | return -EINVAL; |
1712 | down_read(&uts_sem); | 1692 | down_read(&uts_sem); |
1713 | i = 1 + strlen(system_utsname.nodename); | 1693 | i = 1 + strlen(utsname()->nodename); |
1714 | if (i > len) | 1694 | if (i > len) |
1715 | i = len; | 1695 | i = len; |
1716 | errno = 0; | 1696 | errno = 0; |
1717 | if (copy_to_user(name, system_utsname.nodename, i)) | 1697 | if (copy_to_user(name, utsname()->nodename, i)) |
1718 | errno = -EFAULT; | 1698 | errno = -EFAULT; |
1719 | up_read(&uts_sem); | 1699 | up_read(&uts_sem); |
1720 | return errno; | 1700 | return errno; |
@@ -1739,8 +1719,8 @@ asmlinkage long sys_setdomainname(char __user *name, int len) | |||
1739 | down_write(&uts_sem); | 1719 | down_write(&uts_sem); |
1740 | errno = -EFAULT; | 1720 | errno = -EFAULT; |
1741 | if (!copy_from_user(tmp, name, len)) { | 1721 | if (!copy_from_user(tmp, name, len)) { |
1742 | memcpy(system_utsname.domainname, tmp, len); | 1722 | memcpy(utsname()->domainname, tmp, len); |
1743 | system_utsname.domainname[len] = 0; | 1723 | utsname()->domainname[len] = 0; |
1744 | errno = 0; | 1724 | errno = 0; |
1745 | } | 1725 | } |
1746 | up_write(&uts_sem); | 1726 | up_write(&uts_sem); |
@@ -1775,9 +1755,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r | |||
1775 | task_lock(current->group_leader); | 1755 | task_lock(current->group_leader); |
1776 | x = current->signal->rlim[resource]; | 1756 | x = current->signal->rlim[resource]; |
1777 | task_unlock(current->group_leader); | 1757 | task_unlock(current->group_leader); |
1778 | if(x.rlim_cur > 0x7FFFFFFF) | 1758 | if (x.rlim_cur > 0x7FFFFFFF) |
1779 | x.rlim_cur = 0x7FFFFFFF; | 1759 | x.rlim_cur = 0x7FFFFFFF; |
1780 | if(x.rlim_max > 0x7FFFFFFF) | 1760 | if (x.rlim_max > 0x7FFFFFFF) |
1781 | x.rlim_max = 0x7FFFFFFF; | 1761 | x.rlim_max = 0x7FFFFFFF; |
1782 | return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; | 1762 | return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; |
1783 | } | 1763 | } |
@@ -2083,12 +2063,12 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, | |||
2083 | * padding | 2063 | * padding |
2084 | */ | 2064 | */ |
2085 | unsigned long t0, t1; | 2065 | unsigned long t0, t1; |
2086 | get_user(t0, &cache->t0); | 2066 | get_user(t0, &cache->blob[0]); |
2087 | get_user(t1, &cache->t1); | 2067 | get_user(t1, &cache->blob[1]); |
2088 | t0++; | 2068 | t0++; |
2089 | t1++; | 2069 | t1++; |
2090 | put_user(t0, &cache->t0); | 2070 | put_user(t0, &cache->blob[0]); |
2091 | put_user(t1, &cache->t1); | 2071 | put_user(t1, &cache->blob[1]); |
2092 | } | 2072 | } |
2093 | return err ? -EFAULT : 0; | 2073 | return err ? -EFAULT : 0; |
2094 | } | 2074 | } |
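The rename from t0/t1 to blob[0]/blob[1] suggests the cache became an opaque reservation rather than a set of named fields. The structure this test code implies, as assumed here:

    /* Assumed shape of the user-visible cache after this change: one
     * opaque reservation instead of named members, so no userspace ABI
     * can grow around individual fields. */
    struct getcpu_cache {
            unsigned long blob[128 / sizeof(long)];
    };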
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 6991bece67..7a3b2e75f0 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -134,3 +134,8 @@ cond_syscall(sys_madvise); | |||
134 | cond_syscall(sys_mremap); | 134 | cond_syscall(sys_mremap); |
135 | cond_syscall(sys_remap_file_pages); | 135 | cond_syscall(sys_remap_file_pages); |
136 | cond_syscall(compat_sys_move_pages); | 136 | cond_syscall(compat_sys_move_pages); |
137 | |||
138 | /* block-layer dependent */ | ||
139 | cond_syscall(sys_bdflush); | ||
140 | cond_syscall(sys_ioprio_set); | ||
141 | cond_syscall(sys_ioprio_get); | ||
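cond_syscall() keeps these entry points linkable when the block layer is compiled out: the symbol becomes a weak alias for sys_ni_syscall(), which simply returns -ENOSYS. The generic definition is essentially the following (quoted from memory, so treat as a sketch):

    /* A weak alias that a real implementation, if compiled in, overrides. */
    #define cond_syscall(x) \
            asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")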
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9535a38399..8020fb273c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -52,6 +52,10 @@ | |||
52 | extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | 52 | extern int proc_nr_files(ctl_table *table, int write, struct file *filp, |
53 | void __user *buffer, size_t *lenp, loff_t *ppos); | 53 | void __user *buffer, size_t *lenp, loff_t *ppos); |
54 | 54 | ||
55 | #ifdef CONFIG_X86 | ||
56 | #include <asm/nmi.h> | ||
57 | #endif | ||
58 | |||
55 | #if defined(CONFIG_SYSCTL) | 59 | #if defined(CONFIG_SYSCTL) |
56 | 60 | ||
57 | /* External variables not in a header file. */ | 61 | /* External variables not in a header file. */ |
@@ -64,7 +68,6 @@ extern int sysrq_enabled; | |||
64 | extern int core_uses_pid; | 68 | extern int core_uses_pid; |
65 | extern int suid_dumpable; | 69 | extern int suid_dumpable; |
66 | extern char core_pattern[]; | 70 | extern char core_pattern[]; |
67 | extern int cad_pid; | ||
68 | extern int pid_max; | 71 | extern int pid_max; |
69 | extern int min_free_kbytes; | 72 | extern int min_free_kbytes; |
70 | extern int printk_ratelimit_jiffies; | 73 | extern int printk_ratelimit_jiffies; |
@@ -74,13 +77,6 @@ extern int sysctl_drop_caches; | |||
74 | extern int percpu_pagelist_fraction; | 77 | extern int percpu_pagelist_fraction; |
75 | extern int compat_log; | 78 | extern int compat_log; |
76 | 79 | ||
77 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | ||
78 | int unknown_nmi_panic; | ||
79 | int nmi_watchdog_enabled; | ||
80 | extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | ||
81 | void __user *, size_t *, loff_t *); | ||
82 | #endif | ||
83 | |||
84 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 80 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
85 | static int maxolduid = 65535; | 81 | static int maxolduid = 65535; |
86 | static int minolduid; | 82 | static int minolduid; |
@@ -95,13 +91,8 @@ extern char modprobe_path[]; | |||
95 | extern int sg_big_buff; | 91 | extern int sg_big_buff; |
96 | #endif | 92 | #endif |
97 | #ifdef CONFIG_SYSVIPC | 93 | #ifdef CONFIG_SYSVIPC |
98 | extern size_t shm_ctlmax; | 94 | static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, |
99 | extern size_t shm_ctlall; | 95 | void __user *buffer, size_t *lenp, loff_t *ppos); |
100 | extern int shm_ctlmni; | ||
101 | extern int msg_ctlmax; | ||
102 | extern int msg_ctlmnb; | ||
103 | extern int msg_ctlmni; | ||
104 | extern int sem_ctls[]; | ||
105 | #endif | 96 | #endif |
106 | 97 | ||
107 | #ifdef __sparc__ | 98 | #ifdef __sparc__ |
@@ -142,7 +133,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *, | |||
142 | void __user *, size_t, ctl_table *, void **); | 133 | void __user *, size_t, ctl_table *, void **); |
143 | #endif | 134 | #endif |
144 | 135 | ||
145 | static int proc_doutsstring(ctl_table *table, int write, struct file *filp, | 136 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, |
137 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
138 | |||
139 | static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, | ||
146 | void __user *buffer, size_t *lenp, loff_t *ppos); | 140 | void __user *buffer, size_t *lenp, loff_t *ppos); |
147 | 141 | ||
148 | static ctl_table root_table[]; | 142 | static ctl_table root_table[]; |
@@ -232,51 +226,100 @@ static ctl_table root_table[] = { | |||
232 | }; | 226 | }; |
233 | 227 | ||
234 | static ctl_table kern_table[] = { | 228 | static ctl_table kern_table[] = { |
229 | #ifndef CONFIG_UTS_NS | ||
230 | { | ||
231 | .ctl_name = KERN_OSTYPE, | ||
232 | .procname = "ostype", | ||
233 | .data = init_uts_ns.name.sysname, | ||
234 | .maxlen = sizeof(init_uts_ns.name.sysname), | ||
235 | .mode = 0444, | ||
236 | .proc_handler = &proc_do_uts_string, | ||
237 | .strategy = &sysctl_string, | ||
238 | }, | ||
239 | { | ||
240 | .ctl_name = KERN_OSRELEASE, | ||
241 | .procname = "osrelease", | ||
242 | .data = init_uts_ns.name.release, | ||
243 | .maxlen = sizeof(init_uts_ns.name.release), | ||
244 | .mode = 0444, | ||
245 | .proc_handler = &proc_do_uts_string, | ||
246 | .strategy = &sysctl_string, | ||
247 | }, | ||
248 | { | ||
249 | .ctl_name = KERN_VERSION, | ||
250 | .procname = "version", | ||
251 | .data = init_uts_ns.name.version, | ||
252 | .maxlen = sizeof(init_uts_ns.name.version), | ||
253 | .mode = 0444, | ||
254 | .proc_handler = &proc_do_uts_string, | ||
255 | .strategy = &sysctl_string, | ||
256 | }, | ||
257 | { | ||
258 | .ctl_name = KERN_NODENAME, | ||
259 | .procname = "hostname", | ||
260 | .data = init_uts_ns.name.nodename, | ||
261 | .maxlen = sizeof(init_uts_ns.name.nodename), | ||
262 | .mode = 0644, | ||
263 | .proc_handler = &proc_do_uts_string, | ||
264 | .strategy = &sysctl_string, | ||
265 | }, | ||
266 | { | ||
267 | .ctl_name = KERN_DOMAINNAME, | ||
268 | .procname = "domainname", | ||
269 | .data = init_uts_ns.name.domainname, | ||
270 | .maxlen = sizeof(init_uts_ns.name.domainname), | ||
271 | .mode = 0644, | ||
272 | .proc_handler = &proc_do_uts_string, | ||
273 | .strategy = &sysctl_string, | ||
274 | }, | ||
275 | #else /* !CONFIG_UTS_NS */ | ||
235 | { | 276 | { |
236 | .ctl_name = KERN_OSTYPE, | 277 | .ctl_name = KERN_OSTYPE, |
237 | .procname = "ostype", | 278 | .procname = "ostype", |
238 | .data = system_utsname.sysname, | 279 | .data = NULL, |
239 | .maxlen = sizeof(system_utsname.sysname), | 280 | /* could maybe use __NEW_UTS_LEN here? */ |
281 | .maxlen = FIELD_SIZEOF(struct new_utsname, sysname), | ||
240 | .mode = 0444, | 282 | .mode = 0444, |
241 | .proc_handler = &proc_doutsstring, | 283 | .proc_handler = &proc_do_uts_string, |
242 | .strategy = &sysctl_string, | 284 | .strategy = &sysctl_string, |
243 | }, | 285 | }, |
244 | { | 286 | { |
245 | .ctl_name = KERN_OSRELEASE, | 287 | .ctl_name = KERN_OSRELEASE, |
246 | .procname = "osrelease", | 288 | .procname = "osrelease", |
247 | .data = system_utsname.release, | 289 | .data = NULL, |
248 | .maxlen = sizeof(system_utsname.release), | 290 | .maxlen = FIELD_SIZEOF(struct new_utsname, release), |
249 | .mode = 0444, | 291 | .mode = 0444, |
250 | .proc_handler = &proc_doutsstring, | 292 | .proc_handler = &proc_do_uts_string, |
251 | .strategy = &sysctl_string, | 293 | .strategy = &sysctl_string, |
252 | }, | 294 | }, |
253 | { | 295 | { |
254 | .ctl_name = KERN_VERSION, | 296 | .ctl_name = KERN_VERSION, |
255 | .procname = "version", | 297 | .procname = "version", |
256 | .data = system_utsname.version, | 298 | .data = NULL, |
257 | .maxlen = sizeof(system_utsname.version), | 299 | .maxlen = FIELD_SIZEOF(struct new_utsname, version), |
258 | .mode = 0444, | 300 | .mode = 0444, |
259 | .proc_handler = &proc_doutsstring, | 301 | .proc_handler = &proc_do_uts_string, |
260 | .strategy = &sysctl_string, | 302 | .strategy = &sysctl_string, |
261 | }, | 303 | }, |
262 | { | 304 | { |
263 | .ctl_name = KERN_NODENAME, | 305 | .ctl_name = KERN_NODENAME, |
264 | .procname = "hostname", | 306 | .procname = "hostname", |
265 | .data = system_utsname.nodename, | 307 | .data = NULL, |
266 | .maxlen = sizeof(system_utsname.nodename), | 308 | .maxlen = FIELD_SIZEOF(struct new_utsname, nodename), |
267 | .mode = 0644, | 309 | .mode = 0644, |
268 | .proc_handler = &proc_doutsstring, | 310 | .proc_handler = &proc_do_uts_string, |
269 | .strategy = &sysctl_string, | 311 | .strategy = &sysctl_string, |
270 | }, | 312 | }, |
271 | { | 313 | { |
272 | .ctl_name = KERN_DOMAINNAME, | 314 | .ctl_name = KERN_DOMAINNAME, |
273 | .procname = "domainname", | 315 | .procname = "domainname", |
274 | .data = system_utsname.domainname, | 316 | .data = NULL, |
275 | .maxlen = sizeof(system_utsname.domainname), | 317 | .maxlen = FIELD_SIZEOF(struct new_utsname, domainname), |
276 | .mode = 0644, | 318 | .mode = 0644, |
277 | .proc_handler = &proc_doutsstring, | 319 | .proc_handler = &proc_do_uts_string, |
278 | .strategy = &sysctl_string, | 320 | .strategy = &sysctl_string, |
279 | }, | 321 | }, |
322 | #endif /* !CONFIG_UTS_NS */ | ||
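In the namespace-aware variant .data is NULL because the buffer is only known per-process at access time, yet .maxlen can still be computed from the type alone. FIELD_SIZEOF() does exactly that without needing an instance; its standard kernel definition:

    /* Standard definition (include/linux/kernel.h): size of a struct
     * member computed from the type alone. The null pointer is never
     * dereferenced; sizeof is evaluated at compile time. */
    #define FIELD_SIZEOF(t, f) (sizeof(((t *)0)->f))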
280 | { | 323 | { |
281 | .ctl_name = KERN_PANIC, | 324 | .ctl_name = KERN_PANIC, |
282 | .procname = "panic", | 325 | .procname = "panic", |
@@ -297,7 +340,7 @@ static ctl_table kern_table[] = { | |||
297 | .ctl_name = KERN_CORE_PATTERN, | 340 | .ctl_name = KERN_CORE_PATTERN, |
298 | .procname = "core_pattern", | 341 | .procname = "core_pattern", |
299 | .data = core_pattern, | 342 | .data = core_pattern, |
300 | .maxlen = 64, | 343 | .maxlen = 128, |
301 | .mode = 0644, | 344 | .mode = 0644, |
302 | .proc_handler = &proc_dostring, | 345 | .proc_handler = &proc_dostring, |
303 | .strategy = &sysctl_string, | 346 | .strategy = &sysctl_string, |
@@ -435,58 +478,58 @@ static ctl_table kern_table[] = { | |||
435 | { | 478 | { |
436 | .ctl_name = KERN_SHMMAX, | 479 | .ctl_name = KERN_SHMMAX, |
437 | .procname = "shmmax", | 480 | .procname = "shmmax", |
438 | .data = &shm_ctlmax, | 481 | .data = NULL, |
439 | .maxlen = sizeof (size_t), | 482 | .maxlen = sizeof (size_t), |
440 | .mode = 0644, | 483 | .mode = 0644, |
441 | .proc_handler = &proc_doulongvec_minmax, | 484 | .proc_handler = &proc_do_ipc_string, |
442 | }, | 485 | }, |
443 | { | 486 | { |
444 | .ctl_name = KERN_SHMALL, | 487 | .ctl_name = KERN_SHMALL, |
445 | .procname = "shmall", | 488 | .procname = "shmall", |
446 | .data = &shm_ctlall, | 489 | .data = NULL, |
447 | .maxlen = sizeof (size_t), | 490 | .maxlen = sizeof (size_t), |
448 | .mode = 0644, | 491 | .mode = 0644, |
449 | .proc_handler = &proc_doulongvec_minmax, | 492 | .proc_handler = &proc_do_ipc_string, |
450 | }, | 493 | }, |
451 | { | 494 | { |
452 | .ctl_name = KERN_SHMMNI, | 495 | .ctl_name = KERN_SHMMNI, |
453 | .procname = "shmmni", | 496 | .procname = "shmmni", |
454 | .data = &shm_ctlmni, | 497 | .data = NULL, |
455 | .maxlen = sizeof (int), | 498 | .maxlen = sizeof (int), |
456 | .mode = 0644, | 499 | .mode = 0644, |
457 | .proc_handler = &proc_dointvec, | 500 | .proc_handler = &proc_do_ipc_string, |
458 | }, | 501 | }, |
459 | { | 502 | { |
460 | .ctl_name = KERN_MSGMAX, | 503 | .ctl_name = KERN_MSGMAX, |
461 | .procname = "msgmax", | 504 | .procname = "msgmax", |
462 | .data = &msg_ctlmax, | 505 | .data = NULL, |
463 | .maxlen = sizeof (int), | 506 | .maxlen = sizeof (int), |
464 | .mode = 0644, | 507 | .mode = 0644, |
465 | .proc_handler = &proc_dointvec, | 508 | .proc_handler = &proc_do_ipc_string, |
466 | }, | 509 | }, |
467 | { | 510 | { |
468 | .ctl_name = KERN_MSGMNI, | 511 | .ctl_name = KERN_MSGMNI, |
469 | .procname = "msgmni", | 512 | .procname = "msgmni", |
470 | .data = &msg_ctlmni, | 513 | .data = NULL, |
471 | .maxlen = sizeof (int), | 514 | .maxlen = sizeof (int), |
472 | .mode = 0644, | 515 | .mode = 0644, |
473 | .proc_handler = &proc_dointvec, | 516 | .proc_handler = &proc_do_ipc_string, |
474 | }, | 517 | }, |
475 | { | 518 | { |
476 | .ctl_name = KERN_MSGMNB, | 519 | .ctl_name = KERN_MSGMNB, |
477 | .procname = "msgmnb", | 520 | .procname = "msgmnb", |
478 | .data = &msg_ctlmnb, | 521 | .data = NULL, |
479 | .maxlen = sizeof (int), | 522 | .maxlen = sizeof (int), |
480 | .mode = 0644, | 523 | .mode = 0644, |
481 | .proc_handler = &proc_dointvec, | 524 | .proc_handler = &proc_do_ipc_string, |
482 | }, | 525 | }, |
483 | { | 526 | { |
484 | .ctl_name = KERN_SEM, | 527 | .ctl_name = KERN_SEM, |
485 | .procname = "sem", | 528 | .procname = "sem", |
486 | .data = &sem_ctls, | 529 | .data = NULL, |
487 | .maxlen = 4*sizeof (int), | 530 | .maxlen = 4*sizeof (int), |
488 | .mode = 0644, | 531 | .mode = 0644, |
489 | .proc_handler = &proc_dointvec, | 532 | .proc_handler = &proc_do_ipc_string, |
490 | }, | 533 | }, |
491 | #endif | 534 | #endif |
492 | #ifdef CONFIG_MAGIC_SYSRQ | 535 | #ifdef CONFIG_MAGIC_SYSRQ |
@@ -502,10 +545,10 @@ static ctl_table kern_table[] = { | |||
502 | { | 545 | { |
503 | .ctl_name = KERN_CADPID, | 546 | .ctl_name = KERN_CADPID, |
504 | .procname = "cad_pid", | 547 | .procname = "cad_pid", |
505 | .data = &cad_pid, | 548 | .data = NULL, |
506 | .maxlen = sizeof (int), | 549 | .maxlen = sizeof (int), |
507 | .mode = 0600, | 550 | .mode = 0600, |
508 | .proc_handler = &proc_dointvec, | 551 | .proc_handler = &proc_do_cad_pid, |
509 | }, | 552 | }, |
510 | { | 553 | { |
511 | .ctl_name = KERN_MAX_THREADS, | 554 | .ctl_name = KERN_MAX_THREADS, |
@@ -1627,32 +1670,15 @@ static ssize_t proc_writesys(struct file * file, const char __user * buf, | |||
1627 | return do_rw_proc(1, file, (char __user *) buf, count, ppos); | 1670 | return do_rw_proc(1, file, (char __user *) buf, count, ppos); |
1628 | } | 1671 | } |
1629 | 1672 | ||
1630 | /** | 1673 | static int _proc_do_string(void* data, int maxlen, int write, |
1631 | * proc_dostring - read a string sysctl | 1674 | struct file *filp, void __user *buffer, |
1632 | * @table: the sysctl table | 1675 | size_t *lenp, loff_t *ppos) |
1633 | * @write: %TRUE if this is a write to the sysctl file | ||
1634 | * @filp: the file structure | ||
1635 | * @buffer: the user buffer | ||
1636 | * @lenp: the size of the user buffer | ||
1637 | * @ppos: file position | ||
1638 | * | ||
1639 | * Reads/writes a string from/to the user buffer. If the kernel | ||
1640 | * buffer provided is not large enough to hold the string, the | ||
1641 | * string is truncated. The copied string is %NULL-terminated. | ||
1642 | * If the string is being read by the user process, it is copied | ||
1643 | * and a newline '\n' is added. It is truncated if the buffer is | ||
1644 | * not large enough. | ||
1645 | * | ||
1646 | * Returns 0 on success. | ||
1647 | */ | ||
1648 | int proc_dostring(ctl_table *table, int write, struct file *filp, | ||
1649 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1650 | { | 1676 | { |
1651 | size_t len; | 1677 | size_t len; |
1652 | char __user *p; | 1678 | char __user *p; |
1653 | char c; | 1679 | char c; |
1654 | 1680 | ||
1655 | if (!table->data || !table->maxlen || !*lenp || | 1681 | if (!data || !maxlen || !*lenp || |
1656 | (*ppos && !write)) { | 1682 | (*ppos && !write)) { |
1657 | *lenp = 0; | 1683 | *lenp = 0; |
1658 | return 0; | 1684 | return 0; |
@@ -1668,20 +1694,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, | |||
1668 | break; | 1694 | break; |
1669 | len++; | 1695 | len++; |
1670 | } | 1696 | } |
1671 | if (len >= table->maxlen) | 1697 | if (len >= maxlen) |
1672 | len = table->maxlen-1; | 1698 | len = maxlen-1; |
1673 | if(copy_from_user(table->data, buffer, len)) | 1699 | if(copy_from_user(data, buffer, len)) |
1674 | return -EFAULT; | 1700 | return -EFAULT; |
1675 | ((char *) table->data)[len] = 0; | 1701 | ((char *) data)[len] = 0; |
1676 | *ppos += *lenp; | 1702 | *ppos += *lenp; |
1677 | } else { | 1703 | } else { |
1678 | len = strlen(table->data); | 1704 | len = strlen(data); |
1679 | if (len > table->maxlen) | 1705 | if (len > maxlen) |
1680 | len = table->maxlen; | 1706 | len = maxlen; |
1681 | if (len > *lenp) | 1707 | if (len > *lenp) |
1682 | len = *lenp; | 1708 | len = *lenp; |
1683 | if (len) | 1709 | if (len) |
1684 | if(copy_to_user(buffer, table->data, len)) | 1710 | if(copy_to_user(buffer, data, len)) |
1685 | return -EFAULT; | 1711 | return -EFAULT; |
1686 | if (len < *lenp) { | 1712 | if (len < *lenp) { |
1687 | if(put_user('\n', ((char __user *) buffer) + len)) | 1713 | if(put_user('\n', ((char __user *) buffer) + len)) |
@@ -1694,12 +1720,38 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, | |||
1694 | return 0; | 1720 | return 0; |
1695 | } | 1721 | } |
1696 | 1722 | ||
1723 | /** | ||
1724 | * proc_dostring - read a string sysctl | ||
1725 | * @table: the sysctl table | ||
1726 | * @write: %TRUE if this is a write to the sysctl file | ||
1727 | * @filp: the file structure | ||
1728 | * @buffer: the user buffer | ||
1729 | * @lenp: the size of the user buffer | ||
1730 | * @ppos: file position | ||
1731 | * | ||
1732 | * Reads/writes a string from/to the user buffer. If the kernel | ||
1733 | * buffer provided is not large enough to hold the string, the | ||
1734 | * string is truncated. The copied string is %NULL-terminated. | ||
1735 | * If the string is being read by the user process, it is copied | ||
1736 | * and a newline '\n' is added. It is truncated if the buffer is | ||
1737 | * not large enough. | ||
1738 | * | ||
1739 | * Returns 0 on success. | ||
1740 | */ | ||
1741 | int proc_dostring(ctl_table *table, int write, struct file *filp, | ||
1742 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1743 | { | ||
1744 | return _proc_do_string(table->data, table->maxlen, write, filp, | ||
1745 | buffer, lenp, ppos); | ||
1746 | } | ||
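The refactor separates the string-copy machinery from the ctl_table so the buffer can be chosen per call: proc_dostring() shrinks to a wrapper, and proc_do_uts_string() below supplies a per-namespace buffer instead. Any future handler with a runtime-selected buffer follows the same shape (helper name hypothetical):

    /* Hypothetical handler reusing the split-out core with a buffer
     * resolved at access time rather than baked into the table. */
    static int proc_do_example_string(ctl_table *table, int write,
                    struct file *filp, void __user *buffer,
                    size_t *lenp, loff_t *ppos)
    {
            char *buf = example_pick_buffer();      /* hypothetical helper */

            return _proc_do_string(buf, table->maxlen, write,
                                   filp, buffer, lenp, ppos);
    }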
1747 | |||
1697 | /* | 1748 | /* |
1698 | * Special case of dostring for the UTS structure. This has locks | 1749 | * Special case of dostring for the UTS structure. This has locks |
1699 | * to observe. Should this be in kernel/sys.c ???? | 1750 | * to observe. Should this be in kernel/sys.c ???? |
1700 | */ | 1751 | */ |
1701 | 1752 | ||
1702 | static int proc_doutsstring(ctl_table *table, int write, struct file *filp, | 1753 | #ifndef CONFIG_UTS_NS |
1754 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, | ||
1703 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1755 | void __user *buffer, size_t *lenp, loff_t *ppos) |
1704 | { | 1756 | { |
1705 | int r; | 1757 | int r; |
@@ -1715,6 +1767,48 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp, | |||
1715 | } | 1767 | } |
1716 | return r; | 1768 | return r; |
1717 | } | 1769 | } |
1770 | #else /* !CONFIG_UTS_NS */ | ||
1771 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, | ||
1772 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1773 | { | ||
1774 | int r; | ||
1775 | struct uts_namespace *uts_ns = current->nsproxy->uts_ns; | ||
1776 | char *which; | ||
1777 | |||
1778 | switch (table->ctl_name) { | ||
1779 | case KERN_OSTYPE: | ||
1780 | which = uts_ns->name.sysname; | ||
1781 | break; | ||
1782 | case KERN_NODENAME: | ||
1783 | which = uts_ns->name.nodename; | ||
1784 | break; | ||
1785 | case KERN_OSRELEASE: | ||
1786 | which = uts_ns->name.release; | ||
1787 | break; | ||
1788 | case KERN_VERSION: | ||
1789 | which = uts_ns->name.version; | ||
1790 | break; | ||
1791 | case KERN_DOMAINNAME: | ||
1792 | which = uts_ns->name.domainname; | ||
1793 | break; | ||
1794 | default: | ||
1795 | r = -EINVAL; | ||
1796 | goto out; | ||
1797 | } | ||
1798 | |||
1799 | if (!write) { | ||
1800 | down_read(&uts_sem); | ||
1801 | r = _proc_do_string(which, table->maxlen, 0, filp, buffer, lenp, ppos); | ||
1802 | up_read(&uts_sem); | ||
1803 | } else { | ||
1804 | down_write(&uts_sem); | ||
1805 | r = _proc_do_string(which, table->maxlen, 1, filp, buffer, lenp, ppos); | ||
1806 | up_write(&uts_sem); | ||
1807 | } | ||
1808 | out: | ||
1809 | return r; | ||
1810 | } | ||
1811 | #endif /* !CONFIG_UTS_NS */ | ||
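The handler resolves its buffer through current->nsproxy->uts_ns, so two processes in different UTS namespaces read and write different strings through the same sysctl. A hedged sketch of a read under the same lock:

    /* Sketch: copy the caller's per-namespace nodename, taking uts_sem
     * for reading exactly as proc_do_uts_string() does above. */
    static void get_my_nodename(char *buf, size_t len)
    {
            down_read(&uts_sem);
            strlcpy(buf, current->nsproxy->uts_ns->name.nodename, len);
            up_read(&uts_sem);
    }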
1718 | 1812 | ||
1719 | static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, | 1813 | static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, |
1720 | int *valp, | 1814 | int *valp, |
@@ -1735,8 +1829,9 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, | |||
1735 | return 0; | 1829 | return 0; |
1736 | } | 1830 | } |
1737 | 1831 | ||
1738 | static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, | 1832 | static int __do_proc_dointvec(void *tbl_data, ctl_table *table, |
1739 | void __user *buffer, size_t *lenp, loff_t *ppos, | 1833 | int write, struct file *filp, void __user *buffer, |
1834 | size_t *lenp, loff_t *ppos, | ||
1740 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, | 1835 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, |
1741 | int write, void *data), | 1836 | int write, void *data), |
1742 | void *data) | 1837 | void *data) |
@@ -1749,13 +1844,13 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, | |||
1749 | char buf[TMPBUFLEN], *p; | 1844 | char buf[TMPBUFLEN], *p; |
1750 | char __user *s = buffer; | 1845 | char __user *s = buffer; |
1751 | 1846 | ||
1752 | if (!table->data || !table->maxlen || !*lenp || | 1847 | if (!tbl_data || !table->maxlen || !*lenp || |
1753 | (*ppos && !write)) { | 1848 | (*ppos && !write)) { |
1754 | *lenp = 0; | 1849 | *lenp = 0; |
1755 | return 0; | 1850 | return 0; |
1756 | } | 1851 | } |
1757 | 1852 | ||
1758 | i = (int *) table->data; | 1853 | i = (int *) tbl_data; |
1759 | vleft = table->maxlen / sizeof(*i); | 1854 | vleft = table->maxlen / sizeof(*i); |
1760 | left = *lenp; | 1855 | left = *lenp; |
1761 | 1856 | ||
@@ -1844,6 +1939,16 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, | |||
1844 | #undef TMPBUFLEN | 1939 | #undef TMPBUFLEN |
1845 | } | 1940 | } |
1846 | 1941 | ||
1942 | static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, | ||
1943 | void __user *buffer, size_t *lenp, loff_t *ppos, | ||
1944 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, | ||
1945 | int write, void *data), | ||
1946 | void *data) | ||
1947 | { | ||
1948 | return __do_proc_dointvec(table->data, table, write, filp, | ||
1949 | buffer, lenp, ppos, conv, data); | ||
1950 | } | ||
1951 | |||
1847 | /** | 1952 | /** |
1848 | * proc_dointvec - read a vector of integers | 1953 | * proc_dointvec - read a vector of integers |
1849 | * @table: the sysctl table | 1954 | * @table: the sysctl table |
@@ -1977,7 +2082,7 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, | |||
1977 | do_proc_dointvec_minmax_conv, ¶m); | 2082 | do_proc_dointvec_minmax_conv, ¶m); |
1978 | } | 2083 | } |
1979 | 2084 | ||
1980 | static int do_proc_doulongvec_minmax(ctl_table *table, int write, | 2085 | static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write, |
1981 | struct file *filp, | 2086 | struct file *filp, |
1982 | void __user *buffer, | 2087 | void __user *buffer, |
1983 | size_t *lenp, loff_t *ppos, | 2088 | size_t *lenp, loff_t *ppos, |
@@ -1991,13 +2096,13 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, | |||
1991 | char buf[TMPBUFLEN], *p; | 2096 | char buf[TMPBUFLEN], *p; |
1992 | char __user *s = buffer; | 2097 | char __user *s = buffer; |
1993 | 2098 | ||
1994 | if (!table->data || !table->maxlen || !*lenp || | 2099 | if (!data || !table->maxlen || !*lenp || |
1995 | (*ppos && !write)) { | 2100 | (*ppos && !write)) { |
1996 | *lenp = 0; | 2101 | *lenp = 0; |
1997 | return 0; | 2102 | return 0; |
1998 | } | 2103 | } |
1999 | 2104 | ||
2000 | i = (unsigned long *) table->data; | 2105 | i = (unsigned long *) data; |
2001 | min = (unsigned long *) table->extra1; | 2106 | min = (unsigned long *) table->extra1; |
2002 | max = (unsigned long *) table->extra2; | 2107 | max = (unsigned long *) table->extra2; |
2003 | vleft = table->maxlen / sizeof(unsigned long); | 2108 | vleft = table->maxlen / sizeof(unsigned long); |
@@ -2082,6 +2187,17 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, | |||
2082 | #undef TMPBUFLEN | 2187 | #undef TMPBUFLEN |
2083 | } | 2188 | } |
2084 | 2189 | ||
2190 | static int do_proc_doulongvec_minmax(ctl_table *table, int write, | ||
2191 | struct file *filp, | ||
2192 | void __user *buffer, | ||
2193 | size_t *lenp, loff_t *ppos, | ||
2194 | unsigned long convmul, | ||
2195 | unsigned long convdiv) | ||
2196 | { | ||
2197 | return __do_proc_doulongvec_minmax(table->data, table, write, | ||
2198 | filp, buffer, lenp, ppos, convmul, convdiv); | ||
2199 | } | ||
2200 | |||
2085 | /** | 2201 | /** |
2086 | * proc_doulongvec_minmax - read a vector of long integers with min/max values | 2202 | * proc_doulongvec_minmax - read a vector of long integers with min/max values |
2087 | * @table: the sysctl table | 2203 | * @table: the sysctl table |
@@ -2270,6 +2386,71 @@ int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, | |||
2270 | do_proc_dointvec_ms_jiffies_conv, NULL); | 2386 | do_proc_dointvec_ms_jiffies_conv, NULL); |
2271 | } | 2387 | } |
2272 | 2388 | ||
2389 | #ifdef CONFIG_SYSVIPC | ||
2390 | static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, | ||
2391 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2392 | { | ||
2393 | void *data; | ||
2394 | struct ipc_namespace *ns; | ||
2395 | |||
2396 | ns = current->nsproxy->ipc_ns; | ||
2397 | |||
2398 | switch (table->ctl_name) { | ||
2399 | case KERN_SHMMAX: | ||
2400 | data = &ns->shm_ctlmax; | ||
2401 | goto proc_minmax; | ||
2402 | case KERN_SHMALL: | ||
2403 | data = &ns->shm_ctlall; | ||
2404 | goto proc_minmax; | ||
2405 | case KERN_SHMMNI: | ||
2406 | data = &ns->shm_ctlmni; | ||
2407 | break; | ||
2408 | case KERN_MSGMAX: | ||
2409 | data = &ns->msg_ctlmax; | ||
2410 | break; | ||
2411 | case KERN_MSGMNI: | ||
2412 | data = &ns->msg_ctlmni; | ||
2413 | break; | ||
2414 | case KERN_MSGMNB: | ||
2415 | data = &ns->msg_ctlmnb; | ||
2416 | break; | ||
2417 | case KERN_SEM: | ||
2418 | data = &ns->sem_ctls; | ||
2419 | break; | ||
2420 | default: | ||
2421 | return -EINVAL; | ||
2422 | } | ||
2423 | |||
2424 | return __do_proc_dointvec(data, table, write, filp, buffer, | ||
2425 | lenp, ppos, NULL, NULL); | ||
2426 | proc_minmax: | ||
2427 | return __do_proc_doulongvec_minmax(data, table, write, filp, buffer, | ||
2428 | lenp, ppos, 1L, 1L); | ||
2429 | } | ||
2430 | #endif | ||
2431 | |||
2432 | static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, | ||
2433 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2434 | { | ||
2435 | struct pid *new_pid; | ||
2436 | pid_t tmp; | ||
2437 | int r; | ||
2438 | |||
2439 | tmp = pid_nr(cad_pid); | ||
2440 | |||
2441 | r = __do_proc_dointvec(&tmp, table, write, filp, buffer, | ||
2442 | lenp, ppos, NULL, NULL); | ||
2443 | if (r || !write) | ||
2444 | return r; | ||
2445 | |||
2446 | new_pid = find_get_pid(tmp); | ||
2447 | if (!new_pid) | ||
2448 | return -ESRCH; | ||
2449 | |||
2450 | put_pid(xchg(&cad_pid, new_pid)); | ||
2451 | return 0; | ||
2452 | } | ||
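The closing put_pid(xchg(&cad_pid, new_pid)) installs the new reference and releases the old one in a single atomic pointer swap, so a concurrent reader of cad_pid never observes a freed pid. The idiom in isolation:

    /* The swap idiom: atomically publish a new counted reference and
     * drop whatever was installed before. put_pid(NULL) is a no-op,
     * so this also handles the very first installation. */
    static void install_pid(struct pid **slot, struct pid *new)
    {
            put_pid(xchg(slot, new));
    }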
2453 | |||
2273 | #else /* CONFIG_PROC_FS */ | 2454 | #else /* CONFIG_PROC_FS */ |
2274 | 2455 | ||
2275 | int proc_dostring(ctl_table *table, int write, struct file *filp, | 2456 | int proc_dostring(ctl_table *table, int write, struct file *filp, |
@@ -2278,12 +2459,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, | |||
2278 | return -ENOSYS; | 2459 | return -ENOSYS; |
2279 | } | 2460 | } |
2280 | 2461 | ||
2281 | static int proc_doutsstring(ctl_table *table, int write, struct file *filp, | 2462 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, |
2282 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2463 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2283 | { | 2464 | { |
2284 | return -ENOSYS; | 2465 | return -ENOSYS; |
2285 | } | 2466 | } |
2286 | 2467 | ||
2468 | #ifdef CONFIG_SYSVIPC | ||
2469 | static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, | ||
2470 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2471 | { | ||
2472 | return -ENOSYS; | ||
2473 | } | ||
2474 | #endif | ||
2475 | |||
2287 | int proc_dointvec(ctl_table *table, int write, struct file *filp, | 2476 | int proc_dointvec(ctl_table *table, int write, struct file *filp, |
2288 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2477 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2289 | { | 2478 | { |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 2ed4040d0d..5d6a8c54ee 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -18,7 +18,9 @@ | |||
18 | 18 | ||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/taskstats_kern.h> | 20 | #include <linux/taskstats_kern.h> |
21 | #include <linux/tsacct_kern.h> | ||
21 | #include <linux/delayacct.h> | 22 | #include <linux/delayacct.h> |
22 | #include <linux/cpumask.h> | 24 | #include <linux/cpumask.h> |
23 | #include <linux/percpu.h> | 25 | #include <linux/percpu.h> |
24 | #include <net/genetlink.h> | 26 | #include <net/genetlink.h> |
@@ -75,7 +77,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | |||
75 | /* | 77 | /* |
76 | * If new attributes are added, please revisit this allocation | 78 | * If new attributes are added, please revisit this allocation |
77 | */ | 79 | */ |
78 | skb = nlmsg_new(size, GFP_KERNEL); | 80 | skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL); |
79 | if (!skb) | 81 | if (!skb) |
80 | return -ENOMEM; | 82 | return -ENOMEM; |
81 | 83 | ||
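nlmsg_new() accounts for the netlink header but not the genetlink family header, so sizing the allocation with the bare payload under-allocates; genlmsg_total_size() adds the missing headroom. Its definition, quoted from memory and best treated as a sketch:

    /* Sketch of the genetlink sizing helpers (include/net/genetlink.h). */
    static inline int genlmsg_msg_size(int payload)
    {
            return GENL_HDRLEN + payload;   /* family header + payload */
    }

    static inline int genlmsg_total_size(int payload)
    {
            return NLMSG_ALIGN(genlmsg_msg_size(payload)); /* aligned total */
    }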
@@ -198,7 +200,13 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, | |||
198 | */ | 200 | */ |
199 | 201 | ||
200 | delayacct_add_tsk(stats, tsk); | 202 | delayacct_add_tsk(stats, tsk); |
203 | |||
204 | /* fill in basic acct fields */ | ||
201 | stats->version = TASKSTATS_VERSION; | 205 | stats->version = TASKSTATS_VERSION; |
206 | bacct_add_tsk(stats, tsk); | ||
207 | |||
208 | /* fill in extended acct fields */ | ||
209 | xacct_add_tsk(stats, tsk); | ||
202 | 210 | ||
203 | /* Define err: label here if needed */ | 211 | /* Define err: label here if needed */ |
204 | put_task_struct(tsk); | 212 | put_task_struct(tsk); |
diff --git a/kernel/time.c b/kernel/time.c index 5bd4897476..0e017bff4c 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -202,179 +202,6 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv, | |||
202 | return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); | 202 | return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); |
203 | } | 203 | } |
204 | 204 | ||
205 | /* we call this to notify the arch when the clock is being | ||
206 | * controlled. If no such arch routine, do nothing. | ||
207 | */ | ||
208 | void __attribute__ ((weak)) notify_arch_cmos_timer(void) | ||
209 | { | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | /* adjtimex mainly allows reading (and writing, if superuser) of | ||
214 | * kernel time-keeping variables. used by xntpd. | ||
215 | */ | ||
216 | int do_adjtimex(struct timex *txc) | ||
217 | { | ||
218 | long ltemp, mtemp, save_adjust; | ||
219 | int result; | ||
220 | |||
221 | /* In order to modify anything, you gotta be super-user! */ | ||
222 | if (txc->modes && !capable(CAP_SYS_TIME)) | ||
223 | return -EPERM; | ||
224 | |||
225 | /* Now we validate the data before disabling interrupts */ | ||
226 | |||
227 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | ||
228 | /* singleshot must not be used with any other mode bits */ | ||
229 | if (txc->modes != ADJ_OFFSET_SINGLESHOT) | ||
230 | return -EINVAL; | ||
231 | |||
232 | if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) | ||
233 | /* adjustment Offset limited to +- .512 seconds */ | ||
234 | if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) | ||
235 | return -EINVAL; | ||
236 | |||
237 | /* if the quartz is off by more than 10% something is VERY wrong ! */ | ||
238 | if (txc->modes & ADJ_TICK) | ||
239 | if (txc->tick < 900000/USER_HZ || | ||
240 | txc->tick > 1100000/USER_HZ) | ||
241 | return -EINVAL; | ||
242 | |||
243 | write_seqlock_irq(&xtime_lock); | ||
244 | result = time_state; /* mostly `TIME_OK' */ | ||
245 | |||
246 | /* Save for later - semantics of adjtime is to return old value */ | ||
247 | save_adjust = time_next_adjust ? time_next_adjust : time_adjust; | ||
248 | |||
249 | #if 0 /* STA_CLOCKERR is never set yet */ | ||
250 | time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ | ||
251 | #endif | ||
252 | /* If there are input parameters, then process them */ | ||
253 | if (txc->modes) | ||
254 | { | ||
255 | if (txc->modes & ADJ_STATUS) /* only set allowed bits */ | ||
256 | time_status = (txc->status & ~STA_RONLY) | | ||
257 | (time_status & STA_RONLY); | ||
258 | |||
259 | if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ | ||
260 | if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { | ||
261 | result = -EINVAL; | ||
262 | goto leave; | ||
263 | } | ||
264 | time_freq = txc->freq; | ||
265 | } | ||
266 | |||
267 | if (txc->modes & ADJ_MAXERROR) { | ||
268 | if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { | ||
269 | result = -EINVAL; | ||
270 | goto leave; | ||
271 | } | ||
272 | time_maxerror = txc->maxerror; | ||
273 | } | ||
274 | |||
275 | if (txc->modes & ADJ_ESTERROR) { | ||
276 | if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { | ||
277 | result = -EINVAL; | ||
278 | goto leave; | ||
279 | } | ||
280 | time_esterror = txc->esterror; | ||
281 | } | ||
282 | |||
283 | if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ | ||
284 | if (txc->constant < 0) { /* NTP v4 uses values > 6 */ | ||
285 | result = -EINVAL; | ||
286 | goto leave; | ||
287 | } | ||
288 | time_constant = txc->constant; | ||
289 | } | ||
290 | |||
291 | if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ | ||
292 | if (txc->modes == ADJ_OFFSET_SINGLESHOT) { | ||
293 | /* adjtime() is independent from ntp_adjtime() */ | ||
294 | if ((time_next_adjust = txc->offset) == 0) | ||
295 | time_adjust = 0; | ||
296 | } | ||
297 | else if (time_status & STA_PLL) { | ||
298 | ltemp = txc->offset; | ||
299 | |||
300 | /* | ||
301 | * Scale the phase adjustment and | ||
302 | * clamp to the operating range. | ||
303 | */ | ||
304 | if (ltemp > MAXPHASE) | ||
305 | time_offset = MAXPHASE << SHIFT_UPDATE; | ||
306 | else if (ltemp < -MAXPHASE) | ||
307 | time_offset = -(MAXPHASE << SHIFT_UPDATE); | ||
308 | else | ||
309 | time_offset = ltemp << SHIFT_UPDATE; | ||
310 | |||
311 | /* | ||
312 | * Select whether the frequency is to be controlled | ||
313 | * and in which mode (PLL or FLL). Clamp to the operating | ||
314 | * range. Ugly multiply/divide should be replaced someday. | ||
315 | */ | ||
316 | |||
317 | if (time_status & STA_FREQHOLD || time_reftime == 0) | ||
318 | time_reftime = xtime.tv_sec; | ||
319 | mtemp = xtime.tv_sec - time_reftime; | ||
320 | time_reftime = xtime.tv_sec; | ||
321 | if (time_status & STA_FLL) { | ||
322 | if (mtemp >= MINSEC) { | ||
323 | ltemp = (time_offset / mtemp) << (SHIFT_USEC - | ||
324 | SHIFT_UPDATE); | ||
325 | time_freq += shift_right(ltemp, SHIFT_KH); | ||
326 | } else /* calibration interval too short (p. 12) */ | ||
327 | result = TIME_ERROR; | ||
328 | } else { /* PLL mode */ | ||
329 | if (mtemp < MAXSEC) { | ||
330 | ltemp *= mtemp; | ||
331 | time_freq += shift_right(ltemp,(time_constant + | ||
332 | time_constant + | ||
333 | SHIFT_KF - SHIFT_USEC)); | ||
334 | } else /* calibration interval too long (p. 12) */ | ||
335 | result = TIME_ERROR; | ||
336 | } | ||
337 | time_freq = min(time_freq, time_tolerance); | ||
338 | time_freq = max(time_freq, -time_tolerance); | ||
339 | } /* STA_PLL */ | ||
340 | } /* txc->modes & ADJ_OFFSET */ | ||
341 | if (txc->modes & ADJ_TICK) { | ||
342 | tick_usec = txc->tick; | ||
343 | tick_nsec = TICK_USEC_TO_NSEC(tick_usec); | ||
344 | } | ||
345 | } /* txc->modes */ | ||
346 | leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | ||
347 | result = TIME_ERROR; | ||
348 | |||
349 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | ||
350 | txc->offset = save_adjust; | ||
351 | else { | ||
352 | txc->offset = shift_right(time_offset, SHIFT_UPDATE); | ||
353 | } | ||
354 | txc->freq = time_freq; | ||
355 | txc->maxerror = time_maxerror; | ||
356 | txc->esterror = time_esterror; | ||
357 | txc->status = time_status; | ||
358 | txc->constant = time_constant; | ||
359 | txc->precision = time_precision; | ||
360 | txc->tolerance = time_tolerance; | ||
361 | txc->tick = tick_usec; | ||
362 | |||
363 | /* PPS is not implemented, so these are zero */ | ||
364 | txc->ppsfreq = 0; | ||
365 | txc->jitter = 0; | ||
366 | txc->shift = 0; | ||
367 | txc->stabil = 0; | ||
368 | txc->jitcnt = 0; | ||
369 | txc->calcnt = 0; | ||
370 | txc->errcnt = 0; | ||
371 | txc->stbcnt = 0; | ||
372 | write_sequnlock_irq(&xtime_lock); | ||
373 | do_gettimeofday(&txc->time); | ||
374 | notify_arch_cmos_timer(); | ||
375 | return(result); | ||
376 | } | ||
377 | |||
378 | asmlinkage long sys_adjtimex(struct timex __user *txc_p) | 205 | asmlinkage long sys_adjtimex(struct timex __user *txc_p) |
379 | { | 206 | { |
380 | struct timex txc; /* Local copy of parameter */ | 207 | struct timex txc; /* Local copy of parameter */ |
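
The do_adjtimex() logic removed above moves to kernel/time/ntp.c (see below). As a quick illustration of the syscall's read side — with modes == 0 nothing is written, so the CAP_SYS_TIME check is never reached — here is a minimal userspace sketch using glibc's adjtimex() wrapper:

/* Read-only adjtimex(2) query: needs no privilege since modes == 0. */
#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { .modes = 0 };	/* query only, modify nothing */
	int state = adjtimex(&tx);		/* TIME_OK, TIME_INS, ..., TIME_ERROR */

	printf("state=%d offset=%ld freq=%ld (scaled ppm) status=0x%x tick=%ld\n",
	       state, tx.offset, tx.freq, tx.status, tx.tick);
	return 0;
}

The return value is the clock state — the same time_state the kernel tracks in the code above.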
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index e1dfd8e86c..61a3907d16 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
@@ -1 +1 @@ | |||
obj-y += clocksource.o jiffies.o | obj-y += ntp.o clocksource.o jiffies.o | ||
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c new file mode 100644 index 0000000000..47195fa0ec --- /dev/null +++ b/kernel/time/ntp.c | |||
@@ -0,0 +1,350 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/ntp.c | ||
3 | * | ||
4 | * NTP state machine interfaces and logic. | ||
5 | * | ||
6 | * This code was mainly moved from kernel/timer.c and kernel/time.c | ||
7 | * Please see those files for relevant copyright info and historical | ||
8 | * changelogs. | ||
9 | */ | ||
10 | |||
11 | #include <linux/mm.h> | ||
12 | #include <linux/time.h> | ||
13 | #include <linux/timex.h> | ||
14 | |||
15 | #include <asm/div64.h> | ||
16 | #include <asm/timex.h> | ||
17 | |||
18 | /* | ||
19 | * Timekeeping variables | ||
20 | */ | ||
21 | unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ | ||
22 | unsigned long tick_nsec; /* ACTHZ period (nsec) */ | ||
23 | static u64 tick_length, tick_length_base; | ||
24 | |||
25 | #define MAX_TICKADJ 500 /* microsecs */ | ||
26 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ | ||
27 | TICK_LENGTH_SHIFT) / HZ) | ||
28 | |||
29 | /* | ||
30 | * phase-lock loop variables | ||
31 | */ | ||
32 | /* TIME_ERROR prevents overwriting the CMOS clock */ | ||
33 | static int time_state = TIME_OK; /* clock synchronization status */ | ||
34 | int time_status = STA_UNSYNC; /* clock status bits */ | ||
35 | static long time_offset; /* time adjustment (ns) */ | ||
36 | static long time_constant = 2; /* pll time constant */ | ||
37 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | ||
38 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | ||
39 | long time_freq; /* frequency offset (scaled ppm)*/ | ||
40 | static long time_reftime; /* time at last adjustment (s) */ | ||
41 | long time_adjust; | ||
42 | |||
43 | #define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE) | ||
44 | #define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \ | ||
45 | (s64)CLOCK_TICK_RATE) | ||
46 | |||
47 | static void ntp_update_frequency(void) | ||
48 | { | ||
49 | tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; | ||
50 | tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; | ||
51 | tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | ||
52 | |||
53 | do_div(tick_length_base, HZ); | ||
54 | |||
55 | tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; | ||
56 | } | ||
57 | |||
58 | /** | ||
59 | * ntp_clear - Clears the NTP state variables | ||
60 | * | ||
61 | * Must be called while holding a write on the xtime_lock | ||
62 | */ | ||
63 | void ntp_clear(void) | ||
64 | { | ||
65 | time_adjust = 0; /* stop active adjtime() */ | ||
66 | time_status |= STA_UNSYNC; | ||
67 | time_maxerror = NTP_PHASE_LIMIT; | ||
68 | time_esterror = NTP_PHASE_LIMIT; | ||
69 | |||
70 | ntp_update_frequency(); | ||
71 | |||
72 | tick_length = tick_length_base; | ||
73 | time_offset = 0; | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * this routine handles the overflow of the microsecond field | ||
78 | * | ||
79 | * The tricky bits of code to handle the accurate clock support | ||
80 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
81 | * They were originally developed for SUN and DEC kernels. | ||
82 | * All the kudos should go to Dave for this stuff. | ||
83 | */ | ||
84 | void second_overflow(void) | ||
85 | { | ||
86 | long time_adj; | ||
87 | |||
88 | /* Bump the maxerror field */ | ||
89 | time_maxerror += MAXFREQ >> SHIFT_USEC; | ||
90 | if (time_maxerror > NTP_PHASE_LIMIT) { | ||
91 | time_maxerror = NTP_PHASE_LIMIT; | ||
92 | time_status |= STA_UNSYNC; | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Leap second processing. If in leap-insert state at the end of the | ||
97 | * day, the system clock is set back one second; if in leap-delete | ||
98 | * state, the system clock is set ahead one second. The microtime() | ||
99 | routine or external clock driver will ensure that reported time is | ||
100 | * always monotonic. The ugly divides should be replaced. | ||
101 | */ | ||
102 | switch (time_state) { | ||
103 | case TIME_OK: | ||
104 | if (time_status & STA_INS) | ||
105 | time_state = TIME_INS; | ||
106 | else if (time_status & STA_DEL) | ||
107 | time_state = TIME_DEL; | ||
108 | break; | ||
109 | case TIME_INS: | ||
110 | if (xtime.tv_sec % 86400 == 0) { | ||
111 | xtime.tv_sec--; | ||
112 | wall_to_monotonic.tv_sec++; | ||
113 | /* | ||
114 | * The timer interpolator will make time change | ||
115 | * gradually instead of an immediate jump by one second | ||
116 | */ | ||
117 | time_interpolator_update(-NSEC_PER_SEC); | ||
118 | time_state = TIME_OOP; | ||
119 | clock_was_set(); | ||
120 | printk(KERN_NOTICE "Clock: inserting leap second " | ||
121 | "23:59:60 UTC\n"); | ||
122 | } | ||
123 | break; | ||
124 | case TIME_DEL: | ||
125 | if ((xtime.tv_sec + 1) % 86400 == 0) { | ||
126 | xtime.tv_sec++; | ||
127 | wall_to_monotonic.tv_sec--; | ||
128 | /* | ||
129 | * Use of time interpolator for a gradual change of | ||
130 | * time | ||
131 | */ | ||
132 | time_interpolator_update(NSEC_PER_SEC); | ||
133 | time_state = TIME_WAIT; | ||
134 | clock_was_set(); | ||
135 | printk(KERN_NOTICE "Clock: deleting leap second " | ||
136 | "23:59:59 UTC\n"); | ||
137 | } | ||
138 | break; | ||
139 | case TIME_OOP: | ||
140 | time_state = TIME_WAIT; | ||
141 | break; | ||
142 | case TIME_WAIT: | ||
143 | if (!(time_status & (STA_INS | STA_DEL))) | ||
144 | time_state = TIME_OK; | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Compute the phase adjustment for the next second. The offset is | ||
149 | * reduced by a fixed factor times the time constant. | ||
150 | */ | ||
151 | tick_length = tick_length_base; | ||
152 | time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); | ||
153 | time_offset -= time_adj; | ||
154 | tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE); | ||
155 | |||
156 | if (unlikely(time_adjust)) { | ||
157 | if (time_adjust > MAX_TICKADJ) { | ||
158 | time_adjust -= MAX_TICKADJ; | ||
159 | tick_length += MAX_TICKADJ_SCALED; | ||
160 | } else if (time_adjust < -MAX_TICKADJ) { | ||
161 | time_adjust += MAX_TICKADJ; | ||
162 | tick_length -= MAX_TICKADJ_SCALED; | ||
163 | } else { | ||
164 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / | ||
165 | HZ) << TICK_LENGTH_SHIFT; | ||
166 | time_adjust = 0; | ||
167 | } | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Return how long ticks are at the moment, that is, how much time | ||
173 | * update_wall_time() will add to xtime next time we call it | ||
174 | * (assuming no calls to do_adjtimex in the meantime). | ||
175 | * The return value is in fixed-point nanoseconds shifted by the | ||
176 | * specified number of bits to the right of the binary point. | ||
177 | * This function has no side-effects. | ||
178 | */ | ||
179 | u64 current_tick_length(void) | ||
180 | { | ||
181 | return tick_length; | ||
182 | } | ||
183 | |||
184 | |||
185 | void __attribute__ ((weak)) notify_arch_cmos_timer(void) | ||
186 | { | ||
187 | return; | ||
188 | } | ||
189 | |||
190 | /* adjtimex mainly allows reading (and writing, if superuser) of | ||
191 | * kernel time-keeping variables. used by xntpd. | ||
192 | */ | ||
193 | int do_adjtimex(struct timex *txc) | ||
194 | { | ||
195 | long ltemp, mtemp, save_adjust; | ||
196 | s64 freq_adj, temp64; | ||
197 | int result; | ||
198 | |||
199 | /* In order to modify anything, you gotta be super-user! */ | ||
200 | if (txc->modes && !capable(CAP_SYS_TIME)) | ||
201 | return -EPERM; | ||
202 | |||
203 | /* Now we validate the data before disabling interrupts */ | ||
204 | |||
205 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | ||
206 | /* singleshot must not be used with any other mode bits */ | ||
207 | if (txc->modes != ADJ_OFFSET_SINGLESHOT) | ||
208 | return -EINVAL; | ||
209 | |||
210 | if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) | ||
211 | /* adjustment Offset limited to +- .512 seconds */ | ||
212 | if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) | ||
213 | return -EINVAL; | ||
214 | |||
215 | /* if the quartz is off by more than 10% something is VERY wrong ! */ | ||
216 | if (txc->modes & ADJ_TICK) | ||
217 | if (txc->tick < 900000/USER_HZ || | ||
218 | txc->tick > 1100000/USER_HZ) | ||
219 | return -EINVAL; | ||
220 | |||
221 | write_seqlock_irq(&xtime_lock); | ||
222 | result = time_state; /* mostly `TIME_OK' */ | ||
223 | |||
224 | /* Save for later - semantics of adjtime is to return old value */ | ||
225 | save_adjust = time_adjust; | ||
226 | |||
227 | #if 0 /* STA_CLOCKERR is never set yet */ | ||
228 | time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ | ||
229 | #endif | ||
230 | /* If there are input parameters, then process them */ | ||
231 | if (txc->modes) | ||
232 | { | ||
233 | if (txc->modes & ADJ_STATUS) /* only set allowed bits */ | ||
234 | time_status = (txc->status & ~STA_RONLY) | | ||
235 | (time_status & STA_RONLY); | ||
236 | |||
237 | if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ | ||
238 | if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { | ||
239 | result = -EINVAL; | ||
240 | goto leave; | ||
241 | } | ||
242 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC); | ||
243 | } | ||
244 | |||
245 | if (txc->modes & ADJ_MAXERROR) { | ||
246 | if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { | ||
247 | result = -EINVAL; | ||
248 | goto leave; | ||
249 | } | ||
250 | time_maxerror = txc->maxerror; | ||
251 | } | ||
252 | |||
253 | if (txc->modes & ADJ_ESTERROR) { | ||
254 | if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { | ||
255 | result = -EINVAL; | ||
256 | goto leave; | ||
257 | } | ||
258 | time_esterror = txc->esterror; | ||
259 | } | ||
260 | |||
261 | if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ | ||
262 | if (txc->constant < 0) { /* NTP v4 uses values > 6 */ | ||
263 | result = -EINVAL; | ||
264 | goto leave; | ||
265 | } | ||
266 | time_constant = min(txc->constant + 4, (long)MAXTC); | ||
267 | } | ||
268 | |||
269 | if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ | ||
270 | if (txc->modes == ADJ_OFFSET_SINGLESHOT) { | ||
271 | /* adjtime() is independent from ntp_adjtime() */ | ||
272 | time_adjust = txc->offset; | ||
273 | } | ||
274 | else if (time_status & STA_PLL) { | ||
275 | ltemp = txc->offset * NSEC_PER_USEC; | ||
276 | |||
277 | /* | ||
278 | * Scale the phase adjustment and | ||
279 | * clamp to the operating range. | ||
280 | */ | ||
281 | time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC); | ||
282 | time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC); | ||
283 | |||
284 | /* | ||
285 | * Select whether the frequency is to be controlled | ||
286 | * and in which mode (PLL or FLL). Clamp to the operating | ||
287 | * range. Ugly multiply/divide should be replaced someday. | ||
288 | */ | ||
289 | |||
290 | if (time_status & STA_FREQHOLD || time_reftime == 0) | ||
291 | time_reftime = xtime.tv_sec; | ||
292 | mtemp = xtime.tv_sec - time_reftime; | ||
293 | time_reftime = xtime.tv_sec; | ||
294 | |||
295 | freq_adj = (s64)time_offset * mtemp; | ||
296 | freq_adj = shift_right(freq_adj, time_constant * 2 + | ||
297 | (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); | ||
298 | if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { | ||
299 | temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL); | ||
300 | if (time_offset < 0) { | ||
301 | temp64 = -temp64; | ||
302 | do_div(temp64, mtemp); | ||
303 | freq_adj -= temp64; | ||
304 | } else { | ||
305 | do_div(temp64, mtemp); | ||
306 | freq_adj += temp64; | ||
307 | } | ||
308 | } | ||
309 | freq_adj += time_freq; | ||
310 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); | ||
311 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); | ||
312 | time_offset = (time_offset / HZ) << SHIFT_UPDATE; | ||
313 | } /* STA_PLL */ | ||
314 | } /* txc->modes & ADJ_OFFSET */ | ||
315 | if (txc->modes & ADJ_TICK) | ||
316 | tick_usec = txc->tick; | ||
317 | |||
318 | if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) | ||
319 | ntp_update_frequency(); | ||
320 | } /* txc->modes */ | ||
321 | leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | ||
322 | result = TIME_ERROR; | ||
323 | |||
324 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | ||
325 | txc->offset = save_adjust; | ||
326 | else | ||
327 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; | ||
328 | txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC); | ||
329 | txc->maxerror = time_maxerror; | ||
330 | txc->esterror = time_esterror; | ||
331 | txc->status = time_status; | ||
332 | txc->constant = time_constant; | ||
333 | txc->precision = 1; | ||
334 | txc->tolerance = MAXFREQ; | ||
335 | txc->tick = tick_usec; | ||
336 | |||
337 | /* PPS is not implemented, so these are zero */ | ||
338 | txc->ppsfreq = 0; | ||
339 | txc->jitter = 0; | ||
340 | txc->shift = 0; | ||
341 | txc->stabil = 0; | ||
342 | txc->jitcnt = 0; | ||
343 | txc->calcnt = 0; | ||
344 | txc->errcnt = 0; | ||
345 | txc->stbcnt = 0; | ||
346 | write_sequnlock_irq(&xtime_lock); | ||
347 | do_gettimeofday(&txc->time); | ||
348 | notify_arch_cmos_timer(); | ||
349 | return(result); | ||
350 | } | ||
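
tick_length and tick_length_base above hold nanoseconds per HZ tick in 64-bit fixed point with TICK_LENGTH_SHIFT fractional bits, so sub-nanosecond frequency corrections accumulate without drift. A standalone sketch of ntp_update_frequency()'s arithmetic, assuming illustrative values HZ=250, USER_HZ=100 and TICK_LENGTH_SHIFT=32, and taking the CLOCK_TICK_ADJUST and time_freq terms as zero:

/* Userspace sketch of the tick_length fixed-point math; all constants
 * here are assumed example values, not authoritative kernel config. */
#include <stdio.h>
#include <stdint.h>

#define HZ                250		/* assumed */
#define USER_HZ           100		/* assumed */
#define NSEC_PER_USEC     1000ULL
#define TICK_LENGTH_SHIFT 32		/* fractional bits, assumed */

int main(void)
{
	uint64_t tick_usec = 1000000 / USER_HZ;	/* USER_HZ period in usec */

	/* nanoseconds per second, promoted to fixed point */
	uint64_t tick_length_base =
		(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT;
	/* CLOCK_TICK_ADJUST and time_freq terms omitted (assumed zero) */
	tick_length_base /= HZ;			/* the kernel uses do_div() here */

	uint64_t tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT;
	printf("tick_nsec = %llu\n", (unsigned long long)tick_nsec); /* 4000000 */
	return 0;
}

Dividing after the shift is what preserves the fractional part that a plain integer division of nanoseconds by HZ would lose.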
diff --git a/kernel/timer.c b/kernel/timer.c index 4f55622b0d..c1c7fbcffe 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -41,12 +41,6 @@ | |||
41 | #include <asm/timex.h> | 41 | #include <asm/timex.h> |
42 | #include <asm/io.h> | 42 | #include <asm/io.h> |
43 | 43 | ||
44 | #ifdef CONFIG_TIME_INTERPOLATION | ||
45 | static void time_interpolator_update(long delta_nsec); | ||
46 | #else | ||
47 | #define time_interpolator_update(x) | ||
48 | #endif | ||
49 | |||
50 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; | 44 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; |
51 | 45 | ||
52 | EXPORT_SYMBOL(jiffies_64); | 46 | EXPORT_SYMBOL(jiffies_64); |
@@ -568,12 +562,6 @@ found: | |||
568 | 562 | ||
569 | /******************************************************************/ | 563 | /******************************************************************/ |
570 | 564 | ||
571 | /* | ||
572 | * Timekeeping variables | ||
573 | */ | ||
574 | unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ | ||
575 | unsigned long tick_nsec = TICK_NSEC; /* ACTHZ period (nsec) */ | ||
576 | |||
577 | /* | 565 | /* |
578 | * The current time | 566 | * The current time |
579 | * wall_to_monotonic is what we need to add to xtime (or xtime corrected | 567 | * wall_to_monotonic is what we need to add to xtime (or xtime corrected |
@@ -587,209 +575,6 @@ struct timespec wall_to_monotonic __attribute__ ((aligned (16))); | |||
587 | 575 | ||
588 | EXPORT_SYMBOL(xtime); | 576 | EXPORT_SYMBOL(xtime); |
589 | 577 | ||
590 | /* Don't completely fail for HZ > 500. */ | ||
591 | int tickadj = 500/HZ ? : 1; /* microsecs */ | ||
592 | |||
593 | |||
594 | /* | ||
595 | * phase-lock loop variables | ||
596 | */ | ||
597 | /* TIME_ERROR prevents overwriting the CMOS clock */ | ||
598 | int time_state = TIME_OK; /* clock synchronization status */ | ||
599 | int time_status = STA_UNSYNC; /* clock status bits */ | ||
600 | long time_offset; /* time adjustment (us) */ | ||
601 | long time_constant = 2; /* pll time constant */ | ||
602 | long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ | ||
603 | long time_precision = 1; /* clock precision (us) */ | ||
604 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | ||
605 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | ||
606 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; | ||
607 | /* frequency offset (scaled ppm)*/ | ||
608 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ | ||
609 | long time_reftime; /* time at last adjustment (s) */ | ||
610 | long time_adjust; | ||
611 | long time_next_adjust; | ||
612 | |||
613 | /* | ||
614 | * this routine handles the overflow of the microsecond field | ||
615 | * | ||
616 | * The tricky bits of code to handle the accurate clock support | ||
617 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
618 | * They were originally developed for SUN and DEC kernels. | ||
619 | * All the kudos should go to Dave for this stuff. | ||
620 | * | ||
621 | */ | ||
622 | static void second_overflow(void) | ||
623 | { | ||
624 | long ltemp; | ||
625 | |||
626 | /* Bump the maxerror field */ | ||
627 | time_maxerror += time_tolerance >> SHIFT_USEC; | ||
628 | if (time_maxerror > NTP_PHASE_LIMIT) { | ||
629 | time_maxerror = NTP_PHASE_LIMIT; | ||
630 | time_status |= STA_UNSYNC; | ||
631 | } | ||
632 | |||
633 | /* | ||
634 | * Leap second processing. If in leap-insert state at the end of the | ||
635 | * day, the system clock is set back one second; if in leap-delete | ||
636 | * state, the system clock is set ahead one second. The microtime() | ||
637 | * routine or external clock driver will insure that reported time is | ||
638 | * always monotonic. The ugly divides should be replaced. | ||
639 | */ | ||
640 | switch (time_state) { | ||
641 | case TIME_OK: | ||
642 | if (time_status & STA_INS) | ||
643 | time_state = TIME_INS; | ||
644 | else if (time_status & STA_DEL) | ||
645 | time_state = TIME_DEL; | ||
646 | break; | ||
647 | case TIME_INS: | ||
648 | if (xtime.tv_sec % 86400 == 0) { | ||
649 | xtime.tv_sec--; | ||
650 | wall_to_monotonic.tv_sec++; | ||
651 | /* | ||
652 | * The timer interpolator will make time change | ||
653 | * gradually instead of an immediate jump by one second | ||
654 | */ | ||
655 | time_interpolator_update(-NSEC_PER_SEC); | ||
656 | time_state = TIME_OOP; | ||
657 | clock_was_set(); | ||
658 | printk(KERN_NOTICE "Clock: inserting leap second " | ||
659 | "23:59:60 UTC\n"); | ||
660 | } | ||
661 | break; | ||
662 | case TIME_DEL: | ||
663 | if ((xtime.tv_sec + 1) % 86400 == 0) { | ||
664 | xtime.tv_sec++; | ||
665 | wall_to_monotonic.tv_sec--; | ||
666 | /* | ||
667 | * Use of time interpolator for a gradual change of | ||
668 | * time | ||
669 | */ | ||
670 | time_interpolator_update(NSEC_PER_SEC); | ||
671 | time_state = TIME_WAIT; | ||
672 | clock_was_set(); | ||
673 | printk(KERN_NOTICE "Clock: deleting leap second " | ||
674 | "23:59:59 UTC\n"); | ||
675 | } | ||
676 | break; | ||
677 | case TIME_OOP: | ||
678 | time_state = TIME_WAIT; | ||
679 | break; | ||
680 | case TIME_WAIT: | ||
681 | if (!(time_status & (STA_INS | STA_DEL))) | ||
682 | time_state = TIME_OK; | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * Compute the phase adjustment for the next second. In PLL mode, the | ||
687 | * offset is reduced by a fixed factor times the time constant. In FLL | ||
688 | * mode the offset is used directly. In either mode, the maximum phase | ||
689 | * adjustment for each second is clamped so as to spread the adjustment | ||
690 | * over not more than the number of seconds between updates. | ||
691 | */ | ||
692 | ltemp = time_offset; | ||
693 | if (!(time_status & STA_FLL)) | ||
694 | ltemp = shift_right(ltemp, SHIFT_KG + time_constant); | ||
695 | ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE); | ||
696 | ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE); | ||
697 | time_offset -= ltemp; | ||
698 | time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); | ||
699 | |||
700 | /* | ||
701 | * Compute the frequency estimate and additional phase adjustment due | ||
702 | * to frequency error for the next second. | ||
703 | */ | ||
704 | ltemp = time_freq; | ||
705 | time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE)); | ||
706 | |||
707 | #if HZ == 100 | ||
708 | /* | ||
709 | * Compensate for (HZ==100) != (1 << SHIFT_HZ). Add 25% and 3.125% to | ||
710 | * get 128.125; => only 0.125% error (p. 14) | ||
711 | */ | ||
712 | time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5); | ||
713 | #endif | ||
714 | #if HZ == 250 | ||
715 | /* | ||
716 | * Compensate for (HZ==250) != (1 << SHIFT_HZ). Add 1.5625% and | ||
717 | * 0.78125% to get 255.85938; => only 0.05% error (p. 14) | ||
718 | */ | ||
719 | time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); | ||
720 | #endif | ||
721 | #if HZ == 1000 | ||
722 | /* | ||
723 | * Compensate for (HZ==1000) != (1 << SHIFT_HZ). Add 1.5625% and | ||
724 | * 0.78125% to get 1023.4375; => only 0.05% error (p. 14) | ||
725 | */ | ||
726 | time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); | ||
727 | #endif | ||
728 | } | ||
729 | |||
730 | /* | ||
731 | * Returns how many microseconds we need to add to xtime this tick | ||
732 | * in doing an adjustment requested with adjtime. | ||
733 | */ | ||
734 | static long adjtime_adjustment(void) | ||
735 | { | ||
736 | long time_adjust_step; | ||
737 | |||
738 | time_adjust_step = time_adjust; | ||
739 | if (time_adjust_step) { | ||
740 | /* | ||
741 | * We are doing an adjtime thing. Prepare time_adjust_step to | ||
742 | * be within bounds. Note that a positive time_adjust means we | ||
743 | * want the clock to run faster. | ||
744 | * | ||
745 | * Limit the amount of the step to be in the range | ||
746 | * -tickadj .. +tickadj | ||
747 | */ | ||
748 | time_adjust_step = min(time_adjust_step, (long)tickadj); | ||
749 | time_adjust_step = max(time_adjust_step, (long)-tickadj); | ||
750 | } | ||
751 | return time_adjust_step; | ||
752 | } | ||
753 | |||
754 | /* in the NTP reference this is called "hardclock()" */ | ||
755 | static void update_ntp_one_tick(void) | ||
756 | { | ||
757 | long time_adjust_step; | ||
758 | |||
759 | time_adjust_step = adjtime_adjustment(); | ||
760 | if (time_adjust_step) | ||
761 | /* Reduce by this step the amount of time left */ | ||
762 | time_adjust -= time_adjust_step; | ||
763 | |||
764 | /* Changes by adjtime() do not take effect till next tick. */ | ||
765 | if (time_next_adjust != 0) { | ||
766 | time_adjust = time_next_adjust; | ||
767 | time_next_adjust = 0; | ||
768 | } | ||
769 | } | ||
770 | |||
771 | /* | ||
772 | * Return how long ticks are at the moment, that is, how much time | ||
773 | * update_wall_time_one_tick will add to xtime next time we call it | ||
774 | * (assuming no calls to do_adjtimex in the meantime). | ||
775 | * The return value is in fixed-point nanoseconds shifted by the | ||
776 | * specified number of bits to the right of the binary point. | ||
777 | * This function has no side-effects. | ||
778 | */ | ||
779 | u64 current_tick_length(void) | ||
780 | { | ||
781 | long delta_nsec; | ||
782 | u64 ret; | ||
783 | |||
784 | /* calculate the finest interval NTP will allow. | ||
785 | * ie: nanosecond value shifted by (SHIFT_SCALE - 10) | ||
786 | */ | ||
787 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; | ||
788 | ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; | ||
789 | ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); | ||
790 | |||
791 | return ret; | ||
792 | } | ||
793 | 578 | ||
794 | /* XXX - all of this timekeeping code should be later moved to time.c */ | 579 | /* XXX - all of this timekeeping code should be later moved to time.c */ |
795 | #include <linux/clocksource.h> | 580 | #include <linux/clocksource.h> |
@@ -966,10 +751,13 @@ void __init timekeeping_init(void) | |||
966 | unsigned long flags; | 751 | unsigned long flags; |
967 | 752 | ||
968 | write_seqlock_irqsave(&xtime_lock, flags); | 753 | write_seqlock_irqsave(&xtime_lock, flags); |
754 | |||
755 | ntp_clear(); | ||
756 | |||
969 | clock = clocksource_get_next(); | 757 | clock = clocksource_get_next(); |
970 | clocksource_calculate_interval(clock, tick_nsec); | 758 | clocksource_calculate_interval(clock, tick_nsec); |
971 | clock->cycle_last = clocksource_read(clock); | 759 | clock->cycle_last = clocksource_read(clock); |
972 | ntp_clear(); | 760 | |
973 | write_sequnlock_irqrestore(&xtime_lock, flags); | 761 | write_sequnlock_irqrestore(&xtime_lock, flags); |
974 | } | 762 | } |
975 | 763 | ||
@@ -980,7 +768,7 @@ static int timekeeping_suspended; | |||
980 | * @dev: unused | 768 | * @dev: unused |
981 | * | 769 | * |
982 | * This is for the generic clocksource timekeeping. | 770 | * This is for the generic clocksource timekeeping. |
983 | * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are | 771 | * xtime/wall_to_monotonic/jiffies/etc are |
984 | * still managed by arch specific suspend/resume code. | 772 | * still managed by arch specific suspend/resume code. |
985 | */ | 773 | */ |
986 | static int timekeeping_resume(struct sys_device *dev) | 774 | static int timekeeping_resume(struct sys_device *dev) |
@@ -1149,8 +937,6 @@ static void update_wall_time(void) | |||
1149 | /* interpolator bits */ | 937 | /* interpolator bits */ |
1150 | time_interpolator_update(clock->xtime_interval | 938 | time_interpolator_update(clock->xtime_interval |
1151 | >> clock->shift); | 939 | >> clock->shift); |
1152 | /* increment the NTP state machine */ | ||
1153 | update_ntp_one_tick(); | ||
1154 | 940 | ||
1155 | /* accumulate error between NTP and clock interval */ | 941 | /* accumulate error between NTP and clock interval */ |
1156 | clock->error += current_tick_length(); | 942 | clock->error += current_tick_length(); |
@@ -1230,9 +1016,6 @@ static inline void calc_load(unsigned long ticks) | |||
1230 | } | 1016 | } |
1231 | } | 1017 | } |
1232 | 1018 | ||
1233 | /* jiffies at the most recent update of wall time */ | ||
1234 | unsigned long wall_jiffies = INITIAL_JIFFIES; | ||
1235 | |||
1236 | /* | 1019 | /* |
1237 | * This read-write spinlock protects us from races in SMP while | 1020 | * This read-write spinlock protects us from races in SMP while |
1238 | * playing with xtime and avenrun. | 1021 | * playing with xtime and avenrun. |
@@ -1270,7 +1053,6 @@ void run_local_timers(void) | |||
1270 | */ | 1053 | */ |
1271 | static inline void update_times(unsigned long ticks) | 1054 | static inline void update_times(unsigned long ticks) |
1272 | { | 1055 | { |
1273 | wall_jiffies += ticks; | ||
1274 | update_wall_time(); | 1056 | update_wall_time(); |
1275 | calc_load(ticks); | 1057 | calc_load(ticks); |
1276 | } | 1058 | } |
@@ -1775,7 +1557,7 @@ unsigned long time_interpolator_get_offset(void) | |||
1775 | #define INTERPOLATOR_ADJUST 65536 | 1557 | #define INTERPOLATOR_ADJUST 65536 |
1776 | #define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST | 1558 | #define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST |
1777 | 1559 | ||
1778 | static void time_interpolator_update(long delta_nsec) | 1560 | void time_interpolator_update(long delta_nsec) |
1779 | { | 1561 | { |
1780 | u64 counter; | 1562 | u64 counter; |
1781 | unsigned long offset; | 1563 | unsigned long offset; |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c new file mode 100644 index 0000000000..db443221ba --- /dev/null +++ b/kernel/tsacct.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * tsacct.c - System accounting over taskstats interface | ||
3 | * | ||
4 | * Copyright (C) Jay Lan, <jlan@sgi.com> | ||
5 | * | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/tsacct_kern.h> | ||
22 | #include <linux/acct.h> | ||
23 | #include <linux/jiffies.h> | ||
24 | |||
25 | |||
26 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) | ||
27 | /* | ||
28 | * fill in basic accounting fields | ||
29 | */ | ||
30 | void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | ||
31 | { | ||
32 | struct timespec uptime, ts; | ||
33 | s64 ac_etime; | ||
34 | |||
35 | BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); | ||
36 | |||
37 | /* calculate task elapsed time in timespec */ | ||
38 | do_posix_clock_monotonic_gettime(&uptime); | ||
39 | ts = timespec_sub(uptime, current->group_leader->start_time); | ||
40 | /* rebase elapsed time to usec */ | ||
41 | ac_etime = timespec_to_ns(&ts); | ||
42 | do_div(ac_etime, NSEC_PER_USEC); | ||
43 | stats->ac_etime = ac_etime; | ||
44 | stats->ac_btime = xtime.tv_sec - ts.tv_sec; | ||
45 | if (thread_group_leader(tsk)) { | ||
46 | stats->ac_exitcode = tsk->exit_code; | ||
47 | if (tsk->flags & PF_FORKNOEXEC) | ||
48 | stats->ac_flag |= AFORK; | ||
49 | } | ||
50 | if (tsk->flags & PF_SUPERPRIV) | ||
51 | stats->ac_flag |= ASU; | ||
52 | if (tsk->flags & PF_DUMPCORE) | ||
53 | stats->ac_flag |= ACORE; | ||
54 | if (tsk->flags & PF_SIGNALED) | ||
55 | stats->ac_flag |= AXSIG; | ||
56 | stats->ac_nice = task_nice(tsk); | ||
57 | stats->ac_sched = tsk->policy; | ||
58 | stats->ac_uid = tsk->uid; | ||
59 | stats->ac_gid = tsk->gid; | ||
60 | stats->ac_pid = tsk->pid; | ||
61 | stats->ac_ppid = (tsk->parent) ? tsk->parent->pid : 0; | ||
62 | stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; | ||
63 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; | ||
64 | stats->ac_minflt = tsk->min_flt; | ||
65 | stats->ac_majflt = tsk->maj_flt; | ||
66 | |||
67 | strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); | ||
68 | } | ||
69 | |||
70 | |||
71 | #ifdef CONFIG_TASK_XACCT | ||
72 | |||
73 | #define KB 1024 | ||
74 | #define MB (1024*KB) | ||
75 | /* | ||
76 | * fill in extended accounting fields | ||
77 | */ | ||
78 | void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | ||
79 | { | ||
80 | /* convert pages-jiffies to Mbyte-usec */ | ||
81 | stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; | ||
82 | stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; | ||
83 | if (p->mm) { | ||
84 | /* adjust to KB unit */ | ||
85 | stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB; | ||
86 | stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB; | ||
87 | } | ||
88 | stats->read_char = p->rchar; | ||
89 | stats->write_char = p->wchar; | ||
90 | stats->read_syscalls = p->syscr; | ||
91 | stats->write_syscalls = p->syscw; | ||
92 | } | ||
93 | #undef KB | ||
94 | #undef MB | ||
95 | |||
96 | /** | ||
97 | * acct_update_integrals - update mm integral fields in task_struct | ||
98 | * @tsk: task_struct for accounting | ||
99 | */ | ||
100 | void acct_update_integrals(struct task_struct *tsk) | ||
101 | { | ||
102 | if (likely(tsk->mm)) { | ||
103 | long delta = cputime_to_jiffies( | ||
104 | cputime_sub(tsk->stime, tsk->acct_stimexpd)); | ||
105 | |||
106 | if (delta == 0) | ||
107 | return; | ||
108 | tsk->acct_stimexpd = tsk->stime; | ||
109 | tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); | ||
110 | tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; | ||
111 | } | ||
112 | } | ||
113 | |||
114 | /** | ||
115 | * acct_clear_integrals - clear the mm integral fields in task_struct | ||
116 | * @tsk: task_struct whose accounting fields are cleared | ||
117 | */ | ||
118 | void acct_clear_integrals(struct task_struct *tsk) | ||
119 | { | ||
120 | tsk->acct_stimexpd = 0; | ||
121 | tsk->acct_rss_mem1 = 0; | ||
122 | tsk->acct_vm_mem1 = 0; | ||
123 | } | ||
124 | #endif | ||
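
acct_rss_mem1 and acct_vm_mem1 above accumulate memory-time integrals in page-jiffies, which xacct_add_tsk() then rebases to Mbyte-microseconds for the taskstats report. A small sketch of that unit conversion, assuming example values HZ=250 and PAGE_SIZE=4096:

/* Sketch of the coremem conversion in xacct_add_tsk(): page-jiffies in,
 * MB-usec out. HZ and PAGE_SIZE are assumed example values. */
#include <stdio.h>
#include <stdint.h>

#define HZ        250		/* assumed */
#define PAGE_SIZE 4096ULL	/* assumed */
#define KB        1024ULL
#define MB        (1024 * KB)

static uint64_t jiffies_to_usecs(uint64_t j)
{
	return j * (1000000 / HZ);	/* exact when HZ divides 10^6 */
}

int main(void)
{
	/* e.g. 5000 pages resident for 10 seconds (2500 jiffies at HZ=250) */
	uint64_t acct_rss_mem1 = 5000ULL * 10 * HZ;

	uint64_t coremem = jiffies_to_usecs(acct_rss_mem1) * PAGE_SIZE / MB;
	printf("coremem = %llu MB-usec\n", (unsigned long long)coremem);
	return 0;
}

That is roughly 19.5 MB sustained over 10^7 microseconds, which is what the RSS integral is meant to capture.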
diff --git a/kernel/utsname.c b/kernel/utsname.c new file mode 100644 index 0000000000..c859164a69 --- /dev/null +++ b/kernel/utsname.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004 IBM Corporation | ||
3 | * | ||
4 | * Author: Serge Hallyn <serue@us.ibm.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation, version 2 of the | ||
9 | * License. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/uts.h> | ||
14 | #include <linux/utsname.h> | ||
15 | #include <linux/version.h> | ||
16 | |||
17 | /* | ||
18 | * Clone a new ns copying an original utsname, setting refcount to 1 | ||
19 | * @old_ns: namespace to clone | ||
20 | * Return NULL on error (failure to kmalloc), new ns otherwise | ||
21 | */ | ||
22 | static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | ||
23 | { | ||
24 | struct uts_namespace *ns; | ||
25 | |||
26 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); | ||
27 | if (ns) { | ||
28 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | ||
29 | kref_init(&ns->kref); | ||
30 | } | ||
31 | return ns; | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * unshare the current process' utsname namespace. | ||
36 | * called only in sys_unshare() | ||
37 | */ | ||
38 | int unshare_utsname(unsigned long unshare_flags, struct uts_namespace **new_uts) | ||
39 | { | ||
40 | if (unshare_flags & CLONE_NEWUTS) { | ||
41 | if (!capable(CAP_SYS_ADMIN)) | ||
42 | return -EPERM; | ||
43 | |||
44 | *new_uts = clone_uts_ns(current->nsproxy->uts_ns); | ||
45 | if (!*new_uts) | ||
46 | return -ENOMEM; | ||
47 | } | ||
48 | |||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Copy task tsk's utsname namespace, or clone it if flags | ||
54 | * specifies CLONE_NEWUTS. In latter case, changes to the | ||
55 | * utsname of this process won't be seen by parent, and vice | ||
56 | * versa. | ||
57 | */ | ||
58 | int copy_utsname(int flags, struct task_struct *tsk) | ||
59 | { | ||
60 | struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; | ||
61 | struct uts_namespace *new_ns; | ||
62 | int err = 0; | ||
63 | |||
64 | if (!old_ns) | ||
65 | return 0; | ||
66 | |||
67 | get_uts_ns(old_ns); | ||
68 | |||
69 | if (!(flags & CLONE_NEWUTS)) | ||
70 | return 0; | ||
71 | |||
72 | if (!capable(CAP_SYS_ADMIN)) { | ||
73 | err = -EPERM; | ||
74 | goto out; | ||
75 | } | ||
76 | |||
77 | new_ns = clone_uts_ns(old_ns); | ||
78 | if (!new_ns) { | ||
79 | err = -ENOMEM; | ||
80 | goto out; | ||
81 | } | ||
82 | tsk->nsproxy->uts_ns = new_ns; | ||
83 | |||
84 | out: | ||
85 | put_uts_ns(old_ns); | ||
86 | return err; | ||
87 | } | ||
88 | |||
89 | void free_uts_ns(struct kref *kref) | ||
90 | { | ||
91 | struct uts_namespace *ns; | ||
92 | |||
93 | ns = container_of(kref, struct uts_namespace, kref); | ||
94 | kfree(ns); | ||
95 | } | ||
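
copy_utsname() above follows the usual copy-on-clone pattern: pin the old namespace, duplicate it only when CLONE_NEWUTS is requested, and drop the pin on every exit path so the kref count stays balanced. A hedged userspace sketch of the same reference-counting discipline — the struct, get/put helpers and copy_ns() are illustrative stand-ins, not kernel APIs:

/* Userspace model of the get/clone/put pattern in copy_utsname(). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CLONE_NEWUTS 0x04000000		/* matches the kernel flag value */

struct uts_ns {
	int refcount;
	char nodename[65];
};

static struct uts_ns *get_ns(struct uts_ns *ns) { ns->refcount++; return ns; }

static void put_ns(struct uts_ns *ns)
{
	if (--ns->refcount == 0) {
		printf("freeing ns '%s'\n", ns->nodename);
		free(ns);
	}
}

static struct uts_ns *clone_ns(const struct uts_ns *old)
{
	struct uts_ns *ns = malloc(sizeof(*ns));
	if (ns) {
		memcpy(ns->nodename, old->nodename, sizeof(ns->nodename));
		ns->refcount = 1;	/* new ns starts with one reference */
	}
	return ns;
}

/* mirrors copy_utsname(): returns the namespace the child should use */
static struct uts_ns *copy_ns(unsigned long flags, struct uts_ns *old)
{
	struct uts_ns *new_ns;

	get_ns(old);			/* pin old across the clone decision */
	if (!(flags & CLONE_NEWUTS))
		return old;		/* share: the pinned ref becomes the child's */

	new_ns = clone_ns(old);
	put_ns(old);			/* drop the pin; the child uses new_ns */
	return new_ns;
}

int main(void)
{
	struct uts_ns *init_ns = clone_ns(&(struct uts_ns){ .nodename = "host0" });
	struct uts_ns *child = copy_ns(CLONE_NEWUTS, init_ns);

	strcpy(child->nodename, "container0");	/* invisible to the parent ns */
	printf("parent=%s child=%s\n", init_ns->nodename, child->nodename);
	put_ns(child);
	put_ns(init_ns);
	return 0;
}

Because the clone path drops the temporary reference it took, each exit leaves exactly one reference per user, which is what lets free_uts_ns() fire from the final kref_put.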