author     Ingo Molnar <mingo@elte.hu>  2008-10-27 05:50:54 -0400
committer  Ingo Molnar <mingo@elte.hu>  2008-10-27 05:50:54 -0400
commit     4944dd62de21230af039eda7cd218e9a09021d11 (patch)
tree       bac70f7bab8506c7e1b0408bacbdb0b1d77262e9 /kernel
parent     f17845e5d97ead8fbdadfd40039e058ec7cf4a42 (diff)
parent     0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
Merge commit 'v2.6.28-rc2' into tracing/urgent
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit_tree.c        48
-rw-r--r--  kernel/cgroup.c             2
-rw-r--r--  kernel/exec_domain.c       33
-rw-r--r--  kernel/fork.c               2
-rw-r--r--  kernel/futex.c             11
-rw-r--r--  kernel/hrtimer.c          206
-rw-r--r--  kernel/irq/chip.c           3
-rw-r--r--  kernel/module.c           343
-rw-r--r--  kernel/panic.c             17
-rw-r--r--  kernel/params.c           276
-rw-r--r--  kernel/posix-timers.c      10
-rw-r--r--  kernel/power/disk.c         2
-rw-r--r--  kernel/power/power.h        2
-rw-r--r--  kernel/power/swap.c        14
-rw-r--r--  kernel/rcupdate.c          19
-rw-r--r--  kernel/rtmutex.c            3
-rw-r--r--  kernel/sched.c             59
-rw-r--r--  kernel/sched_fair.c        62
-rw-r--r--  kernel/sched_features.h     2
-rw-r--r--  kernel/sched_stats.h       11
-rw-r--r--  kernel/stop_machine.c     120
-rw-r--r--  kernel/sys.c               10
-rw-r--r--  kernel/sysctl.c            10
-rw-r--r--  kernel/time.c              18
-rw-r--r--  kernel/time/ntp.c           3
-rw-r--r--  kernel/time/tick-sched.c   35
-rw-r--r--  kernel/time/timer_list.c    8
-rw-r--r--  kernel/workqueue.c          7

28 files changed, 785 insertions(+), 551 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f7921a2ecf16..8ba0e0d934f2 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -532,7 +532,7 @@ void audit_trim_trees(void)
 	list_add(&cursor, &tree_list);
 	while (cursor.next != &tree_list) {
 		struct audit_tree *tree;
-		struct nameidata nd;
+		struct path path;
 		struct vfsmount *root_mnt;
 		struct node *node;
 		struct list_head list;
@@ -544,12 +544,12 @@ void audit_trim_trees(void)
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = path_lookup(tree->pathname, 0, &nd);
+		err = kern_path(tree->pathname, 0, &path);
 		if (err)
 			goto skip_it;
 
-		root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-		path_put(&nd.path);
+		root_mnt = collect_mounts(path.mnt, path.dentry);
+		path_put(&path);
 		if (!root_mnt)
 			goto skip_it;
 
@@ -580,19 +580,19 @@ skip_it:
 }
 
 static int is_under(struct vfsmount *mnt, struct dentry *dentry,
-		    struct nameidata *nd)
+		    struct path *path)
 {
-	if (mnt != nd->path.mnt) {
+	if (mnt != path->mnt) {
 		for (;;) {
 			if (mnt->mnt_parent == mnt)
 				return 0;
-			if (mnt->mnt_parent == nd->path.mnt)
+			if (mnt->mnt_parent == path->mnt)
 				break;
 			mnt = mnt->mnt_parent;
 		}
 		dentry = mnt->mnt_mountpoint;
 	}
-	return is_subdir(dentry, nd->path.dentry);
+	return is_subdir(dentry, path->dentry);
 }
 
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
@@ -618,7 +618,7 @@ void audit_put_tree(struct audit_tree *tree)
 int audit_add_tree_rule(struct audit_krule *rule)
 {
 	struct audit_tree *seed = rule->tree, *tree;
-	struct nameidata nd;
+	struct path path;
 	struct vfsmount *mnt, *p;
 	struct list_head list;
 	int err;
@@ -637,11 +637,11 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	/* do not set rule->tree yet */
 	mutex_unlock(&audit_filter_mutex);
 
-	err = path_lookup(tree->pathname, 0, &nd);
+	err = kern_path(tree->pathname, 0, &path);
 	if (err)
 		goto Err;
-	mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_put(&nd.path);
+	mnt = collect_mounts(path.mnt, path.dentry);
+	path_put(&path);
 	if (!mnt) {
 		err = -ENOMEM;
 		goto Err;
@@ -690,29 +690,29 @@ int audit_tag_tree(char *old, char *new)
 {
 	struct list_head cursor, barrier;
 	int failed = 0;
-	struct nameidata nd;
+	struct path path;
 	struct vfsmount *tagged;
 	struct list_head list;
 	struct vfsmount *mnt;
 	struct dentry *dentry;
 	int err;
 
-	err = path_lookup(new, 0, &nd);
+	err = kern_path(new, 0, &path);
 	if (err)
 		return err;
-	tagged = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_put(&nd.path);
+	tagged = collect_mounts(path.mnt, path.dentry);
+	path_put(&path);
 	if (!tagged)
 		return -ENOMEM;
 
-	err = path_lookup(old, 0, &nd);
+	err = kern_path(old, 0, &path);
 	if (err) {
 		drop_collected_mounts(tagged);
 		return err;
 	}
-	mnt = mntget(nd.path.mnt);
-	dentry = dget(nd.path.dentry);
-	path_put(&nd.path);
+	mnt = mntget(path.mnt);
+	dentry = dget(path.dentry);
+	path_put(&path);
 
 	if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
 		follow_up(&mnt, &dentry);
@@ -733,7 +733,7 @@ int audit_tag_tree(char *old, char *new)
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = path_lookup(tree->pathname, 0, &nd);
+		err = kern_path(tree->pathname, 0, &path);
 		if (err) {
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
@@ -741,15 +741,15 @@ int audit_tag_tree(char *old, char *new)
 		}
 
 		spin_lock(&vfsmount_lock);
-		if (!is_under(mnt, dentry, &nd)) {
+		if (!is_under(mnt, dentry, &path)) {
 			spin_unlock(&vfsmount_lock);
-			path_put(&nd.path);
+			path_put(&path);
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
 			continue;
 		}
 		spin_unlock(&vfsmount_lock);
-		path_put(&nd.path);
+		path_put(&path);
 
 		list_for_each_entry(p, &list, mnt_list) {
 			failed = tag_chunk(p->mnt_root->d_inode, tree);
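Every hunk in this file is the same mechanical conversion: the heavyweight struct nameidata plus path_lookup() becomes the slimmer struct path plus kern_path(), and the nd.path.* dereferences flatten to path.*. A minimal sketch of the lookup/collect/put pattern all three call sites share (the helper name audit_collect_at is hypothetical; the VFS calls are the real ones used above):

#include <linux/err.h>
#include <linux/namei.h>
#include <linux/mount.h>

static struct vfsmount *audit_collect_at(const char *pathname)
{
	struct path path;
	struct vfsmount *mnt;
	int err;

	err = kern_path(pathname, 0, &path);	/* was path_lookup(..., &nd) */
	if (err)
		return ERR_PTR(err);

	/* collect_mounts() takes its own references, so the lookup
	 * reference can be dropped right away. */
	mnt = collect_mounts(path.mnt, path.dentry);
	path_put(&path);			/* was path_put(&nd.path) */

	return mnt ? mnt : ERR_PTR(-ENOMEM);
}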
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 046c1609606b..35eebd5510c2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2104,7 +2104,7 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 	down_read(&cgrp->pids_mutex);
 	if (pid) {
 		int end = cgrp->pids_length;
-		int i;
+
 		while (index < end) {
 			int mid = (index + end) / 2;
 			if (cgrp->tasks_pids[mid] == pid) {
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0d407e886735..0511716e9424 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -12,7 +12,9 @@
 #include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/personality.h>
+#include <linux/proc_fs.h>
 #include <linux/sched.h>
+#include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/sysctl.h>
 #include <linux/types.h>
@@ -173,20 +175,39 @@ __set_personality(u_long personality)
 	return 0;
 }
 
-int
-get_exec_domain_list(char *page)
+#ifdef CONFIG_PROC_FS
+static int execdomains_proc_show(struct seq_file *m, void *v)
 {
 	struct exec_domain *ep;
-	int len = 0;
 
 	read_lock(&exec_domains_lock);
-	for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next)
-		len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n",
+	for (ep = exec_domains; ep; ep = ep->next)
+		seq_printf(m, "%d-%d\t%-16s\t[%s]\n",
 			       ep->pers_low, ep->pers_high, ep->name,
 			       module_name(ep->module));
 	read_unlock(&exec_domains_lock);
-	return (len);
+	return 0;
+}
+
+static int execdomains_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, execdomains_proc_show, NULL);
+}
+
+static const struct file_operations execdomains_proc_fops = {
+	.open		= execdomains_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init proc_execdomains_init(void)
+{
+	proc_create("execdomains", 0, NULL, &execdomains_proc_fops);
+	return 0;
 }
+module_init(proc_execdomains_init);
+#endif
 
 asmlinkage long
 sys_personality(u_long personality)
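Beyond moving the /proc registration into exec_domain.c itself, the seq_file conversion drops the old one-page limit: get_exec_domain_list() stopped once len reached PAGE_SIZE - 80, whereas seq_printf() lets the seq_file core buffer arbitrarily long output. The per-record format is unchanged, so a plain reader still works; a small userspace sketch:

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/execdomains", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "0-0	Linux	[kernel]" */
	fclose(f);
	return 0;
}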
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d093552dd6e..f6083561dfe0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1018,6 +1018,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
 
+	p->default_timer_slack_ns = current->timer_slack_ns;
+
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 	p->last_switch_count = 0;
 	p->last_switch_timestamp = 0;
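copy_process() seeds the child's default_timer_slack_ns from the parent's current timer_slack_ns, so slack is inherited across fork(). The knob itself is exposed to userspace through prctl() in the same 2.6.28 series; a hedged sketch (the PR_* constants are guarded in case older headers lack them):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_TIMERSLACK
#define PR_SET_TIMERSLACK 29
#define PR_GET_TIMERSLACK 30
#endif

int main(void)
{
	/* Allow this task's timer wakeups to be deferred by up to 1 ms
	 * (the default in this series is 50000 ns). */
	prctl(PR_SET_TIMERSLACK, 1000000UL, 0, 0, 0);
	printf("slack: %d ns\n", prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));
	return 0;
}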
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..8af10027514b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1296,13 +1296,16 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 		if (!abs_time)
 			schedule();
 		else {
+			unsigned long slack;
+			slack = current->timer_slack_ns;
+			if (rt_task(current))
+				slack = 0;
 			hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
 						HRTIMER_MODE_ABS);
 			hrtimer_init_sleeper(&t, current);
-			t.timer.expires = *abs_time;
+			hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
 
-			hrtimer_start(&t.timer, t.timer.expires,
-						HRTIMER_MODE_ABS);
+			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
 			if (!hrtimer_active(&t.timer))
 				t.task = NULL;
 
@@ -1404,7 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
 				      HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(to, current);
-		to->timer.expires = *time;
+		hrtimer_set_expires(&to->timer, *time);
 	}
 
 	q.pi_state = NULL;
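The effect of hrtimer_set_expires_range_ns() here is that a FUTEX_WAIT may be woken anywhere in [*abs_time, *abs_time + slack], with slack forced to 0 for realtime tasks. What the accessor records, paraphrased from the 2.6.28 include/linux/hrtimer.h (a sketch of the semantics, not a verbatim copy of the header): the timer now carries both a soft and a hard expiry, and the slack is the distance between them:

static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer,
						ktime_t time, unsigned long delta)
{
	timer->_softexpires = time;	/* earliest allowed wakeup */
	timer->_expires = ktime_add_safe(time, ns_to_ktime(delta));
					/* latest allowed wakeup */
}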
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 95978f48e039..2b465dfde426 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -517,7 +517,7 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 		if (!base->first)
 			continue;
 		timer = rb_entry(base->first, struct hrtimer, node);
-		expires = ktime_sub(timer->expires, base->offset);
+		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires.tv64 < cpu_base->expires_next.tv64)
 			cpu_base->expires_next = expires;
 	}
@@ -539,10 +539,10 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base)
 {
 	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
-	ktime_t expires = ktime_sub(timer->expires, base->offset);
+	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
-	WARN_ON_ONCE(timer->expires.tv64 < 0);
+	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 
 	/*
 	 * When the callback is running, we do not reprogram the clock event
@@ -795,7 +795,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 	u64 orun = 1;
 	ktime_t delta;
 
-	delta = ktime_sub(now, timer->expires);
+	delta = ktime_sub(now, hrtimer_get_expires(timer));
 
 	if (delta.tv64 < 0)
 		return 0;
@@ -807,8 +807,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 		s64 incr = ktime_to_ns(interval);
 
 		orun = ktime_divns(delta, incr);
-		timer->expires = ktime_add_ns(timer->expires, incr * orun);
-		if (timer->expires.tv64 > now.tv64)
+		hrtimer_add_expires_ns(timer, incr * orun);
+		if (hrtimer_get_expires_tv64(timer) > now.tv64)
 			return orun;
 		/*
 		 * This (and the ktime_add() below) is the
@@ -816,7 +816,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 		 */
 		orun++;
 	}
-	timer->expires = ktime_add_safe(timer->expires, interval);
+	hrtimer_add_expires(timer, interval);
 
 	return orun;
 }
@@ -848,7 +848,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
 		 * We dont care about collisions. Nodes with
 		 * the same expiry time stay together.
 		 */
-		if (timer->expires.tv64 < entry->expires.tv64) {
+		if (hrtimer_get_expires_tv64(timer) <
+		    hrtimer_get_expires_tv64(entry)) {
 			link = &(*link)->rb_left;
 		} else {
 			link = &(*link)->rb_right;
@@ -945,9 +946,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 }
 
 /**
- * hrtimer_start - (re)start an relative timer on the current CPU
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
 * @timer:	the timer to be added
 * @tim:	expiry time
+ * @delta_ns:	"slack" range for the timer
 * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 *
 * Returns:
@@ -955,7 +957,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 *  1 when the timer was active
 */
 int
-hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
+			const enum hrtimer_mode mode)
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
@@ -983,7 +986,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 #endif
 	}
 
-	timer->expires = tim;
+	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
 	timer_stats_hrtimer_set_start_info(timer);
 
@@ -1016,8 +1019,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
+
+/**
+ * hrtimer_start - (re)start an hrtimer on the current CPU
+ * @timer:	the timer to be added
+ * @tim:	expiry time
+ * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ *
+ * Returns:
+ *  0 on success
+ *  1 when the timer was active
+ */
+int
+hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+{
+	return hrtimer_start_range_ns(timer, tim, 0, mode);
+}
 EXPORT_SYMBOL_GPL(hrtimer_start);
 
+
 /**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:	hrtimer to stop
@@ -1077,7 +1098,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 	ktime_t rem;
 
 	base = lock_hrtimer_base(timer, &flags);
-	rem = ktime_sub(timer->expires, base->get_time());
+	rem = hrtimer_expires_remaining(timer);
 	unlock_hrtimer_base(timer, &flags);
 
 	return rem;
@@ -1109,7 +1130,7 @@ ktime_t hrtimer_get_next_event(void)
 				continue;
 
 			timer = rb_entry(base->first, struct hrtimer, node);
-			delta.tv64 = timer->expires.tv64;
+			delta.tv64 = hrtimer_get_expires_tv64(timer);
 			delta = ktime_sub(delta, base->get_time());
 			if (delta.tv64 < mindelta.tv64)
 				mindelta.tv64 = delta.tv64;
@@ -1310,10 +1331,23 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 			timer = rb_entry(node, struct hrtimer, node);
 
-			if (basenow.tv64 < timer->expires.tv64) {
+			/*
+			 * The immediate goal for using the softexpires is
+			 * minimizing wakeups, not running timers at the
+			 * earliest interrupt after their soft expiration.
+			 * This allows us to avoid using a Priority Search
+			 * Tree, which can answer a stabbing query for
+			 * overlapping intervals and instead use the simple
+			 * BST we already have.
+			 * We don't add extra wakeups by delaying timers that
+			 * are right-of a not yet expired timer, because that
+			 * timer will have to trigger a wakeup anyway.
+			 */
+
+			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
 				ktime_t expires;
 
-				expires = ktime_sub(timer->expires,
+				expires = ktime_sub(hrtimer_get_expires(timer),
 						    base->offset);
 				if (expires.tv64 < expires_next.tv64)
 					expires_next = expires;
@@ -1349,6 +1383,30 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	raise_softirq(HRTIMER_SOFTIRQ);
 }
 
+/**
+ * hrtimer_peek_ahead_timers -- run soft-expired timers now
+ *
+ * hrtimer_peek_ahead_timers will peek at the timer queue of
+ * the current cpu and check if there are any timers for which
+ * the soft expires time has passed. If any such timers exist,
+ * they are run immediately and then removed from the timer queue.
+ *
+ */
+void hrtimer_peek_ahead_timers(void)
+{
+	struct tick_device *td;
+	unsigned long flags;
+
+	if (!hrtimer_hres_active())
+		return;
+
+	local_irq_save(flags);
+	td = &__get_cpu_var(tick_cpu_device);
+	if (td && td->evtdev)
+		hrtimer_interrupt(td->evtdev);
+	local_irq_restore(flags);
+}
+
 static void run_hrtimer_softirq(struct softirq_action *h)
 {
 	run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
@@ -1414,7 +1472,8 @@ void hrtimer_run_queues(void)
 			struct hrtimer *timer;
 
 			timer = rb_entry(node, struct hrtimer, node);
-			if (base->softirq_time.tv64 <= timer->expires.tv64)
+			if (base->softirq_time.tv64 <=
+					hrtimer_get_expires_tv64(timer))
 				break;
 
 			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
@@ -1462,7 +1521,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
-		hrtimer_start(&t->timer, t->timer.expires, mode);
+		hrtimer_start_expires(&t->timer, mode);
 		if (!hrtimer_active(&t->timer))
 			t->task = NULL;
 
@@ -1484,7 +1543,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
 	struct timespec rmt;
 	ktime_t rem;
 
-	rem = ktime_sub(timer->expires, timer->base->get_time());
+	rem = hrtimer_expires_remaining(timer);
 	if (rem.tv64 <= 0)
 		return 0;
 	rmt = ktime_to_timespec(rem);
@@ -1503,7 +1562,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 
 	hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
 				HRTIMER_MODE_ABS);
-	t.timer.expires.tv64 = restart->nanosleep.expires;
+	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
 
 	if (do_nanosleep(&t, HRTIMER_MODE_ABS))
 		goto out;
@@ -1528,9 +1587,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	struct restart_block *restart;
 	struct hrtimer_sleeper t;
 	int ret = 0;
+	unsigned long slack;
+
+	slack = current->timer_slack_ns;
+	if (rt_task(current))
+		slack = 0;
 
 	hrtimer_init_on_stack(&t.timer, clockid, mode);
-	t.timer.expires = timespec_to_ktime(*rqtp);
+	hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
 	if (do_nanosleep(&t, mode))
 		goto out;
 
@@ -1550,7 +1614,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	restart->fn = hrtimer_nanosleep_restart;
 	restart->nanosleep.index = t.timer.base->index;
 	restart->nanosleep.rmtp = rmtp;
-	restart->nanosleep.expires = t.timer.expires.tv64;
+	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
 
 	ret = -ERESTART_RESTARTBLOCK;
 out:
@@ -1752,3 +1816,103 @@ void __init hrtimers_init(void)
 #endif
 }
 
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires:	timeout value (ktime_t)
+ * @delta:	slack in expires timeout (ktime_t)
+ * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel gives the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+				     const enum hrtimer_mode mode)
+{
+	struct hrtimer_sleeper t;
+
+	/*
+	 * Optimize when a zero timeout value is given. It does not
+	 * matter whether this is an absolute or a relative time.
+	 */
+	if (expires && !expires->tv64) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	/*
+	 * A NULL parameter means "infinite"
+	 */
+	if (!expires) {
+		schedule();
+		__set_current_state(TASK_RUNNING);
+		return -EINTR;
+	}
+
+	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+
+	hrtimer_init_sleeper(&t, current);
+
+	hrtimer_start_expires(&t.timer, mode);
+	if (!hrtimer_active(&t.timer))
+		t.task = NULL;
+
+	if (likely(t.task))
+		schedule();
+
+	hrtimer_cancel(&t.timer);
+	destroy_hrtimer_on_stack(&t.timer);
+
+	__set_current_state(TASK_RUNNING);
+
+	return !t.task ? 0 : -EINTR;
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
+
+/**
+ * schedule_hrtimeout - sleep until timeout
+ * @expires:	timeout value (ktime_t)
+ * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout(ktime_t *expires,
+			       const enum hrtimer_mode mode)
+{
+	return schedule_hrtimeout_range(expires, 0, mode);
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout);
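schedule_hrtimeout_range() is the sleep-side entry point for the new slack machinery. A sketch of a driver-style caller (example_wait() is hypothetical): sleep for about 10 ms, but let the kernel coalesce the wakeup anywhere inside a further 2 ms window:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

static int example_wait(void)
{
	ktime_t t = ktime_set(0, 10 * NSEC_PER_MSEC);

	set_current_state(TASK_UNINTERRUPTIBLE);
	/* Wakeup may land anywhere in [10 ms, 10 ms + 2 ms). */
	return schedule_hrtimeout_range(&t, 2 * NSEC_PER_MSEC,
					HRTIMER_MODE_REL);
}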
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4895fde4eb93..10b5092e9bfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,6 +76,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	desc->chip_data = NULL;
 	desc->handle_irq = handle_bad_irq;
 	desc->chip = &no_irq_chip;
+	desc->name = NULL;
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
@@ -127,7 +128,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 		return 0;
 
 	spin_lock_irqsave(&desc->lock, flags);
-	ret = __irq_set_trigger(desc, irq, flags);
+	ret = __irq_set_trigger(desc, irq, type);
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
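The second hunk is a plain bug fix: set_irq_type() passed flags, the saved interrupt state from spin_lock_irqsave(), where __irq_set_trigger() expects the requested trigger type. Both are bare integers, so the compiler accepted the mix-up silently; a self-contained toy showing why this class of bug produces no diagnostics:

#include <stdio.h>

static int set_trigger(unsigned int type)
{
	return printf("programming trigger type %u\n", type);
}

int main(void)
{
	unsigned long flags = 0x282;	/* saved IRQ state: an opaque cookie */
	unsigned int type = 0x1;	/* e.g. a rising-edge trigger */

	set_trigger(flags);	/* compiles cleanly: the bug's shape */
	set_trigger(type);	/* the fix: pass the requested type */
	return 0;
}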
diff --git a/kernel/module.c b/kernel/module.c
index 0d8d21ee792c..1f4cc00e0c20 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -20,11 +20,13 @@
 #include <linux/moduleloader.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
+#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -42,6 +44,7 @@
 #include <linux/string.h>
 #include <linux/mutex.h>
 #include <linux/unwind.h>
+#include <linux/rculist.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <linux/license.h>
@@ -63,7 +66,7 @@
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
 /* List of modules, protected by module_mutex or preempt_disable
- * (add/delete uses stop_machine). */
+ * (delete uses stop_machine/add uses RCU list operations). */
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
@@ -132,6 +135,29 @@ static unsigned int find_sec(Elf_Ehdr *hdr,
 	return 0;
 }
 
+/* Find a module section, or NULL. */
+static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs,
+			  const char *secstrings, const char *name)
+{
+	/* Section 0 has sh_addr 0. */
+	return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr;
+}
+
+/* Find a module section, or NULL.  Fill in number of "objects" in section. */
+static void *section_objs(Elf_Ehdr *hdr,
+			  Elf_Shdr *sechdrs,
+			  const char *secstrings,
+			  const char *name,
+			  size_t object_size,
+			  unsigned int *num)
+{
+	unsigned int sec = find_sec(hdr, sechdrs, secstrings, name);
+
+	/* Section 0 has sh_addr 0 and sh_size 0. */
+	*num = sechdrs[sec].sh_size / object_size;
+	return (void *)sechdrs[sec].sh_addr;
+}
+
 /* Provided by the linker */
 extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
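section_addr() and section_objs() lean on the ELF convention that section index 0 (SHN_UNDEF) has sh_addr == 0 and sh_size == 0, which is exactly what find_sec() returns for a missing name: an absent section comes back as a NULL pointer with a zero count, so the per-section "if (index)" guards in load_module() can simply disappear. A sketch of how one call site reads after the refactor (find_exception_table is a hypothetical wrapper; the calls mirror the hunks further down):

static void find_exception_table(struct module *mod, Elf_Ehdr *hdr,
				 Elf_Shdr *sechdrs, const char *secstrings)
{
	mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
				    sizeof(*mod->extable),
				    &mod->num_exentries);
	/* If "__ex_table" is absent: extable == NULL, num_exentries == 0,
	 * and the sort below degenerates to a no-op. */
	sort_extable(mod->extable, mod->extable + mod->num_exentries);
}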
@@ -218,7 +244,7 @@ static bool each_symbol(bool (*fn)(const struct symsearch *arr,
 	if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
 		return true;
 
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		struct symsearch arr[] = {
 			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
 			  NOT_GPL_ONLY, false },
@@ -1394,17 +1420,6 @@ static void mod_kobject_remove(struct module *mod)
 }
 
 /*
- * link the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __link_module(void *_mod)
-{
-	struct module *mod = _mod;
-	list_add(&mod->list, &modules);
-	return 0;
-}
-
-/*
 * unlink the module with the whole machine is stopped with interrupts off
 * - this defends against kallsyms not taking locks
 */
@@ -1789,32 +1804,20 @@ static inline void add_kallsyms(struct module *mod,
 }
 #endif /* CONFIG_KALLSYMS */
 
-#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
-static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
+static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
 {
-	struct mod_debug *debug_info;
-	unsigned long pos, end;
-	unsigned int num_verbose;
-
-	pos = sechdrs[verboseindex].sh_addr;
-	num_verbose = sechdrs[verboseindex].sh_size /
-				sizeof(struct mod_debug);
-	end = pos + (num_verbose * sizeof(struct mod_debug));
+#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
+	unsigned int i;
 
-	for (; pos < end; pos += sizeof(struct mod_debug)) {
-		debug_info = (struct mod_debug *)pos;
-		register_dynamic_debug_module(debug_info->modname,
-			debug_info->type, debug_info->logical_modname,
-			debug_info->flag_names, debug_info->hash,
-			debug_info->hash2);
+	for (i = 0; i < num; i++) {
+		register_dynamic_debug_module(debug[i].modname,
+					      debug[i].type,
+					      debug[i].logical_modname,
+					      debug[i].flag_names,
+					      debug[i].hash, debug[i].hash2);
 	}
-}
-#else
-static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
-					unsigned int verboseindex)
-{
-}
 #endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
+}
 
 static void *module_alloc_update_bounds(unsigned long size)
 {
@@ -1843,37 +1846,14 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int i;
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
-	unsigned int setupindex;
-	unsigned int exindex;
-	unsigned int exportindex;
-	unsigned int modindex;
-	unsigned int obsparmindex;
-	unsigned int infoindex;
-	unsigned int gplindex;
-	unsigned int crcindex;
-	unsigned int gplcrcindex;
-	unsigned int versindex;
-	unsigned int pcpuindex;
-	unsigned int gplfutureindex;
-	unsigned int gplfuturecrcindex;
+	unsigned int modindex, versindex, infoindex, pcpuindex;
 	unsigned int unwindex = 0;
-#ifdef CONFIG_UNUSED_SYMBOLS
-	unsigned int unusedindex;
-	unsigned int unusedcrcindex;
-	unsigned int unusedgplindex;
-	unsigned int unusedgplcrcindex;
-#endif
-	unsigned int markersindex;
-	unsigned int markersstringsindex;
-	unsigned int verboseindex;
-	unsigned int tracepointsindex;
-	unsigned int tracepointsstringsindex;
-	unsigned int mcountindex;
+	unsigned int num_kp, num_mcount;
+	struct kernel_param *kp;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-	void *mseg;
-	struct exception_table_entry *extable;
+	unsigned long *mseg;
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -1937,6 +1917,7 @@ static noinline struct module *load_module(void __user *umod,
 		err = -ENOEXEC;
 		goto free_hdr;
 	}
+	/* This is temporary: point mod into copy of data. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
 	if (symindex == 0) {
@@ -1946,22 +1927,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto free_hdr;
 	}
 
-	/* Optional sections */
-	exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
-	gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
-	gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
-	crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
-	gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
-	gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
-#ifdef CONFIG_UNUSED_SYMBOLS
-	unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
-	unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
-	unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
-	unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
-#endif
-	setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
-	exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
-	obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
 	versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
 	infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
 	pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
@@ -2117,42 +2082,57 @@ static noinline struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto cleanup;
 
-	/* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */
-	mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms);
-	mod->syms = (void *)sechdrs[exportindex].sh_addr;
-	if (crcindex)
-		mod->crcs = (void *)sechdrs[crcindex].sh_addr;
-	mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms);
-	mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr;
-	if (gplcrcindex)
-		mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
-	mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
-					sizeof(*mod->gpl_future_syms);
-	mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
-	if (gplfuturecrcindex)
-		mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+	/* Now we've got everything in the final locations, we can
+	 * find optional sections. */
+	kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*kp),
+			  &num_kp);
+	mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
+				 sizeof(*mod->syms), &mod->num_syms);
+	mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
+	mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
+				     sizeof(*mod->gpl_syms),
+				     &mod->num_gpl_syms);
+	mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
+	mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
+					    "__ksymtab_gpl_future",
+					    sizeof(*mod->gpl_future_syms),
+					    &mod->num_gpl_future_syms);
+	mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
+					    "__kcrctab_gpl_future");
 
 #ifdef CONFIG_UNUSED_SYMBOLS
-	mod->num_unused_syms = sechdrs[unusedindex].sh_size /
-					sizeof(*mod->unused_syms);
-	mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
-					sizeof(*mod->unused_gpl_syms);
-	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
-	if (unusedcrcindex)
-		mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
-	mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
-	if (unusedgplcrcindex)
-		mod->unused_gpl_crcs
-			= (void *)sechdrs[unusedgplcrcindex].sh_addr;
+	mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
+					"__ksymtab_unused",
+					sizeof(*mod->unused_syms),
+					&mod->num_unused_syms);
+	mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
+					"__kcrctab_unused");
+	mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
+					    "__ksymtab_unused_gpl",
+					    sizeof(*mod->unused_gpl_syms),
+					    &mod->num_unused_gpl_syms);
+	mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
+					    "__kcrctab_unused_gpl");
+#endif
+
+#ifdef CONFIG_MARKERS
+	mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
+				    sizeof(*mod->markers), &mod->num_markers);
+#endif
+#ifdef CONFIG_TRACEPOINTS
+	mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
+					"__tracepoints",
+					sizeof(*mod->tracepoints),
+					&mod->num_tracepoints);
 #endif
 
 #ifdef CONFIG_MODVERSIONS
-	if ((mod->num_syms && !crcindex)
-	    || (mod->num_gpl_syms && !gplcrcindex)
-	    || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+	if ((mod->num_syms && !mod->crcs)
+	    || (mod->num_gpl_syms && !mod->gpl_crcs)
+	    || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
 #ifdef CONFIG_UNUSED_SYMBOLS
-	    || (mod->num_unused_syms && !unusedcrcindex)
-	    || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+	    || (mod->num_unused_syms && !mod->unused_crcs)
+	    || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
 #endif
 	    ) {
 		printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
@@ -2161,16 +2141,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto cleanup;
 	}
 #endif
-	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
-	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
-					"__markers_strings");
-	verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
-	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
-	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
-					   "__tracepoints_strings");
-
-	mcountindex = find_sec(hdr, sechdrs, secstrings,
-			       "__mcount_loc");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2193,28 +2163,16 @@ static noinline struct module *load_module(void __user *umod,
 		if (err < 0)
 			goto cleanup;
 	}
-#ifdef CONFIG_MARKERS
-	mod->markers = (void *)sechdrs[markersindex].sh_addr;
-	mod->num_markers =
-		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
-#endif
-#ifdef CONFIG_TRACEPOINTS
-	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
-	mod->num_tracepoints =
-		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
-#endif
-
 
 	/* Find duplicate symbols */
 	err = verify_export_symbols(mod);
-
 	if (err < 0)
 		goto cleanup;
 
 	/* Set up and sort exception table */
-	mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
-	mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
-	sort_extable(extable, extable + mod->num_exentries);
+	mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
+				    sizeof(*mod->extable), &mod->num_exentries);
+	sort_extable(mod->extable, mod->extable + mod->num_exentries);
 
 	/* Finally, copy percpu area over. */
 	percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
@@ -2223,11 +2181,17 @@ static noinline struct module *load_module(void __user *umod,
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
 	if (!mod->taints) {
+		struct mod_debug *debug;
+		unsigned int num_debug;
+
 #ifdef CONFIG_MARKERS
 		marker_update_probe_range(mod->markers,
 			mod->markers + mod->num_markers);
 #endif
-		dynamic_printk_setup(sechdrs, verboseindex);
+		debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
+				     sizeof(*debug), &num_debug);
+		dynamic_printk_setup(debug, num_debug);
+
#ifdef CONFIG_TRACEPOINTS
 		tracepoint_update_probe_range(mod->tracepoints,
 			mod->tracepoints + mod->num_tracepoints);
@@ -2235,8 +2199,9 @@ static noinline struct module *load_module(void __user *umod,
 	}
 
 	/* sechdrs[0].sh_size is always zero */
-	mseg = (void *)sechdrs[mcountindex].sh_addr;
-	ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
+			    sizeof(*mseg), &num_mcount);
+	ftrace_init_module(mseg, mseg + num_mcount);
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2261,30 +2226,24 @@ static noinline struct module *load_module(void __user *umod,
 	set_fs(old_fs);
 
 	mod->args = args;
-	if (obsparmindex)
+	if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
 		printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
 		       mod->name);
 
 	/* Now sew it into the lists so we can get lockdep and oops
 	 * info during argument parsing. No one should access us, since
-	 * strong_try_module_get() will fail. */
-	stop_machine(__link_module, mod, NULL);
-
-	/* Size of section 0 is 0, so this works well if no params */
-	err = parse_args(mod->name, mod->args,
-			 (struct kernel_param *)
-			 sechdrs[setupindex].sh_addr,
-			 sechdrs[setupindex].sh_size
-			 / sizeof(struct kernel_param),
-			 NULL);
+	 * strong_try_module_get() will fail.
+	 * lockdep/oops can run asynchronously, so use the RCU list insertion
+	 * function to insert in a way safe to concurrent readers.
+	 * The mutex protects against concurrent writers.
+	 */
+	list_add_rcu(&mod->list, &modules);
+
+	err = parse_args(mod->name, mod->args, kp, num_kp, NULL);
 	if (err < 0)
 		goto unlink;
 
-	err = mod_sysfs_setup(mod,
-			      (struct kernel_param *)
-			      sechdrs[setupindex].sh_addr,
-			      sechdrs[setupindex].sh_size
-			      / sizeof(struct kernel_param));
+	err = mod_sysfs_setup(mod, kp, num_kp);
 	if (err < 0)
 		goto unlink;
 	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
| @@ -2473,7 +2432,7 @@ const char *module_address_lookup(unsigned long addr, | |||
| 2473 | const char *ret = NULL; | 2432 | const char *ret = NULL; |
| 2474 | 2433 | ||
| 2475 | preempt_disable(); | 2434 | preempt_disable(); |
| 2476 | list_for_each_entry(mod, &modules, list) { | 2435 | list_for_each_entry_rcu(mod, &modules, list) { |
| 2477 | if (within(addr, mod->module_init, mod->init_size) | 2436 | if (within(addr, mod->module_init, mod->init_size) |
| 2478 | || within(addr, mod->module_core, mod->core_size)) { | 2437 | || within(addr, mod->module_core, mod->core_size)) { |
| 2479 | if (modname) | 2438 | if (modname) |
| @@ -2496,7 +2455,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) | |||
| 2496 | struct module *mod; | 2455 | struct module *mod; |
| 2497 | 2456 | ||
| 2498 | preempt_disable(); | 2457 | preempt_disable(); |
| 2499 | list_for_each_entry(mod, &modules, list) { | 2458 | list_for_each_entry_rcu(mod, &modules, list) { |
| 2500 | if (within(addr, mod->module_init, mod->init_size) || | 2459 | if (within(addr, mod->module_init, mod->init_size) || |
| 2501 | within(addr, mod->module_core, mod->core_size)) { | 2460 | within(addr, mod->module_core, mod->core_size)) { |
| 2502 | const char *sym; | 2461 | const char *sym; |
| @@ -2520,7 +2479,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | |||
| 2520 | struct module *mod; | 2479 | struct module *mod; |
| 2521 | 2480 | ||
| 2522 | preempt_disable(); | 2481 | preempt_disable(); |
| 2523 | list_for_each_entry(mod, &modules, list) { | 2482 | list_for_each_entry_rcu(mod, &modules, list) { |
| 2524 | if (within(addr, mod->module_init, mod->init_size) || | 2483 | if (within(addr, mod->module_init, mod->init_size) || |
| 2525 | within(addr, mod->module_core, mod->core_size)) { | 2484 | within(addr, mod->module_core, mod->core_size)) { |
| 2526 | const char *sym; | 2485 | const char *sym; |
| @@ -2547,7 +2506,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | |||
| 2547 | struct module *mod; | 2506 | struct module *mod; |
| 2548 | 2507 | ||
| 2549 | preempt_disable(); | 2508 | preempt_disable(); |
| 2550 | list_for_each_entry(mod, &modules, list) { | 2509 | list_for_each_entry_rcu(mod, &modules, list) { |
| 2551 | if (symnum < mod->num_symtab) { | 2510 | if (symnum < mod->num_symtab) { |
| 2552 | *value = mod->symtab[symnum].st_value; | 2511 | *value = mod->symtab[symnum].st_value; |
| 2553 | *type = mod->symtab[symnum].st_info; | 2512 | *type = mod->symtab[symnum].st_info; |
| @@ -2590,7 +2549,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
| 2590 | ret = mod_find_symname(mod, colon+1); | 2549 | ret = mod_find_symname(mod, colon+1); |
| 2591 | *colon = ':'; | 2550 | *colon = ':'; |
| 2592 | } else { | 2551 | } else { |
| 2593 | list_for_each_entry(mod, &modules, list) | 2552 | list_for_each_entry_rcu(mod, &modules, list) |
| 2594 | if ((ret = mod_find_symname(mod, name)) != 0) | 2553 | if ((ret = mod_find_symname(mod, name)) != 0) |
| 2595 | break; | 2554 | break; |
| 2596 | } | 2555 | } |
| @@ -2599,23 +2558,6 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
| 2599 | } | 2558 | } |
| 2600 | #endif /* CONFIG_KALLSYMS */ | 2559 | #endif /* CONFIG_KALLSYMS */ |
| 2601 | 2560 | ||
| 2602 | /* Called by the /proc file system to return a list of modules. */ | ||
| 2603 | static void *m_start(struct seq_file *m, loff_t *pos) | ||
| 2604 | { | ||
| 2605 | mutex_lock(&module_mutex); | ||
| 2606 | return seq_list_start(&modules, *pos); | ||
| 2607 | } | ||
| 2608 | |||
| 2609 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | ||
| 2610 | { | ||
| 2611 | return seq_list_next(p, &modules, pos); | ||
| 2612 | } | ||
| 2613 | |||
| 2614 | static void m_stop(struct seq_file *m, void *p) | ||
| 2615 | { | ||
| 2616 | mutex_unlock(&module_mutex); | ||
| 2617 | } | ||
| 2618 | |||
| 2619 | static char *module_flags(struct module *mod, char *buf) | 2561 | static char *module_flags(struct module *mod, char *buf) |
| 2620 | { | 2562 | { |
| 2621 | int bx = 0; | 2563 | int bx = 0; |
| @@ -2649,6 +2591,24 @@ static char *module_flags(struct module *mod, char *buf) | |||
| 2649 | return buf; | 2591 | return buf; |
| 2650 | } | 2592 | } |
| 2651 | 2593 | ||
| 2594 | #ifdef CONFIG_PROC_FS | ||
| 2595 | /* Called by the /proc file system to return a list of modules. */ | ||
| 2596 | static void *m_start(struct seq_file *m, loff_t *pos) | ||
| 2597 | { | ||
| 2598 | mutex_lock(&module_mutex); | ||
| 2599 | return seq_list_start(&modules, *pos); | ||
| 2600 | } | ||
| 2601 | |||
| 2602 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | ||
| 2603 | { | ||
| 2604 | return seq_list_next(p, &modules, pos); | ||
| 2605 | } | ||
| 2606 | |||
| 2607 | static void m_stop(struct seq_file *m, void *p) | ||
| 2608 | { | ||
| 2609 | mutex_unlock(&module_mutex); | ||
| 2610 | } | ||
| 2611 | |||
| 2652 | static int m_show(struct seq_file *m, void *p) | 2612 | static int m_show(struct seq_file *m, void *p) |
| 2653 | { | 2613 | { |
| 2654 | struct module *mod = list_entry(p, struct module, list); | 2614 | struct module *mod = list_entry(p, struct module, list); |
| @@ -2679,13 +2639,33 @@ static int m_show(struct seq_file *m, void *p) | |||
| 2679 | Where refcount is a number or -, and deps is a comma-separated list | 2639 | Where refcount is a number or -, and deps is a comma-separated list |
| 2680 | of depends or -. | 2640 | of depends or -. |
| 2681 | */ | 2641 | */ |
| 2682 | const struct seq_operations modules_op = { | 2642 | static const struct seq_operations modules_op = { |
| 2683 | .start = m_start, | 2643 | .start = m_start, |
| 2684 | .next = m_next, | 2644 | .next = m_next, |
| 2685 | .stop = m_stop, | 2645 | .stop = m_stop, |
| 2686 | .show = m_show | 2646 | .show = m_show |
| 2687 | }; | 2647 | }; |
| 2688 | 2648 | ||
| 2649 | static int modules_open(struct inode *inode, struct file *file) | ||
| 2650 | { | ||
| 2651 | return seq_open(file, &modules_op); | ||
| 2652 | } | ||
| 2653 | |||
| 2654 | static const struct file_operations proc_modules_operations = { | ||
| 2655 | .open = modules_open, | ||
| 2656 | .read = seq_read, | ||
| 2657 | .llseek = seq_lseek, | ||
| 2658 | .release = seq_release, | ||
| 2659 | }; | ||
| 2660 | |||
| 2661 | static int __init proc_modules_init(void) | ||
| 2662 | { | ||
| 2663 | proc_create("modules", 0, NULL, &proc_modules_operations); | ||
| 2664 | return 0; | ||
| 2665 | } | ||
| 2666 | module_init(proc_modules_init); | ||
| 2667 | #endif | ||
| 2668 | |||
| 2689 | /* Given an address, look for it in the module exception tables. */ | 2669 | /* Given an address, look for it in the module exception tables. */ |
| 2690 | const struct exception_table_entry *search_module_extables(unsigned long addr) | 2670 | const struct exception_table_entry *search_module_extables(unsigned long addr) |
| 2691 | { | 2671 | { |
| @@ -2693,7 +2673,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
| 2693 | struct module *mod; | 2673 | struct module *mod; |
| 2694 | 2674 | ||
| 2695 | preempt_disable(); | 2675 | preempt_disable(); |
| 2696 | list_for_each_entry(mod, &modules, list) { | 2676 | list_for_each_entry_rcu(mod, &modules, list) { |
| 2697 | if (mod->num_exentries == 0) | 2677 | if (mod->num_exentries == 0) |
| 2698 | continue; | 2678 | continue; |
| 2699 | 2679 | ||
| @@ -2719,7 +2699,7 @@ int is_module_address(unsigned long addr) | |||
| 2719 | 2699 | ||
| 2720 | preempt_disable(); | 2700 | preempt_disable(); |
| 2721 | 2701 | ||
| 2722 | list_for_each_entry(mod, &modules, list) { | 2702 | list_for_each_entry_rcu(mod, &modules, list) { |
| 2723 | if (within(addr, mod->module_core, mod->core_size)) { | 2703 | if (within(addr, mod->module_core, mod->core_size)) { |
| 2724 | preempt_enable(); | 2704 | preempt_enable(); |
| 2725 | return 1; | 2705 | return 1; |
| @@ -2740,7 +2720,7 @@ struct module *__module_text_address(unsigned long addr) | |||
| 2740 | if (addr < module_addr_min || addr > module_addr_max) | 2720 | if (addr < module_addr_min || addr > module_addr_max) |
| 2741 | return NULL; | 2721 | return NULL; |
| 2742 | 2722 | ||
| 2743 | list_for_each_entry(mod, &modules, list) | 2723 | list_for_each_entry_rcu(mod, &modules, list) |
| 2744 | if (within(addr, mod->module_init, mod->init_text_size) | 2724 | if (within(addr, mod->module_init, mod->init_text_size) |
| 2745 | || within(addr, mod->module_core, mod->core_text_size)) | 2725 | || within(addr, mod->module_core, mod->core_text_size)) |
| 2746 | return mod; | 2726 | return mod; |
| @@ -2765,8 +2745,11 @@ void print_modules(void) | |||
| 2765 | char buf[8]; | 2745 | char buf[8]; |
| 2766 | 2746 | ||
| 2767 | printk("Modules linked in:"); | 2747 | printk("Modules linked in:"); |
| 2768 | list_for_each_entry(mod, &modules, list) | 2748 | /* Most callers should already have preempt disabled, but make sure */ |
| 2749 | preempt_disable(); | ||
| 2750 | list_for_each_entry_rcu(mod, &modules, list) | ||
| 2769 | printk(" %s%s", mod->name, module_flags(mod, buf)); | 2751 | printk(" %s%s", mod->name, module_flags(mod, buf)); |
| 2752 | preempt_enable(); | ||
| 2770 | if (last_unloaded_module[0]) | 2753 | if (last_unloaded_module[0]) |
| 2771 | printk(" [last unloaded: %s]", last_unloaded_module); | 2754 | printk(" [last unloaded: %s]", last_unloaded_module); |
| 2772 | printk("\n"); | 2755 | printk("\n"); |
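
Every `list_for_each_entry()` to `list_for_each_entry_rcu()` conversion above is the reader half of the new `list_add_rcu()` publish in load_module(): address lookups no longer depend on `module_mutex`, only on an RCU read-side critical section, which the pre-existing `preempt_disable()` already provides for classic RCU. A minimal sketch of that reader pattern; `addr_in_any_module()` and `module_stub` are hypothetical names, not kernel code:

```c
#include <linux/list.h>
#include <linux/rcupdate.h>

struct module_stub {			/* stand-in for struct module */
	struct list_head list;
	unsigned long base, size;
};

extern struct list_head modules;	/* writers: list_add_rcu() under a mutex */

static int addr_in_any_module(unsigned long addr)
{
	struct module_stub *mod;
	int hit = 0;

	preempt_disable();	/* read-side critical section for classic RCU */
	list_for_each_entry_rcu(mod, &modules, list) {
		if (addr - mod->base < mod->size) {	/* the within() idiom */
			hit = 1;
			break;
		}
	}
	preempt_enable();
	return hit;
}
```

print_modules() gains its own preempt_disable()/preempt_enable() pair for the same reason: oops paths may call it from contexts that have not disabled preemption themselves.
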
diff --git a/kernel/panic.c b/kernel/panic.c index bda561ef3cdf..6513aac8e992 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -34,13 +34,6 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); | |||
| 34 | 34 | ||
| 35 | EXPORT_SYMBOL(panic_notifier_list); | 35 | EXPORT_SYMBOL(panic_notifier_list); |
| 36 | 36 | ||
| 37 | static int __init panic_setup(char *str) | ||
| 38 | { | ||
| 39 | panic_timeout = simple_strtoul(str, NULL, 0); | ||
| 40 | return 1; | ||
| 41 | } | ||
| 42 | __setup("panic=", panic_setup); | ||
| 43 | |||
| 44 | static long no_blink(long time) | 37 | static long no_blink(long time) |
| 45 | { | 38 | { |
| 46 | return 0; | 39 | return 0; |
| @@ -218,13 +211,6 @@ void add_taint(unsigned flag) | |||
| 218 | } | 211 | } |
| 219 | EXPORT_SYMBOL(add_taint); | 212 | EXPORT_SYMBOL(add_taint); |
| 220 | 213 | ||
| 221 | static int __init pause_on_oops_setup(char *str) | ||
| 222 | { | ||
| 223 | pause_on_oops = simple_strtoul(str, NULL, 0); | ||
| 224 | return 1; | ||
| 225 | } | ||
| 226 | __setup("pause_on_oops=", pause_on_oops_setup); | ||
| 227 | |||
| 228 | static void spin_msec(int msecs) | 214 | static void spin_msec(int msecs) |
| 229 | { | 215 | { |
| 230 | int i; | 216 | int i; |
| @@ -384,3 +370,6 @@ void __stack_chk_fail(void) | |||
| 384 | } | 370 | } |
| 385 | EXPORT_SYMBOL(__stack_chk_fail); | 371 | EXPORT_SYMBOL(__stack_chk_fail); |
| 386 | #endif | 372 | #endif |
| 373 | |||
| 374 | core_param(panic, panic_timeout, int, 0644); | ||
| 375 | core_param(pause_on_oops, pause_on_oops, int, 0644); | ||
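
Both deleted `__setup()` handlers collapse into the two `core_param()` lines appended at the bottom of the file. The macro, new in this series, registers a built-in parameter with no "module." prefix, which `param_sysfs_builtin()` in the kernel/params.c hunk below files under the synthetic "kernel" module. Side by side, the panic case:

```c
/* Before: a hand-rolled parser per boot parameter. */
static int __init panic_setup(char *str)
{
	panic_timeout = simple_strtoul(str, NULL, 0);
	return 1;
}
__setup("panic=", panic_setup);

/* After: one line. Parsing comes from the standard int handler, and the
 * 0644 perm also exposes /sys/module/kernel/parameters/panic read-write. */
core_param(panic, panic_timeout, int, 0644);
```
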
diff --git a/kernel/params.c b/kernel/params.c index afc46a23eb6d..a1e3025b19a9 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
| @@ -373,6 +373,8 @@ int param_get_string(char *buffer, struct kernel_param *kp) | |||
| 373 | } | 373 | } |
| 374 | 374 | ||
| 375 | /* sysfs output in /sys/modules/XYZ/parameters/ */ | 375 | /* sysfs output in /sys/modules/XYZ/parameters/ */ |
| 376 | #define to_module_attr(n) container_of(n, struct module_attribute, attr) | ||
| 377 | #define to_module_kobject(n) container_of(n, struct module_kobject, kobj) | ||
| 376 | 378 | ||
| 377 | extern struct kernel_param __start___param[], __stop___param[]; | 379 | extern struct kernel_param __start___param[], __stop___param[]; |
| 378 | 380 | ||
| @@ -384,6 +386,7 @@ struct param_attribute | |||
| 384 | 386 | ||
| 385 | struct module_param_attrs | 387 | struct module_param_attrs |
| 386 | { | 388 | { |
| 389 | unsigned int num; | ||
| 387 | struct attribute_group grp; | 390 | struct attribute_group grp; |
| 388 | struct param_attribute attrs[0]; | 391 | struct param_attribute attrs[0]; |
| 389 | }; | 392 | }; |
| @@ -434,93 +437,120 @@ static ssize_t param_attr_store(struct module_attribute *mattr, | |||
| 434 | 437 | ||
| 435 | #ifdef CONFIG_SYSFS | 438 | #ifdef CONFIG_SYSFS |
| 436 | /* | 439 | /* |
| 437 | * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME | 440 | * add_sysfs_param - add a parameter to sysfs |
| 438 | * @mk: struct module_kobject (contains parent kobject) | 441 | * @mk: struct module_kobject |
| 439 | * @kparam: array of struct kernel_param, the actual parameter definitions | 442 | * @kparam: the actual parameter definition to add to sysfs |
| 440 | * @num_params: number of entries in array | 443 | * @name: name of parameter |
| 441 | * @name_skip: offset where the parameter name start in kparam[].name. Needed for built-in "modules" | ||
| 442 | * | 444 | * |
| 443 | * Create a kobject for a (per-module) group of parameters, and create files | 445 | * Create a kobject if for a (per-module) parameter if mp NULL, and |
| 444 | * in sysfs. A pointer to the param_kobject is returned on success, | 446 | * create file in sysfs. Returns an error on out of memory. Always cleans up |
| 445 | * NULL if there's no parameter to export, or other ERR_PTR(err). | 447 | * if there's an error. |
| 446 | */ | 448 | */ |
| 447 | static __modinit struct module_param_attrs * | 449 | static __modinit int add_sysfs_param(struct module_kobject *mk, |
| 448 | param_sysfs_setup(struct module_kobject *mk, | 450 | struct kernel_param *kp, |
| 449 | struct kernel_param *kparam, | 451 | const char *name) |
| 450 | unsigned int num_params, | ||
| 451 | unsigned int name_skip) | ||
| 452 | { | 452 | { |
| 453 | struct module_param_attrs *mp; | 453 | struct module_param_attrs *new; |
| 454 | unsigned int valid_attrs = 0; | 454 | struct attribute **attrs; |
| 455 | unsigned int i, size[2]; | 455 | int err, num; |
| 456 | struct param_attribute *pattr; | 456 | |
| 457 | struct attribute **gattr; | 457 | /* We don't bother calling this with invisible parameters. */ |
| 458 | int err; | 458 | BUG_ON(!kp->perm); |
| 459 | 459 | ||
| 460 | for (i=0; i<num_params; i++) { | 460 | if (!mk->mp) { |
| 461 | if (kparam[i].perm) | 461 | num = 0; |
| 462 | valid_attrs++; | 462 | attrs = NULL; |
| 463 | } else { | ||
| 464 | num = mk->mp->num; | ||
| 465 | attrs = mk->mp->grp.attrs; | ||
| 463 | } | 466 | } |
| 464 | 467 | ||
| 465 | if (!valid_attrs) | 468 | /* Enlarge. */ |
| 466 | return NULL; | 469 | new = krealloc(mk->mp, |
| 467 | 470 | sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1), | |
| 468 | size[0] = ALIGN(sizeof(*mp) + | 471 | GFP_KERNEL); |
| 469 | valid_attrs * sizeof(mp->attrs[0]), | 472 | if (!new) { |
| 470 | sizeof(mp->grp.attrs[0])); | 473 | kfree(mk->mp); |
| 471 | size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]); | 474 | err = -ENOMEM; |
| 472 | 475 | goto fail; | |
| 473 | mp = kzalloc(size[0] + size[1], GFP_KERNEL); | ||
| 474 | if (!mp) | ||
| 475 | return ERR_PTR(-ENOMEM); | ||
| 476 | |||
| 477 | mp->grp.name = "parameters"; | ||
| 478 | mp->grp.attrs = (void *)mp + size[0]; | ||
| 479 | |||
| 480 | pattr = &mp->attrs[0]; | ||
| 481 | gattr = &mp->grp.attrs[0]; | ||
| 482 | for (i = 0; i < num_params; i++) { | ||
| 483 | struct kernel_param *kp = &kparam[i]; | ||
| 484 | if (kp->perm) { | ||
| 485 | pattr->param = kp; | ||
| 486 | pattr->mattr.show = param_attr_show; | ||
| 487 | pattr->mattr.store = param_attr_store; | ||
| 488 | pattr->mattr.attr.name = (char *)&kp->name[name_skip]; | ||
| 489 | pattr->mattr.attr.mode = kp->perm; | ||
| 490 | *(gattr++) = &(pattr++)->mattr.attr; | ||
| 491 | } | ||
| 492 | } | 476 | } |
| 493 | *gattr = NULL; | 477 | attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL); |
| 494 | 478 | if (!attrs) { | |
| 495 | if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) { | 479 | err = -ENOMEM; |
| 496 | kfree(mp); | 480 | goto fail_free_new; |
| 497 | return ERR_PTR(err); | ||
| 498 | } | 481 | } |
| 499 | return mp; | 482 | |
| 483 | /* Sysfs wants everything zeroed. */ | ||
| 484 | memset(new, 0, sizeof(*new)); | ||
| 485 | memset(&new->attrs[num], 0, sizeof(new->attrs[num])); | ||
| 486 | memset(&attrs[num], 0, sizeof(attrs[num])); | ||
| 487 | new->grp.name = "parameters"; | ||
| 488 | new->grp.attrs = attrs; | ||
| 489 | |||
| 490 | /* Tack new one on the end. */ | ||
| 491 | new->attrs[num].param = kp; | ||
| 492 | new->attrs[num].mattr.show = param_attr_show; | ||
| 493 | new->attrs[num].mattr.store = param_attr_store; | ||
| 494 | new->attrs[num].mattr.attr.name = (char *)name; | ||
| 495 | new->attrs[num].mattr.attr.mode = kp->perm; | ||
| 496 | new->num = num+1; | ||
| 497 | |||
| 498 | /* Fix up all the pointers, since krealloc can move us */ | ||
| 499 | for (num = 0; num < new->num; num++) | ||
| 500 | new->grp.attrs[num] = &new->attrs[num].mattr.attr; | ||
| 501 | new->grp.attrs[num] = NULL; | ||
| 502 | |||
| 503 | mk->mp = new; | ||
| 504 | return 0; | ||
| 505 | |||
| 506 | fail_free_new: | ||
| 507 | kfree(new); | ||
| 508 | fail: | ||
| 509 | mk->mp = NULL; | ||
| 510 | return err; | ||
| 500 | } | 511 | } |
| 501 | 512 | ||
| 502 | #ifdef CONFIG_MODULES | 513 | #ifdef CONFIG_MODULES |
| 514 | static void free_module_param_attrs(struct module_kobject *mk) | ||
| 515 | { | ||
| 516 | kfree(mk->mp->grp.attrs); | ||
| 517 | kfree(mk->mp); | ||
| 518 | mk->mp = NULL; | ||
| 519 | } | ||
| 520 | |||
| 503 | /* | 521 | /* |
| 504 | * module_param_sysfs_setup - setup sysfs support for one module | 522 | * module_param_sysfs_setup - setup sysfs support for one module |
| 505 | * @mod: module | 523 | * @mod: module |
| 506 | * @kparam: module parameters (array) | 524 | * @kparam: module parameters (array) |
| 507 | * @num_params: number of module parameters | 525 | * @num_params: number of module parameters |
| 508 | * | 526 | * |
| 509 | * Adds sysfs entries for module parameters, and creates a link from | 527 | * Adds sysfs entries for module parameters under |
| 510 | * /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/ | 528 | * /sys/module/[mod->name]/parameters/ |
| 511 | */ | 529 | */ |
| 512 | int module_param_sysfs_setup(struct module *mod, | 530 | int module_param_sysfs_setup(struct module *mod, |
| 513 | struct kernel_param *kparam, | 531 | struct kernel_param *kparam, |
| 514 | unsigned int num_params) | 532 | unsigned int num_params) |
| 515 | { | 533 | { |
| 516 | struct module_param_attrs *mp; | 534 | int i, err; |
| 535 | bool params = false; | ||
| 536 | |||
| 537 | for (i = 0; i < num_params; i++) { | ||
| 538 | if (kparam[i].perm == 0) | ||
| 539 | continue; | ||
| 540 | err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name); | ||
| 541 | if (err) | ||
| 542 | return err; | ||
| 543 | params = true; | ||
| 544 | } | ||
| 517 | 545 | ||
| 518 | mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0); | 546 | if (!params) |
| 519 | if (IS_ERR(mp)) | 547 | return 0; |
| 520 | return PTR_ERR(mp); | ||
| 521 | 548 | ||
| 522 | mod->param_attrs = mp; | 549 | /* Create the param group. */ |
| 523 | return 0; | 550 | err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); |
| 551 | if (err) | ||
| 552 | free_module_param_attrs(&mod->mkobj); | ||
| 553 | return err; | ||
| 524 | } | 554 | } |
| 525 | 555 | ||
| 526 | /* | 556 | /* |
| @@ -532,43 +562,55 @@ int module_param_sysfs_setup(struct module *mod, | |||
| 532 | */ | 562 | */ |
| 533 | void module_param_sysfs_remove(struct module *mod) | 563 | void module_param_sysfs_remove(struct module *mod) |
| 534 | { | 564 | { |
| 535 | if (mod->param_attrs) { | 565 | if (mod->mkobj.mp) { |
| 536 | sysfs_remove_group(&mod->mkobj.kobj, | 566 | sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); |
| 537 | &mod->param_attrs->grp); | ||
| 538 | /* We are positive that no one is using any param | 567 | /* We are positive that no one is using any param |
| 539 | * attrs at this point. Deallocate immediately. */ | 568 | * attrs at this point. Deallocate immediately. */ |
| 540 | kfree(mod->param_attrs); | 569 | free_module_param_attrs(&mod->mkobj); |
| 541 | mod->param_attrs = NULL; | ||
| 542 | } | 570 | } |
| 543 | } | 571 | } |
| 544 | #endif | 572 | #endif |
| 545 | 573 | ||
| 546 | /* | 574 | static void __init kernel_add_sysfs_param(const char *name, |
| 547 | * kernel_param_sysfs_setup - wrapper for built-in params support | 575 | struct kernel_param *kparam, |
| 548 | */ | 576 | unsigned int name_skip) |
| 549 | static void __init kernel_param_sysfs_setup(const char *name, | ||
| 550 | struct kernel_param *kparam, | ||
| 551 | unsigned int num_params, | ||
| 552 | unsigned int name_skip) | ||
| 553 | { | 577 | { |
| 554 | struct module_kobject *mk; | 578 | struct module_kobject *mk; |
| 555 | int ret; | 579 | struct kobject *kobj; |
| 580 | int err; | ||
| 556 | 581 | ||
| 557 | mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); | 582 | kobj = kset_find_obj(module_kset, name); |
| 558 | BUG_ON(!mk); | 583 | if (kobj) { |
| 559 | 584 | /* We already have one. Remove params so we can add more. */ | |
| 560 | mk->mod = THIS_MODULE; | 585 | mk = to_module_kobject(kobj); |
| 561 | mk->kobj.kset = module_kset; | 586 | /* We need to remove it before adding parameters. */ |
| 562 | ret = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); | 587 | sysfs_remove_group(&mk->kobj, &mk->mp->grp); |
| 563 | if (ret) { | 588 | } else { |
| 564 | kobject_put(&mk->kobj); | 589 | mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); |
| 565 | printk(KERN_ERR "Module '%s' failed to be added to sysfs, " | 590 | BUG_ON(!mk); |
| 566 | "error number %d\n", name, ret); | 591 | |
| 567 | printk(KERN_ERR "The system will be unstable now.\n"); | 592 | mk->mod = THIS_MODULE; |
| 568 | return; | 593 | mk->kobj.kset = module_kset; |
| 594 | err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, | ||
| 595 | "%s", name); | ||
| 596 | if (err) { | ||
| 597 | kobject_put(&mk->kobj); | ||
| 598 | printk(KERN_ERR "Module '%s' failed to be added to sysfs, " | ||
| 599 | "error number %d\n", name, err); | ||
| 600 | printk(KERN_ERR "The system will be unstable now.\n"); | ||
| 601 | return; | ||
| 602 | } | ||
| 603 | /* Take an extra ref so the final kobject_put() balances both paths. */ | ||
| 604 | kobject_get(&mk->kobj); | ||
| 569 | } | 605 | } |
| 570 | param_sysfs_setup(mk, kparam, num_params, name_skip); | 606 | |
| 607 | /* These should not fail at boot. */ | ||
| 608 | err = add_sysfs_param(mk, kparam, kparam->name + name_skip); | ||
| 609 | BUG_ON(err); | ||
| 610 | err = sysfs_create_group(&mk->kobj, &mk->mp->grp); | ||
| 611 | BUG_ON(err); | ||
| 571 | kobject_uevent(&mk->kobj, KOBJ_ADD); | 612 | kobject_uevent(&mk->kobj, KOBJ_ADD); |
| 613 | kobject_put(&mk->kobj); | ||
| 572 | } | 614 | } |
| 573 | 615 | ||
| 574 | /* | 616 | /* |
| @@ -579,60 +621,36 @@ static void __init kernel_param_sysfs_setup(const char *name, | |||
| 579 | * The "module" name (KBUILD_MODNAME) is stored before a dot, the | 621 | * The "module" name (KBUILD_MODNAME) is stored before a dot, the |
| 580 | * "parameter" name is stored behind a dot in kernel_param->name. So, | 622 | * "parameter" name is stored behind a dot in kernel_param->name. So, |
| 581 | * extract the "module" name for all built-in kernel_param-eters, | 623 | * extract the "module" name for all built-in kernel_param-eters, |
| 582 | * and for all who have the same, call kernel_param_sysfs_setup. | 624 | * and for all who have the same, call kernel_add_sysfs_param. |
| 583 | */ | 625 | */ |
| 584 | static void __init param_sysfs_builtin(void) | 626 | static void __init param_sysfs_builtin(void) |
| 585 | { | 627 | { |
| 586 | struct kernel_param *kp, *kp_begin = NULL; | 628 | struct kernel_param *kp; |
| 587 | unsigned int i, name_len, count = 0; | 629 | unsigned int name_len; |
| 588 | char modname[MODULE_NAME_LEN + 1] = ""; | 630 | char modname[MODULE_NAME_LEN]; |
| 589 | 631 | ||
| 590 | for (i=0; i < __stop___param - __start___param; i++) { | 632 | for (kp = __start___param; kp < __stop___param; kp++) { |
| 591 | char *dot; | 633 | char *dot; |
| 592 | size_t max_name_len; | ||
| 593 | 634 | ||
| 594 | kp = &__start___param[i]; | 635 | if (kp->perm == 0) |
| 595 | max_name_len = | 636 | continue; |
| 596 | min_t(size_t, MODULE_NAME_LEN, strlen(kp->name)); | ||
| 597 | 637 | ||
| 598 | dot = memchr(kp->name, '.', max_name_len); | 638 | dot = strchr(kp->name, '.'); |
| 599 | if (!dot) { | 639 | if (!dot) { |
| 600 | DEBUGP("couldn't find period in first %d characters " | 640 | /* This happens for core_param() */ |
| 601 | "of %s\n", MODULE_NAME_LEN, kp->name); | 641 | strcpy(modname, "kernel"); |
| 602 | continue; | 642 | name_len = 0; |
| 603 | } | 643 | } else { |
| 604 | name_len = dot - kp->name; | 644 | name_len = dot - kp->name + 1; |
| 605 | 645 | strlcpy(modname, kp->name, name_len); | |
| 606 | /* new kbuild_modname? */ | ||
| 607 | if (strlen(modname) != name_len | ||
| 608 | || strncmp(modname, kp->name, name_len) != 0) { | ||
| 609 | /* add a new kobject for previous kernel_params. */ | ||
| 610 | if (count) | ||
| 611 | kernel_param_sysfs_setup(modname, | ||
| 612 | kp_begin, | ||
| 613 | count, | ||
| 614 | strlen(modname)+1); | ||
| 615 | |||
| 616 | strncpy(modname, kp->name, name_len); | ||
| 617 | modname[name_len] = '\0'; | ||
| 618 | count = 0; | ||
| 619 | kp_begin = kp; | ||
| 620 | } | 646 | } |
| 621 | count++; | 647 | kernel_add_sysfs_param(modname, kp, name_len); |
| 622 | } | 648 | } |
| 623 | |||
| 624 | /* last kernel_params need to be registered as well */ | ||
| 625 | if (count) | ||
| 626 | kernel_param_sysfs_setup(modname, kp_begin, count, | ||
| 627 | strlen(modname)+1); | ||
| 628 | } | 649 | } |
| 629 | 650 | ||
| 630 | 651 | ||
| 631 | /* module-related sysfs stuff */ | 652 | /* module-related sysfs stuff */ |
| 632 | 653 | ||
| 633 | #define to_module_attr(n) container_of(n, struct module_attribute, attr); | ||
| 634 | #define to_module_kobject(n) container_of(n, struct module_kobject, kobj); | ||
| 635 | |||
| 636 | static ssize_t module_attr_show(struct kobject *kobj, | 654 | static ssize_t module_attr_show(struct kobject *kobj, |
| 637 | struct attribute *attr, | 655 | struct attribute *attr, |
| 638 | char *buf) | 656 | char *buf) |
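
add_sysfs_param() grows two allocations per added parameter and then rebuilds every `grp.attrs` pointer, because `krealloc()` may move the `module_param_attrs` block and leave the old attribute pointers dangling. A runnable userspace analogue of that grow-and-fix-up dance; all names here are illustrative:

```c
#include <stdio.h>
#include <stdlib.h>

struct attr { const char *name; };
struct group {
	unsigned int num;
	struct attr **index;	/* plays the role of grp.attrs */
	struct attr attrs[];	/* flexible array, like attrs[0] there */
};

static struct group *group_add(struct group *g, const char *name)
{
	struct attr **idx = g ? g->index : NULL;
	unsigned int num = g ? g->num : 0;

	g = realloc(g, sizeof(*g) + sizeof(g->attrs[0]) * (num + 1));
	idx = realloc(idx, sizeof(idx[0]) * (num + 2));	/* +1 NULL sentinel */
	if (!g || !idx)
		return NULL;	/* the kernel code frees and returns -ENOMEM */

	g->attrs[num].name = name;
	g->num = num + 1;
	g->index = idx;
	/* Fix up all the pointers, since realloc can move us. */
	for (num = 0; num < g->num; num++)
		g->index[num] = &g->attrs[num];
	g->index[num] = NULL;
	return g;
}

int main(void)
{
	struct group *g = group_add(NULL, "panic");
	g = group_add(g, "pause_on_oops");
	for (struct attr **a = g->index; *a; a++)
		puts((*a)->name);
	return 0;
}
```

The kernel version additionally zeroes the newly appended entries, since sysfs expects attribute structs to start out zeroed.
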
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index b931d7cedbfa..5e79c662294b 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
| @@ -639,7 +639,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) | |||
| 639 | (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) | 639 | (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) |
| 640 | timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); | 640 | timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); |
| 641 | 641 | ||
| 642 | remaining = ktime_sub(timer->expires, now); | 642 | remaining = ktime_sub(hrtimer_get_expires(timer), now); |
| 643 | /* Return 0 only, when the timer is expired and not pending */ | 643 | /* Return 0 only, when the timer is expired and not pending */ |
| 644 | if (remaining.tv64 <= 0) { | 644 | if (remaining.tv64 <= 0) { |
| 645 | /* | 645 | /* |
| @@ -733,7 +733,7 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
| 733 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); | 733 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); |
| 734 | timr->it.real.timer.function = posix_timer_fn; | 734 | timr->it.real.timer.function = posix_timer_fn; |
| 735 | 735 | ||
| 736 | timer->expires = timespec_to_ktime(new_setting->it_value); | 736 | hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value)); |
| 737 | 737 | ||
| 738 | /* Convert interval */ | 738 | /* Convert interval */ |
| 739 | timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); | 739 | timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); |
| @@ -742,14 +742,12 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
| 742 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { | 742 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { |
| 743 | /* Setup correct expiry time for relative timers */ | 743 | /* Setup correct expiry time for relative timers */ |
| 744 | if (mode == HRTIMER_MODE_REL) { | 744 | if (mode == HRTIMER_MODE_REL) { |
| 745 | timer->expires = | 745 | hrtimer_add_expires(timer, timer->base->get_time()); |
| 746 | ktime_add_safe(timer->expires, | ||
| 747 | timer->base->get_time()); | ||
| 748 | } | 746 | } |
| 749 | return 0; | 747 | return 0; |
| 750 | } | 748 | } |
| 751 | 749 | ||
| 752 | hrtimer_start(timer, timer->expires, mode); | 750 | hrtimer_start_expires(timer, mode); |
| 753 | return 0; | 751 | return 0; |
| 754 | } | 752 | } |
| 755 | 753 | ||
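
These posix-timers hunks are one slice of a tree-wide conversion (rtmutex.c and sched.c below are others) away from touching `timer->expires` directly, in preparation for range hrtimers where a timer carries both a soft and a hard expiry. Roughly what the new accessors do; the field names follow the 2.6.28 layout, but treat the bodies as a sketch rather than the authoritative include/linux/hrtimer.h:

```c
/* Sketch only: _softexpires exists so later patches can express
 * "fire anywhere in [soft, hard]" timer slack. */
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
	timer->_expires = time;
	timer->_softexpires = time;
}

static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
{
	return timer->_expires;
}

static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
{
	timer->_expires = ktime_add_safe(timer->_expires, time);
	timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
}

static inline int hrtimer_start_expires(struct hrtimer *timer,
					enum hrtimer_mode mode)
{
	return hrtimer_start(timer, hrtimer_get_expires(timer), mode);
}
```

Note how common_timer_set() benefits: `hrtimer_add_expires()` keeps the `ktime_add_safe()` overflow protection that the open-coded version had.
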
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 331f9836383f..c9d74083746f 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
| @@ -651,7 +651,7 @@ static int software_resume(void) | |||
| 651 | pr_debug("PM: Preparing processes for restore.\n"); | 651 | pr_debug("PM: Preparing processes for restore.\n"); |
| 652 | error = prepare_processes(); | 652 | error = prepare_processes(); |
| 653 | if (error) { | 653 | if (error) { |
| 654 | swsusp_close(); | 654 | swsusp_close(FMODE_READ); |
| 655 | goto Done; | 655 | goto Done; |
| 656 | } | 656 | } |
| 657 | 657 | ||
diff --git a/kernel/power/power.h b/kernel/power/power.h index acc0c101dbd5..46b5ec7a3afb 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
| @@ -153,7 +153,7 @@ extern int swsusp_shrink_memory(void); | |||
| 153 | extern void swsusp_free(void); | 153 | extern void swsusp_free(void); |
| 154 | extern int swsusp_read(unsigned int *flags_p); | 154 | extern int swsusp_read(unsigned int *flags_p); |
| 155 | extern int swsusp_write(unsigned int flags); | 155 | extern int swsusp_write(unsigned int flags); |
| 156 | extern void swsusp_close(void); | 156 | extern void swsusp_close(fmode_t); |
| 157 | 157 | ||
| 158 | struct timeval; | 158 | struct timeval; |
| 159 | /* kernel/power/swsusp.c */ | 159 | /* kernel/power/swsusp.c */ |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 80ccac849e46..b7713b53d07a 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
| @@ -172,13 +172,13 @@ static int swsusp_swap_check(void) /* This is called before saving image */ | |||
| 172 | return res; | 172 | return res; |
| 173 | 173 | ||
| 174 | root_swap = res; | 174 | root_swap = res; |
| 175 | res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR); | 175 | res = blkdev_get(resume_bdev, FMODE_WRITE); |
| 176 | if (res) | 176 | if (res) |
| 177 | return res; | 177 | return res; |
| 178 | 178 | ||
| 179 | res = set_blocksize(resume_bdev, PAGE_SIZE); | 179 | res = set_blocksize(resume_bdev, PAGE_SIZE); |
| 180 | if (res < 0) | 180 | if (res < 0) |
| 181 | blkdev_put(resume_bdev); | 181 | blkdev_put(resume_bdev, FMODE_WRITE); |
| 182 | 182 | ||
| 183 | return res; | 183 | return res; |
| 184 | } | 184 | } |
| @@ -426,7 +426,7 @@ int swsusp_write(unsigned int flags) | |||
| 426 | 426 | ||
| 427 | release_swap_writer(&handle); | 427 | release_swap_writer(&handle); |
| 428 | out: | 428 | out: |
| 429 | swsusp_close(); | 429 | swsusp_close(FMODE_WRITE); |
| 430 | return error; | 430 | return error; |
| 431 | } | 431 | } |
| 432 | 432 | ||
| @@ -574,7 +574,7 @@ int swsusp_read(unsigned int *flags_p) | |||
| 574 | error = load_image(&handle, &snapshot, header->pages - 1); | 574 | error = load_image(&handle, &snapshot, header->pages - 1); |
| 575 | release_swap_reader(&handle); | 575 | release_swap_reader(&handle); |
| 576 | 576 | ||
| 577 | blkdev_put(resume_bdev); | 577 | blkdev_put(resume_bdev, FMODE_READ); |
| 578 | 578 | ||
| 579 | if (!error) | 579 | if (!error) |
| 580 | pr_debug("PM: Image successfully loaded\n"); | 580 | pr_debug("PM: Image successfully loaded\n"); |
| @@ -609,7 +609,7 @@ int swsusp_check(void) | |||
| 609 | return -EINVAL; | 609 | return -EINVAL; |
| 610 | } | 610 | } |
| 611 | if (error) | 611 | if (error) |
| 612 | blkdev_put(resume_bdev); | 612 | blkdev_put(resume_bdev, FMODE_READ); |
| 613 | else | 613 | else |
| 614 | pr_debug("PM: Signature found, resuming\n"); | 614 | pr_debug("PM: Signature found, resuming\n"); |
| 615 | } else { | 615 | } else { |
| @@ -626,14 +626,14 @@ int swsusp_check(void) | |||
| 626 | * swsusp_close - close swap device. | 626 | * swsusp_close - close swap device. |
| 627 | */ | 627 | */ |
| 628 | 628 | ||
| 629 | void swsusp_close(void) | 629 | void swsusp_close(fmode_t mode) |
| 630 | { | 630 | { |
| 631 | if (IS_ERR(resume_bdev)) { | 631 | if (IS_ERR(resume_bdev)) { |
| 632 | pr_debug("PM: Image device not initialised\n"); | 632 | pr_debug("PM: Image device not initialised\n"); |
| 633 | return; | 633 | return; |
| 634 | } | 634 | } |
| 635 | 635 | ||
| 636 | blkdev_put(resume_bdev); | 636 | blkdev_put(resume_bdev, mode); |
| 637 | } | 637 | } |
| 638 | 638 | ||
| 639 | static int swsusp_header_init(void) | 639 | static int swsusp_header_init(void) |
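
The swsusp changes are the caller side of a block-layer API change: `blkdev_get()` now takes the `fmode_t` directly, and `blkdev_put()` must be told the same mode so per-mode open state can be released correctly, hence the new `fmode_t` argument on `swsusp_close()`. The pairing rule in isolation, as a hypothetical wrapper:

```c
/* Whatever fmode_t opened the device must be named again on release:
 * FMODE_READ on the resume path, FMODE_WRITE when saving the image. */
static int with_resume_bdev(struct block_device *bdev, fmode_t mode,
			    int (*io)(struct block_device *))
{
	int err = blkdev_get(bdev, mode);

	if (err)
		return err;
	err = io(bdev);
	blkdev_put(bdev, mode);	/* must match the blkdev_get() mode */
	return err;
}
```
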
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 467d5940f624..ad63af8b2521 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
| @@ -119,18 +119,19 @@ static void _rcu_barrier(enum rcu_barrier type) | |||
| 119 | /* Take cpucontrol mutex to protect against CPU hotplug */ | 119 | /* Take cpucontrol mutex to protect against CPU hotplug */ |
| 120 | mutex_lock(&rcu_barrier_mutex); | 120 | mutex_lock(&rcu_barrier_mutex); |
| 121 | init_completion(&rcu_barrier_completion); | 121 | init_completion(&rcu_barrier_completion); |
| 122 | atomic_set(&rcu_barrier_cpu_count, 0); | ||
| 123 | /* | 122 | /* |
| 124 | * The queueing of callbacks in all CPUs must be atomic with | 123 | * Initialize rcu_barrier_cpu_count to 1, then invoke |
| 125 | * respect to RCU, otherwise one CPU may queue a callback, | 124 | * rcu_barrier_func() on each CPU, so that each CPU also has |
| 126 | * wait for a grace period, decrement barrier count and call | 125 | * incremented rcu_barrier_cpu_count. Only then is it safe to |
| 127 | * complete(), while other CPUs have not yet queued anything. | 126 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU |
| 128 | * So, we need to make sure that grace periods cannot complete | 127 | * might complete its grace period before all of the other CPUs |
| 129 | * until all the callbacks are queued. | 128 | * did their increment, causing this function to return too |
| 129 | * early. | ||
| 130 | */ | 130 | */ |
| 131 | rcu_read_lock(); | 131 | atomic_set(&rcu_barrier_cpu_count, 1); |
| 132 | on_each_cpu(rcu_barrier_func, (void *)type, 1); | 132 | on_each_cpu(rcu_barrier_func, (void *)type, 1); |
| 133 | rcu_read_unlock(); | 133 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
| 134 | complete(&rcu_barrier_completion); | ||
| 134 | wait_for_completion(&rcu_barrier_completion); | 135 | wait_for_completion(&rcu_barrier_completion); |
| 135 | mutex_unlock(&rcu_barrier_mutex); | 136 | mutex_unlock(&rcu_barrier_mutex); |
| 136 | } | 137 | } |
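
The _rcu_barrier() rewrite drops the `rcu_read_lock()` around `on_each_cpu()`, which did not actually prevent the race it targeted, in favor of plain reference counting: the initiator holds a count of its own, every CPU increments before any callback can decrement, and only the final decrement fires the completion. The shape of the fix in miniature, with the mechanics compressed into comments:

```c
/* Broken: atomic_set(&cnt, 0). CPU A's callback can inc then dec back to
 * zero, firing complete() before CPU B ever incremented. Fixed: */
atomic_set(&cnt, 1);			/* 1 = the initiator's own reference */
on_each_cpu(rcu_barrier_func, type, 1);	/* each CPU: atomic_inc(&cnt), then
					 * queues a callback that later does
					 * atomic_dec_and_test() + complete() */
if (atomic_dec_and_test(&cnt))		/* drop the initiator's reference */
	complete(&done);		/* every callback already finished */
wait_for_completion(&done);
```
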
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 6522ae5b14a2..69d9cb921ffa 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
| @@ -631,8 +631,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 631 | 631 | ||
| 632 | /* Setup the timer, when timeout != NULL */ | 632 | /* Setup the timer, when timeout != NULL */ |
| 633 | if (unlikely(timeout)) { | 633 | if (unlikely(timeout)) { |
| 634 | hrtimer_start(&timeout->timer, timeout->timer.expires, | 634 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); |
| 635 | HRTIMER_MODE_ABS); | ||
| 636 | if (!hrtimer_active(&timeout->timer)) | 635 | if (!hrtimer_active(&timeout->timer)) |
| 637 | timeout->task = NULL; | 636 | timeout->task = NULL; |
| 638 | } | 637 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index d906f72b42d2..6625c3c4b10d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | #include <linux/cpuset.h> | 55 | #include <linux/cpuset.h> |
| 56 | #include <linux/percpu.h> | 56 | #include <linux/percpu.h> |
| 57 | #include <linux/kthread.h> | 57 | #include <linux/kthread.h> |
| 58 | #include <linux/proc_fs.h> | ||
| 58 | #include <linux/seq_file.h> | 59 | #include <linux/seq_file.h> |
| 59 | #include <linux/sysctl.h> | 60 | #include <linux/sysctl.h> |
| 60 | #include <linux/syscalls.h> | 61 | #include <linux/syscalls.h> |
| @@ -227,9 +228,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
| 227 | 228 | ||
| 228 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); | 229 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); |
| 229 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); | 230 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); |
| 230 | hrtimer_start(&rt_b->rt_period_timer, | 231 | hrtimer_start_expires(&rt_b->rt_period_timer, |
| 231 | rt_b->rt_period_timer.expires, | 232 | HRTIMER_MODE_ABS); |
| 232 | HRTIMER_MODE_ABS); | ||
| 233 | } | 233 | } |
| 234 | spin_unlock(&rt_b->rt_runtime_lock); | 234 | spin_unlock(&rt_b->rt_runtime_lock); |
| 235 | } | 235 | } |
| @@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
| 819 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 819 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
| 820 | 820 | ||
| 821 | /* | 821 | /* |
| 822 | * Inject some fuzziness into changing the per-cpu group shares; | ||
| 823 | * this avoids remote rq-locks at the expense of fairness. | ||
| 824 | * default: 4 | ||
| 825 | */ | ||
| 826 | unsigned int sysctl_sched_shares_thresh = 4; | ||
| 827 | |||
| 828 | /* | ||
| 822 | * period over which we measure -rt task cpu usage in us. | 829 | * period over which we measure -rt task cpu usage in us. |
| 823 | * default: 1s | 830 | * default: 1s |
| 824 | */ | 831 | */ |
| @@ -1064,7 +1071,7 @@ static void hrtick_start(struct rq *rq, u64 delay) | |||
| 1064 | struct hrtimer *timer = &rq->hrtick_timer; | 1071 | struct hrtimer *timer = &rq->hrtick_timer; |
| 1065 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); | 1072 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); |
| 1066 | 1073 | ||
| 1067 | timer->expires = time; | 1074 | hrtimer_set_expires(timer, time); |
| 1068 | 1075 | ||
| 1069 | if (rq == this_rq()) { | 1076 | if (rq == this_rq()) { |
| 1070 | hrtimer_restart(timer); | 1077 | hrtimer_restart(timer); |
| @@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); | |||
| 1454 | * Calculate and set the cpu's group shares. | 1461 | * Calculate and set the cpu's group shares. |
| 1455 | */ | 1462 | */ |
| 1456 | static void | 1463 | static void |
| 1457 | __update_group_shares_cpu(struct task_group *tg, int cpu, | 1464 | update_group_shares_cpu(struct task_group *tg, int cpu, |
| 1458 | unsigned long sd_shares, unsigned long sd_rq_weight) | 1465 | unsigned long sd_shares, unsigned long sd_rq_weight) |
| 1459 | { | 1466 | { |
| 1460 | int boost = 0; | 1467 | int boost = 0; |
| 1461 | unsigned long shares; | 1468 | unsigned long shares; |
| @@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, | |||
| 1486 | * | 1493 | * |
| 1487 | */ | 1494 | */ |
| 1488 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); | 1495 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); |
| 1496 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | ||
| 1489 | 1497 | ||
| 1490 | /* | 1498 | if (abs(shares - tg->se[cpu]->load.weight) > |
| 1491 | * record the actual number of shares, not the boosted amount. | 1499 | sysctl_sched_shares_thresh) { |
| 1492 | */ | 1500 | struct rq *rq = cpu_rq(cpu); |
| 1493 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | 1501 | unsigned long flags; |
| 1494 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
| 1495 | 1502 | ||
| 1496 | if (shares < MIN_SHARES) | 1503 | spin_lock_irqsave(&rq->lock, flags); |
| 1497 | shares = MIN_SHARES; | 1504 | /* |
| 1498 | else if (shares > MAX_SHARES) | 1505 | * record the actual number of shares, not the boosted amount. |
| 1499 | shares = MAX_SHARES; | 1506 | */ |
| 1507 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
| 1508 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
| 1500 | 1509 | ||
| 1501 | __set_se_shares(tg->se[cpu], shares); | 1510 | __set_se_shares(tg->se[cpu], shares); |
| 1511 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 1512 | } | ||
| 1502 | } | 1513 | } |
| 1503 | 1514 | ||
| 1504 | /* | 1515 | /* |
| @@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
| 1527 | if (!rq_weight) | 1538 | if (!rq_weight) |
| 1528 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; | 1539 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; |
| 1529 | 1540 | ||
| 1530 | for_each_cpu_mask(i, sd->span) { | 1541 | for_each_cpu_mask(i, sd->span) |
| 1531 | struct rq *rq = cpu_rq(i); | 1542 | update_group_shares_cpu(tg, i, shares, rq_weight); |
| 1532 | unsigned long flags; | ||
| 1533 | |||
| 1534 | spin_lock_irqsave(&rq->lock, flags); | ||
| 1535 | __update_group_shares_cpu(tg, i, shares, rq_weight); | ||
| 1536 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 1537 | } | ||
| 1538 | 1543 | ||
| 1539 | return 0; | 1544 | return 0; |
| 1540 | } | 1545 | } |
| @@ -4443,12 +4448,8 @@ need_resched_nonpreemptible: | |||
| 4443 | if (sched_feat(HRTICK)) | 4448 | if (sched_feat(HRTICK)) |
| 4444 | hrtick_clear(rq); | 4449 | hrtick_clear(rq); |
| 4445 | 4450 | ||
| 4446 | /* | 4451 | spin_lock_irq(&rq->lock); |
| 4447 | * Do the rq-clock update outside the rq lock: | ||
| 4448 | */ | ||
| 4449 | local_irq_disable(); | ||
| 4450 | update_rq_clock(rq); | 4452 | update_rq_clock(rq); |
| 4451 | spin_lock(&rq->lock); | ||
| 4452 | clear_tsk_need_resched(prev); | 4453 | clear_tsk_need_resched(prev); |
| 4453 | 4454 | ||
| 4454 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4455 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
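
Within the sched.c diff, the renamed update_group_shares_cpu() now folds in the clamp and, more importantly, only takes the remote runqueue lock when the recomputed share moved by more than the new `sysctl_sched_shares_thresh`, so tg_shares_up() loses its unconditional per-CPU lock/unlock. The hysteresis idea in isolation; `maybe_apply_shares()` is an illustrative helper, not scheduler code:

```c
/* Tolerate small drift in the applied share to avoid a cross-CPU lock. */
static void maybe_apply_shares(unsigned long new_shares,
			       unsigned long *applied,
			       spinlock_t *remote_rq_lock)
{
	long delta = (long)new_shares - (long)*applied;

	if (delta < 0)
		delta = -delta;
	if (delta <= sysctl_sched_shares_thresh)
		return;		/* change too small to pay for the lock */

	spin_lock(remote_rq_lock);	/* real code uses spin_lock_irqsave */
	*applied = new_shares;
	spin_unlock(remote_rq_lock);
}
```
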
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f604dae71316..9573c33688b8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL; | |||
| 73 | 73 | ||
| 74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
| 75 | 75 | ||
| 76 | static const struct sched_class fair_sched_class; | ||
| 77 | |||
| 76 | /************************************************************** | 78 | /************************************************************** |
| 77 | * CFS operations on generic schedulable entities: | 79 | * CFS operations on generic schedulable entities: |
| 78 | */ | 80 | */ |
| @@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
| 334 | #endif | 336 | #endif |
| 335 | 337 | ||
| 336 | /* | 338 | /* |
| 337 | * delta *= w / rw | 339 | * delta *= P[w / rw] |
| 338 | */ | 340 | */ |
| 339 | static inline unsigned long | 341 | static inline unsigned long |
| 340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | 342 | calc_delta_weight(unsigned long delta, struct sched_entity *se) |
| @@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se) | |||
| 348 | } | 350 | } |
| 349 | 351 | ||
| 350 | /* | 352 | /* |
| 351 | * delta *= rw / w | 353 | * delta /= w |
| 352 | */ | 354 | */ |
| 353 | static inline unsigned long | 355 | static inline unsigned long |
| 354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | 356 | calc_delta_fair(unsigned long delta, struct sched_entity *se) |
| 355 | { | 357 | { |
| 356 | for_each_sched_entity(se) { | 358 | if (unlikely(se->load.weight != NICE_0_LOAD)) |
| 357 | delta = calc_delta_mine(delta, | 359 | delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load); |
| 358 | cfs_rq_of(se)->load.weight, &se->load); | ||
| 359 | } | ||
| 360 | 360 | ||
| 361 | return delta; | 361 | return delta; |
| 362 | } | 362 | } |
| @@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running) | |||
| 386 | * We calculate the wall-time slice from the period by taking a part | 386 | * We calculate the wall-time slice from the period by taking a part |
| 387 | * proportional to the weight. | 387 | * proportional to the weight. |
| 388 | * | 388 | * |
| 389 | * s = p*w/rw | 389 | * s = p*P[w/rw] |
| 390 | */ | 390 | */ |
| 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 392 | { | 392 | { |
| 393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 393 | unsigned long nr_running = cfs_rq->nr_running; |
| 394 | |||
| 395 | if (unlikely(!se->on_rq)) | ||
| 396 | nr_running++; | ||
| 397 | |||
| 398 | return calc_delta_weight(__sched_period(nr_running), se); | ||
| 394 | } | 399 | } |
| 395 | 400 | ||
| 396 | /* | 401 | /* |
| 397 | * We calculate the vruntime slice of a to be inserted task | 402 | * We calculate the vruntime slice of a to be inserted task |
| 398 | * | 403 | * |
| 399 | * vs = s*rw/w = p | 404 | * vs = s/w |
| 400 | */ | 405 | */ |
| 401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 406 | static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 402 | { | 407 | { |
| 403 | unsigned long nr_running = cfs_rq->nr_running; | 408 | return calc_delta_fair(sched_slice(cfs_rq, se), se); |
| 404 | |||
| 405 | if (!se->on_rq) | ||
| 406 | nr_running++; | ||
| 407 | |||
| 408 | return __sched_period(nr_running); | ||
| 409 | } | 409 | } |
| 410 | 410 | ||
| 411 | /* | 411 | /* |
| @@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
| 628 | * stays open at the end. | 628 | * stays open at the end. |
| 629 | */ | 629 | */ |
| 630 | if (initial && sched_feat(START_DEBIT)) | 630 | if (initial && sched_feat(START_DEBIT)) |
| 631 | vruntime += sched_vslice_add(cfs_rq, se); | 631 | vruntime += sched_vslice(cfs_rq, se); |
| 632 | 632 | ||
| 633 | if (!initial) { | 633 | if (!initial) { |
| 634 | /* sleeps up to a single latency don't count. */ | 634 | /* sleeps up to a single latency don't count. */ |
| @@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 748 | struct rq *rq = rq_of(cfs_rq); | 748 | struct rq *rq = rq_of(cfs_rq); |
| 749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; | 749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; |
| 750 | 750 | ||
| 751 | if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) { | 751 | if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) { |
| 752 | cfs_rq->pair_start = rq->clock; | 752 | cfs_rq->pair_start = rq->clock; |
| 753 | return se; | 753 | return se; |
| 754 | } | 754 | } |
| @@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |||
| 849 | hrtick_start(rq, delta); | 849 | hrtick_start(rq, delta); |
| 850 | } | 850 | } |
| 851 | } | 851 | } |
| 852 | |||
| 853 | /* | ||
| 854 | * Called from enqueue/dequeue; updates the hrtick when the | ||
| 855 | * current task is from our class and nr_running is low enough | ||
| 856 | * to matter. | ||
| 857 | */ | ||
| 858 | static void hrtick_update(struct rq *rq) | ||
| 859 | { | ||
| 860 | struct task_struct *curr = rq->curr; | ||
| 861 | |||
| 862 | if (curr->sched_class != &fair_sched_class) | ||
| 863 | return; | ||
| 864 | |||
| 865 | if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency) | ||
| 866 | hrtick_start_fair(rq, curr); | ||
| 867 | } | ||
| 852 | #else /* !CONFIG_SCHED_HRTICK */ | 868 | #else /* !CONFIG_SCHED_HRTICK */ |
| 853 | static inline void | 869 | static inline void |
| 854 | hrtick_start_fair(struct rq *rq, struct task_struct *p) | 870 | hrtick_start_fair(struct rq *rq, struct task_struct *p) |
| 855 | { | 871 | { |
| 856 | } | 872 | } |
| 873 | |||
| 874 | static inline void hrtick_update(struct rq *rq) | ||
| 875 | { | ||
| 876 | } | ||
| 857 | #endif | 877 | #endif |
| 858 | 878 | ||
| 859 | /* | 879 | /* |
| @@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | |||
| 874 | wakeup = 1; | 894 | wakeup = 1; |
| 875 | } | 895 | } |
| 876 | 896 | ||
| 877 | hrtick_start_fair(rq, rq->curr); | 897 | hrtick_update(rq); |
| 878 | } | 898 | } |
| 879 | 899 | ||
| 880 | /* | 900 | /* |
| @@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) | |||
| 896 | sleep = 1; | 916 | sleep = 1; |
| 897 | } | 917 | } |
| 898 | 918 | ||
| 899 | hrtick_start_fair(rq, rq->curr); | 919 | hrtick_update(rq); |
| 900 | } | 920 | } |
| 901 | 921 | ||
| 902 | /* | 922 | /* |
| @@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p) | |||
| 1002 | 1022 | ||
| 1003 | #ifdef CONFIG_SMP | 1023 | #ifdef CONFIG_SMP |
| 1004 | 1024 | ||
| 1005 | static const struct sched_class fair_sched_class; | ||
| 1006 | |||
| 1007 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1025 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 1008 | /* | 1026 | /* |
| 1009 | * effective_load() calculates the load change as seen from the root_task_group | 1027 | * effective_load() calculates the load change as seen from the root_task_group |
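
The calc_delta_fair() rewrite drops the per-hierarchy loop: an entity's virtual runtime now advances at `NICE_0_LOAD / w` of wall time based on its own weight, while group weighting is handled once by calc_delta_weight(). Numerically, with calc_delta_mine()'s 64-bit inverse-weight fixed-point machinery elided, the scaling is just:

```c
#include <stdio.h>

#define NICE_0_LOAD 1024UL

/* Heavier entities age their vruntime more slowly, so they keep getting
 * picked (and thus run) proportionally longer before falling behind. */
static unsigned long calc_delta_fair(unsigned long delta, unsigned long weight)
{
	if (weight != NICE_0_LOAD)
		delta = delta * NICE_0_LOAD / weight;
	return delta;
}

int main(void)
{
	/* 4ms of wall time, in ns */
	printf("nice 0  (w=1024): %lu\n", calc_delta_fair(4000000, 1024));
	printf("heavier (w=2048): %lu\n", calc_delta_fair(4000000, 2048));
	printf("lighter (w=512):  %lu\n", calc_delta_fair(4000000, 512));
	return 0;
}
```
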
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 7c9e8f4a049f..fda016218296 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
| @@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1) | |||
| 5 | SCHED_FEAT(AFFINE_WAKEUPS, 1) | 5 | SCHED_FEAT(AFFINE_WAKEUPS, 1) |
| 6 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) | 6 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) |
| 7 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 7 | SCHED_FEAT(SYNC_WAKEUPS, 1) |
| 8 | SCHED_FEAT(HRTICK, 1) | 8 | SCHED_FEAT(HRTICK, 0) |
| 9 | SCHED_FEAT(DOUBLE_TICK, 0) | 9 | SCHED_FEAT(DOUBLE_TICK, 0) |
| 10 | SCHED_FEAT(ASYM_GRAN, 1) | 10 | SCHED_FEAT(ASYM_GRAN, 1) |
| 11 | SCHED_FEAT(LB_BIAS, 1) | 11 | SCHED_FEAT(LB_BIAS, 1) |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index b8c156979cf2..ee71bec1da66 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | static int show_schedstat(struct seq_file *seq, void *v) | 9 | static int show_schedstat(struct seq_file *seq, void *v) |
| 10 | { | 10 | { |
| 11 | int cpu; | 11 | int cpu; |
| 12 | int mask_len = NR_CPUS/32 * 9; | 12 | int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; |
| 13 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); | 13 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); |
| 14 | 14 | ||
| 15 | if (mask_str == NULL) | 15 | if (mask_str == NULL) |
| @@ -90,13 +90,20 @@ static int schedstat_open(struct inode *inode, struct file *file) | |||
| 90 | return res; | 90 | return res; |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | const struct file_operations proc_schedstat_operations = { | 93 | static const struct file_operations proc_schedstat_operations = { |
| 94 | .open = schedstat_open, | 94 | .open = schedstat_open, |
| 95 | .read = seq_read, | 95 | .read = seq_read, |
| 96 | .llseek = seq_lseek, | 96 | .llseek = seq_lseek, |
| 97 | .release = single_release, | 97 | .release = single_release, |
| 98 | }; | 98 | }; |
| 99 | 99 | ||
| 100 | static int __init proc_schedstat_init(void) | ||
| 101 | { | ||
| 102 | proc_create("schedstat", 0, NULL, &proc_schedstat_operations); | ||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | module_init(proc_schedstat_init); | ||
| 106 | |||
| 100 | /* | 107 | /* |
| 101 | * Expects runqueue lock to be held for atomicity of update | 108 | * Expects runqueue lock to be held for atomicity of update |
| 102 | */ | 109 | */ |
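
The one-character-looking `mask_len` change fixes an allocation-size bug: each 32-bit word of the cpumask prints as 8 hex digits plus a separator (9 bytes), and integer division truncates, so any NR_CPUS that is not a multiple of 32 undersized the buffer. Checked concretely:

```c
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int nr_cpus = 40;	/* any non-multiple of 32 shows the bug */

	printf("old: %d bytes\n", nr_cpus / 32 * 9);		  /* 9: one word short */
	printf("new: %d bytes\n", DIV_ROUND_UP(nr_cpus, 32) * 9); /* 18: correct */
	return 0;
}
```
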
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index af3c7cea258b..9bc4c00872c9 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -37,9 +37,13 @@ struct stop_machine_data { | |||
| 37 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | 37 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ |
| 38 | static unsigned int num_threads; | 38 | static unsigned int num_threads; |
| 39 | static atomic_t thread_ack; | 39 | static atomic_t thread_ack; |
| 40 | static struct completion finished; | ||
| 41 | static DEFINE_MUTEX(lock); | 40 | static DEFINE_MUTEX(lock); |
| 42 | 41 | ||
| 42 | static struct workqueue_struct *stop_machine_wq; | ||
| 43 | static struct stop_machine_data active, idle; | ||
| 44 | static const cpumask_t *active_cpus; | ||
| 45 | static void *stop_machine_work; | ||
| 46 | |||
| 43 | static void set_state(enum stopmachine_state newstate) | 47 | static void set_state(enum stopmachine_state newstate) |
| 44 | { | 48 | { |
| 45 | /* Reset ack counter. */ | 49 | /* Reset ack counter. */ |
| @@ -51,21 +55,26 @@ static void set_state(enum stopmachine_state newstate) | |||
| 51 | /* Last one to ack a state moves to the next state. */ | 55 | /* Last one to ack a state moves to the next state. */ |
| 52 | static void ack_state(void) | 56 | static void ack_state(void) |
| 53 | { | 57 | { |
| 54 | if (atomic_dec_and_test(&thread_ack)) { | 58 | if (atomic_dec_and_test(&thread_ack)) |
| 55 | /* If we're the last one to ack the EXIT, we're finished. */ | 59 | set_state(state + 1); |
| 56 | if (state == STOPMACHINE_EXIT) | ||
| 57 | complete(&finished); | ||
| 58 | else | ||
| 59 | set_state(state + 1); | ||
| 60 | } | ||
| 61 | } | 60 | } |
| 62 | 61 | ||
| 63 | /* This is the actual thread which stops the CPU. It exits by itself rather | 62 | /* This is the actual function which stops the CPU. It runs |
| 64 | * than waiting for kthread_stop(), because it's easier for hotplug CPU. */ | 63 | * in the context of a dedicated stopmachine workqueue. */ |
| 65 | static int stop_cpu(struct stop_machine_data *smdata) | 64 | static void stop_cpu(struct work_struct *unused) |
| 66 | { | 65 | { |
| 67 | enum stopmachine_state curstate = STOPMACHINE_NONE; | 66 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
| 68 | 67 | struct stop_machine_data *smdata = &idle; | |
| 68 | int cpu = smp_processor_id(); | ||
| 69 | int err; | ||
| 70 | |||
| 71 | if (!active_cpus) { | ||
| 72 | if (cpu == first_cpu(cpu_online_map)) | ||
| 73 | smdata = &active; | ||
| 74 | } else { | ||
| 75 | if (cpu_isset(cpu, *active_cpus)) | ||
| 76 | smdata = &active; | ||
| 77 | } | ||
| 69 | /* Simple state machine */ | 78 | /* Simple state machine */ |
| 70 | do { | 79 | do { |
| 71 | /* Chill out and ensure we re-read stopmachine_state. */ | 80 | /* Chill out and ensure we re-read stopmachine_state. */ |
| @@ -78,9 +87,11 @@ static int stop_cpu(struct stop_machine_data *smdata) | |||
| 78 | hard_irq_disable(); | 87 | hard_irq_disable(); |
| 79 | break; | 88 | break; |
| 80 | case STOPMACHINE_RUN: | 89 | case STOPMACHINE_RUN: |
| 81 | /* |= allows error detection if functions on | 90 | /* On multiple CPUs only a single error code |
| 82 | * multiple CPUs. */ | 91 | * is needed to tell that something failed. */ |
| 83 | smdata->fnret |= smdata->fn(smdata->data); | 92 | err = smdata->fn(smdata->data); |
| 93 | if (err) | ||
| 94 | smdata->fnret = err; | ||
| 84 | break; | 95 | break; |
| 85 | default: | 96 | default: |
| 86 | break; | 97 | break; |
| @@ -90,7 +101,6 @@ static int stop_cpu(struct stop_machine_data *smdata) | |||
| 90 | } while (curstate != STOPMACHINE_EXIT); | 101 | } while (curstate != STOPMACHINE_EXIT); |
| 91 | 102 | ||
| 92 | local_irq_enable(); | 103 | local_irq_enable(); |
| 93 | do_exit(0); | ||
| 94 | } | 104 | } |
| 95 | 105 | ||
| 96 | /* Callback for CPUs which aren't supposed to do anything. */ | 106 | /* Callback for CPUs which aren't supposed to do anything. */ |
| @@ -101,78 +111,34 @@ static int chill(void *unused) | |||
| 101 | 111 | ||
| 102 | int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) | 112 | int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) |
| 103 | { | 113 | { |
| 104 | int i, err; | 114 | struct work_struct *sm_work; |
| 105 | struct stop_machine_data active, idle; | 115 | int i; |
| 106 | struct task_struct **threads; | ||
| 107 | 116 | ||
| 117 | /* Set up initial state. */ | ||
| 118 | mutex_lock(&lock); | ||
| 119 | num_threads = num_online_cpus(); | ||
| 120 | active_cpus = cpus; | ||
| 108 | active.fn = fn; | 121 | active.fn = fn; |
| 109 | active.data = data; | 122 | active.data = data; |
| 110 | active.fnret = 0; | 123 | active.fnret = 0; |
| 111 | idle.fn = chill; | 124 | idle.fn = chill; |
| 112 | idle.data = NULL; | 125 | idle.data = NULL; |
| 113 | 126 | ||
| 114 | /* This could be too big for stack on large machines. */ | ||
| 115 | threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL); | ||
| 116 | if (!threads) | ||
| 117 | return -ENOMEM; | ||
| 118 | |||
| 119 | /* Set up initial state. */ | ||
| 120 | mutex_lock(&lock); | ||
| 121 | init_completion(&finished); | ||
| 122 | num_threads = num_online_cpus(); | ||
| 123 | set_state(STOPMACHINE_PREPARE); | 127 | set_state(STOPMACHINE_PREPARE); |
| 124 | 128 | ||
| 125 | for_each_online_cpu(i) { | 129 | /* Schedule the stop_cpu work on all cpus: hold this CPU so one |
| 126 | struct stop_machine_data *smdata = &idle; | ||
| 127 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 128 | |||
| 129 | if (!cpus) { | ||
| 130 | if (i == first_cpu(cpu_online_map)) | ||
| 131 | smdata = &active; | ||
| 132 | } else { | ||
| 133 | if (cpu_isset(i, *cpus)) | ||
| 134 | smdata = &active; | ||
| 135 | } | ||
| 136 | |||
| 137 | threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u", | ||
| 138 | i); | ||
| 139 | if (IS_ERR(threads[i])) { | ||
| 140 | err = PTR_ERR(threads[i]); | ||
| 141 | threads[i] = NULL; | ||
| 142 | goto kill_threads; | ||
| 143 | } | ||
| 144 | |||
| 145 | /* Place it onto correct cpu. */ | ||
| 146 | kthread_bind(threads[i], i); | ||
| 147 | |||
| 148 | /* Make it highest prio. */ | ||
| 149 | if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, ¶m)) | ||
| 150 | BUG(); | ||
| 151 | } | ||
| 152 | |||
| 153 | /* We've created all the threads. Wake them all: hold this CPU so one | ||
| 154 | * doesn't hit this CPU until we're ready. */ | 130 | * doesn't hit this CPU until we're ready. */ |
| 155 | get_cpu(); | 131 | get_cpu(); |
| 156 | for_each_online_cpu(i) | 132 | for_each_online_cpu(i) { |
| 157 | wake_up_process(threads[i]); | 133 | sm_work = percpu_ptr(stop_machine_work, i); |
| 158 | 134 | INIT_WORK(sm_work, stop_cpu); | |
| 135 | queue_work_on(i, stop_machine_wq, sm_work); | ||
| 136 | } | ||
| 159 | /* This will release the thread on our CPU. */ | 137 | /* This will release the thread on our CPU. */ |
| 160 | put_cpu(); | 138 | put_cpu(); |
| 161 | wait_for_completion(&finished); | 139 | flush_workqueue(stop_machine_wq); |
| 162 | mutex_unlock(&lock); | 140 | mutex_unlock(&lock); |
| 163 | |||
| 164 | kfree(threads); | ||
| 165 | |||
| 166 | return active.fnret; | 141 | return active.fnret; |
| 167 | |||
| 168 | kill_threads: | ||
| 169 | for_each_online_cpu(i) | ||
| 170 | if (threads[i]) | ||
| 171 | kthread_stop(threads[i]); | ||
| 172 | mutex_unlock(&lock); | ||
| 173 | |||
| 174 | kfree(threads); | ||
| 175 | return err; | ||
| 176 | } | 142 | } |
| 177 | 143 | ||
| 178 | int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) | 144 | int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) |
| @@ -187,3 +153,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) | |||
| 187 | return ret; | 153 | return ret; |
| 188 | } | 154 | } |
| 189 | EXPORT_SYMBOL_GPL(stop_machine); | 155 | EXPORT_SYMBOL_GPL(stop_machine); |
| 156 | |||
| 157 | static int __init stop_machine_init(void) | ||
| 158 | { | ||
| 159 | stop_machine_wq = create_rt_workqueue("kstop"); | ||
| 160 | stop_machine_work = alloc_percpu(struct work_struct); | ||
| 161 | return 0; | ||
| 162 | } | ||
| 163 | core_initcall(stop_machine_init); | ||
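
The stop_machine() rework above swaps per-call RT kthreads for a persistent RT workqueue: the per-CPU work_structs and the "kstop" workqueue are allocated once at boot by stop_machine_init(), and each __stop_machine() call merely queues stop_cpu() on every online CPU and flushes. The external contract is unchanged, so a caller still looks roughly like this (insn_patch and apply_patch are made-up names for illustration):

```c
#include <linux/stop_machine.h>

/* Hypothetical payload for the callback. */
struct insn_patch {
	unsigned long *addr;
	unsigned long insn;
};

static int apply_patch(void *data)
{
	struct insn_patch *p = data;

	/*
	 * Every other online CPU is spinning in stop_cpu() with hard IRQs
	 * disabled, so this store needs no further serialization.
	 */
	*p->addr = p->insn;
	return 0;
}

static int patch_kernel_text(struct insn_patch *p)
{
	/* NULL cpumask: apply_patch() runs on the first online CPU only. */
	return stop_machine(apply_patch, p, NULL);
}
```

Error reporting also changed shape: instead of OR-ing return codes from multiple CPUs into fnret, the last non-zero return wins, which is all callers need to learn that something failed.
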
diff --git a/kernel/sys.c b/kernel/sys.c index 53879cdae483..31deba8f7d16 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -1716,6 +1716,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
| 1716 | case PR_SET_TSC: | 1716 | case PR_SET_TSC: |
| 1717 | error = SET_TSC_CTL(arg2); | 1717 | error = SET_TSC_CTL(arg2); |
| 1718 | break; | 1718 | break; |
| 1719 | case PR_GET_TIMERSLACK: | ||
| 1720 | error = current->timer_slack_ns; | ||
| 1721 | break; | ||
| 1722 | case PR_SET_TIMERSLACK: | ||
| 1723 | if (arg2 <= 0) | ||
| 1724 | current->timer_slack_ns = | ||
| 1725 | current->default_timer_slack_ns; | ||
| 1726 | else | ||
| 1727 | current->timer_slack_ns = arg2; | ||
| 1728 | break; | ||
| 1719 | default: | 1729 | default: |
| 1720 | error = -EINVAL; | 1730 | error = -EINVAL; |
| 1721 | break; | 1731 | break; |
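
From userspace the new prctl pair is exercised as below; the 29/30 constants match the include/linux/prctl.h half of this patch, and the guard covers libc headers that predate it. Note that since arg2 is an unsigned long, the `arg2 <= 0` test in the hunk above can only ever match 0, which restores default_timer_slack_ns. A minimal sketch:

```c
#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_TIMERSLACK		/* headers older than this patch */
#define PR_SET_TIMERSLACK 29
#define PR_GET_TIMERSLACK 30
#endif

int main(void)
{
	/* Slack is in nanoseconds; ask for 50us. */
	if (prctl(PR_SET_TIMERSLACK, 50000UL, 0, 0, 0) < 0)
		perror("PR_SET_TIMERSLACK");

	/* The get side returns the value itself, not through a pointer. */
	printf("timer slack: %d ns\n", prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));

	/* arg2 of 0 restores the task's default_timer_slack_ns. */
	prctl(PR_SET_TIMERSLACK, 0, 0, 0, 0);
	return 0;
}
```
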
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index edb1075f80d2..9d048fa2d902 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = { | |||
| 276 | }, | 276 | }, |
| 277 | { | 277 | { |
| 278 | .ctl_name = CTL_UNNUMBERED, | 278 | .ctl_name = CTL_UNNUMBERED, |
| 279 | .procname = "sched_shares_thresh", | ||
| 280 | .data = &sysctl_sched_shares_thresh, | ||
| 281 | .maxlen = sizeof(unsigned int), | ||
| 282 | .mode = 0644, | ||
| 283 | .proc_handler = &proc_dointvec_minmax, | ||
| 284 | .strategy = &sysctl_intvec, | ||
| 285 | .extra1 = &zero, | ||
| 286 | }, | ||
| 287 | { | ||
| 288 | .ctl_name = CTL_UNNUMBERED, | ||
| 279 | .procname = "sched_child_runs_first", | 289 | .procname = "sched_child_runs_first", |
| 280 | .data = &sysctl_sched_child_runs_first, | 290 | .data = &sysctl_sched_child_runs_first, |
| 281 | .maxlen = sizeof(unsigned int), | 291 | .maxlen = sizeof(unsigned int), |
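
Given its kern_table placement and procname, the new scheduler knob lands at /proc/sys/kernel/sched_shares_thresh, writable by root (mode 0644) and clamped to non-negative values by proc_dointvec_minmax with extra1 = &zero. A minimal userspace read, for illustration:

```c
#include <stdio.h>

int main(void)
{
	unsigned int thresh;
	FILE *f = fopen("/proc/sys/kernel/sched_shares_thresh", "r");

	if (!f) {
		perror("sched_shares_thresh");
		return 1;
	}
	if (fscanf(f, "%u", &thresh) == 1)
		printf("sched_shares_thresh = %u\n", thresh);
	fclose(f);
	return 0;
}
```
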
diff --git a/kernel/time.c b/kernel/time.c index 6a08660b4fac..d63a4336fad6 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64); | |||
| 669 | #endif | 669 | #endif |
| 670 | 670 | ||
| 671 | EXPORT_SYMBOL(jiffies); | 671 | EXPORT_SYMBOL(jiffies); |
| 672 | |||
| 673 | /* | ||
| 674 | * Add two timespec values and do a safety check for overflow. | ||
| 675 | * It's assumed that both values are valid (>= 0) | ||
| 676 | */ | ||
| 677 | struct timespec timespec_add_safe(const struct timespec lhs, | ||
| 678 | const struct timespec rhs) | ||
| 679 | { | ||
| 680 | struct timespec res; | ||
| 681 | |||
| 682 | set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec, | ||
| 683 | lhs.tv_nsec + rhs.tv_nsec); | ||
| 684 | |||
| 685 | if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec) | ||
| 686 | res.tv_sec = TIME_T_MAX; | ||
| 687 | |||
| 688 | return res; | ||
| 689 | } | ||
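
The overflow check in timespec_add_safe() relies on wraparound: if the addition of two non-negative tv_sec values wraps, the sum comes out smaller than either operand, and the result is clamped to TIME_T_MAX instead of going negative. A userspace re-creation of the same logic (the kernel's TIME_T_MAX macro is copied here, and an unsigned add stands in for the kernel's wrapping signed arithmetic under -fno-strict-overflow):

```c
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000L
#define TIME_T_MAX	(time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)

static struct timespec timespec_add_safe(struct timespec lhs, struct timespec rhs)
{
	struct timespec res;

	/* Unsigned add gives defined wraparound in plain C. */
	res.tv_sec = (time_t)((unsigned long)lhs.tv_sec +
			      (unsigned long)rhs.tv_sec);
	res.tv_nsec = lhs.tv_nsec + rhs.tv_nsec;
	if (res.tv_nsec >= NSEC_PER_SEC) {	/* valid inputs: one carry suffices */
		res.tv_nsec -= NSEC_PER_SEC;
		res.tv_sec++;
	}

	/* A wrapped sum is smaller than either non-negative operand: clamp. */
	if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
		res.tv_sec = TIME_T_MAX;
	return res;
}

int main(void)
{
	struct timespec a = { TIME_T_MAX - 1, 0 }, b = { 10, 0 };

	/* The overflowing sum saturates instead of going negative. */
	printf("%s\n", timespec_add_safe(a, b).tv_sec == TIME_T_MAX
		? "clamped to TIME_T_MAX" : "wrapped");
	return 0;
}
```

Presumably this exists so that callers elsewhere in this merge can add a user-supplied interval to the current time without the deadline wrapping into the past.
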
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 1a20715bfd6e..8ff15e5d486b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -142,8 +142,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) | |||
| 142 | time_state = TIME_OOP; | 142 | time_state = TIME_OOP; |
| 143 | printk(KERN_NOTICE "Clock: " | 143 | printk(KERN_NOTICE "Clock: " |
| 144 | "inserting leap second 23:59:60 UTC\n"); | 144 | "inserting leap second 23:59:60 UTC\n"); |
| 145 | leap_timer.expires = ktime_add_ns(leap_timer.expires, | 145 | hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); |
| 146 | NSEC_PER_SEC); | ||
| 147 | res = HRTIMER_RESTART; | 146 | res = HRTIMER_RESTART; |
| 148 | break; | 147 | break; |
| 149 | case TIME_DEL: | 148 | case TIME_DEL: |
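
This hunk and the tick-sched ones below replace every direct read or write of timer->expires with accessor helpers, because an hrtimer now carries an expiry range (a soft and a hard deadline) and both fields must move together. Inferred from the call sites in this diff, the helpers plausibly look like the following; the field names are assumptions, not copied from hrtimer.h:

```c
/* Sketch of the accessor family: every mutator keeps the
 * [softexpires, expires] pair in step. */
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
	timer->_softexpires = time;
	timer->_expires = time;
}

static inline void hrtimer_add_expires_ns(struct hrtimer *timer,
					  unsigned long ns)
{
	timer->_expires     = ktime_add_ns(timer->_expires, ns);
	timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
}

static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
{
	return timer->_expires;
}

static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
{
	return timer->_softexpires;
}
```
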
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0581c11fe6c6..5bbb1044f847 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -300,7 +300,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 300 | goto out; | 300 | goto out; |
| 301 | } | 301 | } |
| 302 | 302 | ||
| 303 | ts->idle_tick = ts->sched_timer.expires; | 303 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); |
| 304 | ts->tick_stopped = 1; | 304 | ts->tick_stopped = 1; |
| 305 | ts->idle_jiffies = last_jiffies; | 305 | ts->idle_jiffies = last_jiffies; |
| 306 | rcu_enter_nohz(); | 306 | rcu_enter_nohz(); |
| @@ -380,21 +380,21 @@ ktime_t tick_nohz_get_sleep_length(void) | |||
| 380 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | 380 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) |
| 381 | { | 381 | { |
| 382 | hrtimer_cancel(&ts->sched_timer); | 382 | hrtimer_cancel(&ts->sched_timer); |
| 383 | ts->sched_timer.expires = ts->idle_tick; | 383 | hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); |
| 384 | 384 | ||
| 385 | while (1) { | 385 | while (1) { |
| 386 | /* Forward the time to expire in the future */ | 386 | /* Forward the time to expire in the future */ |
| 387 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 387 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
| 388 | 388 | ||
| 389 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 389 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
| 390 | hrtimer_start(&ts->sched_timer, | 390 | hrtimer_start_expires(&ts->sched_timer, |
| 391 | ts->sched_timer.expires, | ||
| 392 | HRTIMER_MODE_ABS); | 391 | HRTIMER_MODE_ABS); |
| 393 | /* Check, if the timer was already in the past */ | 392 | /* Check, if the timer was already in the past */ |
| 394 | if (hrtimer_active(&ts->sched_timer)) | 393 | if (hrtimer_active(&ts->sched_timer)) |
| 395 | break; | 394 | break; |
| 396 | } else { | 395 | } else { |
| 397 | if (!tick_program_event(ts->sched_timer.expires, 0)) | 396 | if (!tick_program_event( |
| 397 | hrtimer_get_expires(&ts->sched_timer), 0)) | ||
| 398 | break; | 398 | break; |
| 399 | } | 399 | } |
| 400 | /* Update jiffies and reread time */ | 400 | /* Update jiffies and reread time */ |
| @@ -456,14 +456,16 @@ void tick_nohz_restart_sched_tick(void) | |||
| 456 | */ | 456 | */ |
| 457 | ts->tick_stopped = 0; | 457 | ts->tick_stopped = 0; |
| 458 | ts->idle_exittime = now; | 458 | ts->idle_exittime = now; |
| 459 | |||
| 459 | tick_nohz_restart(ts, now); | 460 | tick_nohz_restart(ts, now); |
| 461 | |||
| 460 | local_irq_enable(); | 462 | local_irq_enable(); |
| 461 | } | 463 | } |
| 462 | 464 | ||
| 463 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | 465 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) |
| 464 | { | 466 | { |
| 465 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 467 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
| 466 | return tick_program_event(ts->sched_timer.expires, 0); | 468 | return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); |
| 467 | } | 469 | } |
| 468 | 470 | ||
| 469 | /* | 471 | /* |
| @@ -542,7 +544,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
| 542 | next = tick_init_jiffy_update(); | 544 | next = tick_init_jiffy_update(); |
| 543 | 545 | ||
| 544 | for (;;) { | 546 | for (;;) { |
| 545 | ts->sched_timer.expires = next; | 547 | hrtimer_set_expires(&ts->sched_timer, next); |
| 546 | if (!tick_program_event(next, 0)) | 548 | if (!tick_program_event(next, 0)) |
| 547 | break; | 549 | break; |
| 548 | next = ktime_add(next, tick_period); | 550 | next = ktime_add(next, tick_period); |
| @@ -567,11 +569,21 @@ static void tick_nohz_switch_to_nohz(void) | |||
| 567 | static void tick_nohz_kick_tick(int cpu) | 569 | static void tick_nohz_kick_tick(int cpu) |
| 568 | { | 570 | { |
| 569 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 571 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 572 | ktime_t delta, now; | ||
| 570 | 573 | ||
| 571 | if (!ts->tick_stopped) | 574 | if (!ts->tick_stopped) |
| 572 | return; | 575 | return; |
| 573 | 576 | ||
| 574 | tick_nohz_restart(ts, ktime_get()); | 577 | /* |
| 578 | * Do not touch the tick device, when the next expiry is either | ||
| 579 | * already reached or less/equal than the tick period. | ||
| 580 | */ | ||
| 581 | now = ktime_get(); | ||
| 582 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); | ||
| 583 | if (delta.tv64 <= tick_period.tv64) | ||
| 584 | return; | ||
| 585 | |||
| 586 | tick_nohz_restart(ts, now); | ||
| 575 | } | 587 | } |
| 576 | 588 | ||
| 577 | #else | 589 | #else |
| @@ -668,16 +680,15 @@ void tick_setup_sched_timer(void) | |||
| 668 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; | 680 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; |
| 669 | 681 | ||
| 670 | /* Get the next period (per cpu) */ | 682 | /* Get the next period (per cpu) */ |
| 671 | ts->sched_timer.expires = tick_init_jiffy_update(); | 683 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
| 672 | offset = ktime_to_ns(tick_period) >> 1; | 684 | offset = ktime_to_ns(tick_period) >> 1; |
| 673 | do_div(offset, num_possible_cpus()); | 685 | do_div(offset, num_possible_cpus()); |
| 674 | offset *= smp_processor_id(); | 686 | offset *= smp_processor_id(); |
| 675 | ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); | 687 | hrtimer_add_expires_ns(&ts->sched_timer, offset); |
| 676 | 688 | ||
| 677 | for (;;) { | 689 | for (;;) { |
| 678 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 690 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
| 679 | hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, | 691 | hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS); |
| 680 | HRTIMER_MODE_ABS); | ||
| 681 | /* Check, if the timer was already in the past */ | 692 | /* Check, if the timer was already in the past */ |
| 682 | if (hrtimer_active(&ts->sched_timer)) | 693 | if (hrtimer_active(&ts->sched_timer)) |
| 683 | break; | 694 | break; |
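
The new early-return in tick_nohz_kick_tick() skips touching the tick device when the pending expiry is already within one tick period. The comparison idiom is worth calling out: ktime_t values here are compared on the raw 64-bit nanosecond member, exactly as the hunk does with delta.tv64. As a standalone sketch (the helper name is invented):

```c
#include <linux/hrtimer.h>
#include <linux/ktime.h>

/* Hypothetical helper mirroring the guard added above: true if @timer's
 * hard expiry is no more than @period past @now. */
static bool hrtimer_due_within(struct hrtimer *timer, ktime_t now,
			       ktime_t period)
{
	ktime_t delta = ktime_sub(hrtimer_get_expires(timer), now);

	/* ktime_t is compared via its raw 64-bit .tv64 value in this era. */
	return delta.tv64 <= period.tv64;
}
```
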
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index f6426911e35a..a999b92a1277 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
| @@ -66,9 +66,11 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, | |||
| 66 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); | 66 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); |
| 67 | #endif | 67 | #endif |
| 68 | SEQ_printf(m, "\n"); | 68 | SEQ_printf(m, "\n"); |
| 69 | SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", | 69 | SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n", |
| 70 | (unsigned long long)ktime_to_ns(timer->expires), | 70 | (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)), |
| 71 | (long long)(ktime_to_ns(timer->expires) - now)); | 71 | (unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)), |
| 72 | (long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now), | ||
| 73 | (long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now)); | ||
| 72 | } | 74 | } |
| 73 | 75 | ||
| 74 | static void | 76 | static void |
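
With both ends of the range printed, a /proc/timer_list entry now renders along these lines (numbers illustrative; the format is taken verbatim from the SEQ_printf above, soft expiry first, hard expiry second):

```
 # expires at 115000250000-115000300000 nsecs [in 497000 to 547000 nsecs]
```
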
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 714afad46539..f928f2a87b9b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -62,6 +62,7 @@ struct workqueue_struct { | |||
| 62 | const char *name; | 62 | const char *name; |
| 63 | int singlethread; | 63 | int singlethread; |
| 64 | int freezeable; /* Freeze threads during suspend */ | 64 | int freezeable; /* Freeze threads during suspend */ |
| 65 | int rt; | ||
| 65 | #ifdef CONFIG_LOCKDEP | 66 | #ifdef CONFIG_LOCKDEP |
| 66 | struct lockdep_map lockdep_map; | 67 | struct lockdep_map lockdep_map; |
| 67 | #endif | 68 | #endif |
| @@ -766,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu) | |||
| 766 | 767 | ||
| 767 | static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | 768 | static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) |
| 768 | { | 769 | { |
| 770 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 769 | struct workqueue_struct *wq = cwq->wq; | 771 | struct workqueue_struct *wq = cwq->wq; |
| 770 | const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d"; | 772 | const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d"; |
| 771 | struct task_struct *p; | 773 | struct task_struct *p; |
| @@ -781,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
| 781 | */ | 783 | */ |
| 782 | if (IS_ERR(p)) | 784 | if (IS_ERR(p)) |
| 783 | return PTR_ERR(p); | 785 | return PTR_ERR(p); |
| 784 | 786 | if (cwq->wq->rt) | |
| 787 | sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); | ||
| 785 | cwq->thread = p; | 788 | cwq->thread = p; |
| 786 | 789 | ||
| 787 | return 0; | 790 | return 0; |
| @@ -801,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
| 801 | struct workqueue_struct *__create_workqueue_key(const char *name, | 804 | struct workqueue_struct *__create_workqueue_key(const char *name, |
| 802 | int singlethread, | 805 | int singlethread, |
| 803 | int freezeable, | 806 | int freezeable, |
| 807 | int rt, | ||
| 804 | struct lock_class_key *key, | 808 | struct lock_class_key *key, |
| 805 | const char *lock_name) | 809 | const char *lock_name) |
| 806 | { | 810 | { |
| @@ -822,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, | |||
| 822 | lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); | 826 | lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); |
| 823 | wq->singlethread = singlethread; | 827 | wq->singlethread = singlethread; |
| 824 | wq->freezeable = freezeable; | 828 | wq->freezeable = freezeable; |
| 829 | wq->rt = rt; | ||
| 825 | INIT_LIST_HEAD(&wq->list); | 830 | INIT_LIST_HEAD(&wq->list); |
| 826 | 831 | ||
| 827 | if (singlethread) { | 832 | if (singlethread) { |
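
The rt flag threads through __create_workqueue_key(), so the create_rt_workqueue() used by stop_machine_init() earlier in this diff is presumably just another arm of the existing wrapper macros in include/linux/workqueue.h. That header is not part of this hunk, so treat the following reconstruction as a sketch; the argument order follows the function signature above:

```c
/* Wrapper macros around __create_workqueue_key(); the rt argument slots
 * in after freezeable. */
#define __create_workqueue(name, singlethread, freezeable, rt)		\
	__create_workqueue_key((name), (singlethread), (freezeable), (rt), \
			       NULL, NULL)

#define create_workqueue(name)		  __create_workqueue((name), 0, 0, 0)
#define create_rt_workqueue(name)	  __create_workqueue((name), 0, 0, 1)
#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
```

Each worker thread of an rt queue is then promoted to SCHED_FIFO at MAX_RT_PRIO-1 in create_workqueue_thread(), which is the same priority the old stop_machine kthreads were given by hand before this series.
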
