author		Ingo Molnar <mingo@elte.hu>	2008-10-27 05:50:54 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-10-27 05:50:54 -0400
commit		4944dd62de21230af039eda7cd218e9a09021d11 (patch)
tree		bac70f7bab8506c7e1b0408bacbdb0b1d77262e9 /kernel
parent		f17845e5d97ead8fbdadfd40039e058ec7cf4a42 (diff)
parent		0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
Merge commit 'v2.6.28-rc2' into tracing/urgent
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit_tree.c       |  48
-rw-r--r--  kernel/cgroup.c           |   2
-rw-r--r--  kernel/exec_domain.c      |  33
-rw-r--r--  kernel/fork.c             |   2
-rw-r--r--  kernel/futex.c            |  11
-rw-r--r--  kernel/hrtimer.c          | 206
-rw-r--r--  kernel/irq/chip.c         |   3
-rw-r--r--  kernel/module.c           | 343
-rw-r--r--  kernel/panic.c            |  17
-rw-r--r--  kernel/params.c           | 276
-rw-r--r--  kernel/posix-timers.c     |  10
-rw-r--r--  kernel/power/disk.c       |   2
-rw-r--r--  kernel/power/power.h      |   2
-rw-r--r--  kernel/power/swap.c       |  14
-rw-r--r--  kernel/rcupdate.c         |  19
-rw-r--r--  kernel/rtmutex.c          |   3
-rw-r--r--  kernel/sched.c            |  59
-rw-r--r--  kernel/sched_fair.c       |  62
-rw-r--r--  kernel/sched_features.h   |   2
-rw-r--r--  kernel/sched_stats.h      |  11
-rw-r--r--  kernel/stop_machine.c     | 120
-rw-r--r--  kernel/sys.c              |  10
-rw-r--r--  kernel/sysctl.c           |  10
-rw-r--r--  kernel/time.c             |  18
-rw-r--r--  kernel/time/ntp.c         |   3
-rw-r--r--  kernel/time/tick-sched.c  |  35
-rw-r--r--  kernel/time/timer_list.c  |   8
-rw-r--r--  kernel/workqueue.c        |   7
28 files changed, 785 insertions, 551 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f7921a2ecf16..8ba0e0d934f2 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -532,7 +532,7 @@ void audit_trim_trees(void)
 	list_add(&cursor, &tree_list);
 	while (cursor.next != &tree_list) {
 		struct audit_tree *tree;
-		struct nameidata nd;
+		struct path path;
 		struct vfsmount *root_mnt;
 		struct node *node;
 		struct list_head list;
@@ -544,12 +544,12 @@ void audit_trim_trees(void)
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = path_lookup(tree->pathname, 0, &nd);
+		err = kern_path(tree->pathname, 0, &path);
 		if (err)
 			goto skip_it;
 
-		root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-		path_put(&nd.path);
+		root_mnt = collect_mounts(path.mnt, path.dentry);
+		path_put(&path);
 		if (!root_mnt)
 			goto skip_it;
 
@@ -580,19 +580,19 @@ skip_it:
 }
 
 static int is_under(struct vfsmount *mnt, struct dentry *dentry,
-		    struct nameidata *nd)
+		    struct path *path)
 {
-	if (mnt != nd->path.mnt) {
+	if (mnt != path->mnt) {
 		for (;;) {
 			if (mnt->mnt_parent == mnt)
 				return 0;
-			if (mnt->mnt_parent == nd->path.mnt)
+			if (mnt->mnt_parent == path->mnt)
 				break;
 			mnt = mnt->mnt_parent;
 		}
 		dentry = mnt->mnt_mountpoint;
 	}
-	return is_subdir(dentry, nd->path.dentry);
+	return is_subdir(dentry, path->dentry);
 }
 
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
@@ -618,7 +618,7 @@ void audit_put_tree(struct audit_tree *tree)
 int audit_add_tree_rule(struct audit_krule *rule)
 {
 	struct audit_tree *seed = rule->tree, *tree;
-	struct nameidata nd;
+	struct path path;
 	struct vfsmount *mnt, *p;
 	struct list_head list;
 	int err;
@@ -637,11 +637,11 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	/* do not set rule->tree yet */
 	mutex_unlock(&audit_filter_mutex);
 
-	err = path_lookup(tree->pathname, 0, &nd);
+	err = kern_path(tree->pathname, 0, &path);
 	if (err)
 		goto Err;
-	mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_put(&nd.path);
+	mnt = collect_mounts(path.mnt, path.dentry);
+	path_put(&path);
 	if (!mnt) {
 		err = -ENOMEM;
 		goto Err;
@@ -690,29 +690,29 @@ int audit_tag_tree(char *old, char *new)
 {
 	struct list_head cursor, barrier;
 	int failed = 0;
-	struct nameidata nd;
+	struct path path;
 	struct vfsmount *tagged;
 	struct list_head list;
 	struct vfsmount *mnt;
 	struct dentry *dentry;
 	int err;
 
-	err = path_lookup(new, 0, &nd);
+	err = kern_path(new, 0, &path);
 	if (err)
 		return err;
-	tagged = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_put(&nd.path);
+	tagged = collect_mounts(path.mnt, path.dentry);
+	path_put(&path);
 	if (!tagged)
 		return -ENOMEM;
 
-	err = path_lookup(old, 0, &nd);
+	err = kern_path(old, 0, &path);
 	if (err) {
 		drop_collected_mounts(tagged);
 		return err;
 	}
-	mnt = mntget(nd.path.mnt);
-	dentry = dget(nd.path.dentry);
-	path_put(&nd.path);
+	mnt = mntget(path.mnt);
+	dentry = dget(path.dentry);
+	path_put(&path);
 
 	if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
 		follow_up(&mnt, &dentry);
@@ -733,7 +733,7 @@ int audit_tag_tree(char *old, char *new)
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = path_lookup(tree->pathname, 0, &nd);
+		err = kern_path(tree->pathname, 0, &path);
 		if (err) {
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
@@ -741,15 +741,15 @@ int audit_tag_tree(char *old, char *new)
 		}
 
 		spin_lock(&vfsmount_lock);
-		if (!is_under(mnt, dentry, &nd)) {
+		if (!is_under(mnt, dentry, &path)) {
 			spin_unlock(&vfsmount_lock);
-			path_put(&nd.path);
+			path_put(&path);
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
 			continue;
 		}
 		spin_unlock(&vfsmount_lock);
-		path_put(&nd.path);
+		path_put(&path);
 
 		list_for_each_entry(p, &list, mnt_list) {
 			failed = tag_chunk(p->mnt_root->d_inode, tree);
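
The audit_tree.c hunks above are one mechanical substitution: kern_path() fills a struct path directly, so callers no longer carry a whole struct nameidata just to reach its embedded nd.path. The idiom in isolation, as a sketch ("pathname" stands in for any kernel string):

	struct path path;
	int err;

	err = kern_path(pathname, 0, &path);	/* was: path_lookup(pathname, 0, &nd) */
	if (!err) {
		/* use path.mnt and path.dentry (was nd.path.mnt / nd.path.dentry) */
		path_put(&path);		/* was: path_put(&nd.path) */
	}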
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 046c1609606b..35eebd5510c2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2104,7 +2104,7 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 	down_read(&cgrp->pids_mutex);
 	if (pid) {
 		int end = cgrp->pids_length;
-		int i;
+
 		while (index < end) {
 			int mid = (index + end) / 2;
 			if (cgrp->tasks_pids[mid] == pid) {
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0d407e886735..0511716e9424 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -12,7 +12,9 @@
 #include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/personality.h>
+#include <linux/proc_fs.h>
 #include <linux/sched.h>
+#include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/sysctl.h>
 #include <linux/types.h>
@@ -173,20 +175,39 @@ __set_personality(u_long personality)
 	return 0;
 }
 
-int
-get_exec_domain_list(char *page)
+#ifdef CONFIG_PROC_FS
+static int execdomains_proc_show(struct seq_file *m, void *v)
 {
 	struct exec_domain *ep;
-	int len = 0;
 
 	read_lock(&exec_domains_lock);
-	for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next)
-		len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n",
+	for (ep = exec_domains; ep; ep = ep->next)
+		seq_printf(m, "%d-%d\t%-16s\t[%s]\n",
 			ep->pers_low, ep->pers_high, ep->name,
 			module_name(ep->module));
 	read_unlock(&exec_domains_lock);
-	return (len);
+	return 0;
+}
+
+static int execdomains_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, execdomains_proc_show, NULL);
+}
+
+static const struct file_operations execdomains_proc_fops = {
+	.open		= execdomains_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init proc_execdomains_init(void)
+{
+	proc_create("execdomains", 0, NULL, &execdomains_proc_fops);
+	return 0;
 }
+module_init(proc_execdomains_init);
+#endif
 
 asmlinkage long
 sys_personality(u_long personality)
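
The conversion above is the standard single_open() idiom for small read-only /proc files; seq_file grows its buffer as needed, which is what lets the loop drop the old "len < PAGE_SIZE - 80" truncation guard. Reduced to its skeleton (a sketch; "foo_show"/"foo_open" are hypothetical names):

	static int foo_show(struct seq_file *m, void *v)
	{
		seq_printf(m, "...\n");	/* no manual length accounting */
		return 0;
	}

	static int foo_open(struct inode *inode, struct file *file)
	{
		return single_open(file, foo_show, NULL);
	}

	/* then wire .open = foo_open, .read = seq_read, .llseek = seq_lseek,
	 * .release = single_release into a file_operations and register it
	 * with proc_create(), exactly as the patch does for "execdomains". */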
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d093552dd6e..f6083561dfe0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1018,6 +1018,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
 
+	p->default_timer_slack_ns = current->timer_slack_ns;
+
#ifdef CONFIG_DETECT_SOFTLOCKUP
 	p->last_switch_count = 0;
 	p->last_switch_timestamp = 0;
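
This one line makes timer slack heritable: the child's default slack is snapshotted from the parent's current value at fork. Elsewhere in this series (kernel/sys.c in the diffstat) userspace gains a prctl() knob for its own slack; a sketch, values in nanoseconds:

	#include <sys/prctl.h>

	prctl(PR_SET_TIMERSLACK, 50000UL);	/* allow ~50 us of wakeup coalescing */
	long slack = prctl(PR_GET_TIMERSLACK);	/* read the current value back */

Passing 0 is meant to restore the default_timer_slack_ns inherited here.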
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..8af10027514b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1296,13 +1296,16 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (!abs_time)
 		schedule();
 	else {
+		unsigned long slack;
+		slack = current->timer_slack_ns;
+		if (rt_task(current))
+			slack = 0;
 		hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(&t, current);
-		t.timer.expires = *abs_time;
+		hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
 
-		hrtimer_start(&t.timer, t.timer.expires,
-			      HRTIMER_MODE_ABS);
+		hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
 		if (!hrtimer_active(&t.timer))
 			t.task = NULL;
 
@@ -1404,7 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(to, current);
-		to->timer.expires = *time;
+		hrtimer_set_expires(&to->timer, *time);
 	}
 
 	q.pi_state = NULL;
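
Worth noting in the futex_wait() hunk: slack is only ever added on the late side, so the timer may fire anywhere in [*abs_time, *abs_time + slack], and real-time tasks are exempted so their wakeups are never deferred. The policy in isolation:

	unsigned long slack = rt_task(current) ? 0 : current->timer_slack_ns;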
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 95978f48e039..2b465dfde426 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -517,7 +517,7 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 		if (!base->first)
 			continue;
 		timer = rb_entry(base->first, struct hrtimer, node);
-		expires = ktime_sub(timer->expires, base->offset);
+		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires.tv64 < cpu_base->expires_next.tv64)
 			cpu_base->expires_next = expires;
 	}
@@ -539,10 +539,10 @@ static int hrtimer_reprogram(struct hrtimer *timer,
			     struct hrtimer_clock_base *base)
 {
 	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
-	ktime_t expires = ktime_sub(timer->expires, base->offset);
+	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
-	WARN_ON_ONCE(timer->expires.tv64 < 0);
+	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 
 	/*
	 * When the callback is running, we do not reprogram the clock event
@@ -795,7 +795,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 	u64 orun = 1;
 	ktime_t delta;
 
-	delta = ktime_sub(now, timer->expires);
+	delta = ktime_sub(now, hrtimer_get_expires(timer));
 
 	if (delta.tv64 < 0)
 		return 0;
@@ -807,8 +807,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 		s64 incr = ktime_to_ns(interval);
 
 		orun = ktime_divns(delta, incr);
-		timer->expires = ktime_add_ns(timer->expires, incr * orun);
-		if (timer->expires.tv64 > now.tv64)
+		hrtimer_add_expires_ns(timer, incr * orun);
+		if (hrtimer_get_expires_tv64(timer) > now.tv64)
 			return orun;
 		/*
		 * This (and the ktime_add() below) is the
@@ -816,7 +816,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
		 */
 		orun++;
 	}
-	timer->expires = ktime_add_safe(timer->expires, interval);
+	hrtimer_add_expires(timer, interval);
 
 	return orun;
 }
@@ -848,7 +848,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
		 * We don't care about collisions. Nodes with
		 * the same expiry time stay together.
		 */
-		if (timer->expires.tv64 < entry->expires.tv64) {
+		if (hrtimer_get_expires_tv64(timer) <
+		    hrtimer_get_expires_tv64(entry)) {
 			link = &(*link)->rb_left;
 		} else {
 			link = &(*link)->rb_right;
@@ -945,9 +946,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 }
 
 /**
- * hrtimer_start - (re)start an relative timer on the current CPU
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
 * @timer:	the timer to be added
 * @tim:	expiry time
+ * @delta_ns:	"slack" range for the timer
 * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 *
 * Returns:
@@ -955,7 +957,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 *  1 when the timer was active
 */
 int
-hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
+			const enum hrtimer_mode mode)
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
@@ -983,7 +986,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
#endif
 	}
 
-	timer->expires = tim;
+	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
 	timer_stats_hrtimer_set_start_info(timer);
 
@@ -1016,8 +1019,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
+
+/**
+ * hrtimer_start - (re)start an hrtimer on the current CPU
+ * @timer:	the timer to be added
+ * @tim:	expiry time
+ * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ *
+ * Returns:
+ *  0 on success
+ *  1 when the timer was active
+ */
+int
+hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+{
+	return hrtimer_start_range_ns(timer, tim, 0, mode);
+}
 EXPORT_SYMBOL_GPL(hrtimer_start);
 
+
 /**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:	hrtimer to stop
@@ -1077,7 +1098,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 	ktime_t rem;
 
 	base = lock_hrtimer_base(timer, &flags);
-	rem = ktime_sub(timer->expires, base->get_time());
+	rem = hrtimer_expires_remaining(timer);
 	unlock_hrtimer_base(timer, &flags);
 
 	return rem;
@@ -1109,7 +1130,7 @@ ktime_t hrtimer_get_next_event(void)
 			continue;
 
 		timer = rb_entry(base->first, struct hrtimer, node);
-		delta.tv64 = timer->expires.tv64;
+		delta.tv64 = hrtimer_get_expires_tv64(timer);
 		delta = ktime_sub(delta, base->get_time());
 		if (delta.tv64 < mindelta.tv64)
 			mindelta.tv64 = delta.tv64;
@@ -1310,10 +1331,23 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 			timer = rb_entry(node, struct hrtimer, node);
 
-			if (basenow.tv64 < timer->expires.tv64) {
+			/*
+			 * The immediate goal for using the softexpires is
+			 * minimizing wakeups, not running timers at the
+			 * earliest interrupt after their soft expiration.
+			 * This allows us to avoid using a Priority Search
+			 * Tree, which can answer a stabbing query for
+			 * overlapping intervals and instead use the simple
+			 * BST we already have.
+			 * We don't add extra wakeups by delaying timers that
+			 * are right-of a not yet expired timer, because that
+			 * timer will have to trigger a wakeup anyway.
+			 */
+
+			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
 				ktime_t expires;
 
-				expires = ktime_sub(timer->expires,
+				expires = ktime_sub(hrtimer_get_expires(timer),
						    base->offset);
 				if (expires.tv64 < expires_next.tv64)
 					expires_next = expires;
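
To follow the new check: a range timer carries two bounds, set together by hrtimer_set_expires_range_ns(). A sketch of the semantics this series introduces:

	/* hrtimer_set_expires_range_ns(timer, t, delta) records:
	 *   softexpires = t          -- earliest time the timer may fire
	 *   expires     = t + delta  -- latest; the clockevent is programmed
	 *                               for this one
	 * The loop above runs every timer whose softexpires has passed, so an
	 * interrupt taken for one timer batch-expires neighbours whose
	 * [softexpires, expires] windows cover "now".
	 */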
@@ -1349,6 +1383,30 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 		raise_softirq(HRTIMER_SOFTIRQ);
 }
 
+/**
+ * hrtimer_peek_ahead_timers -- run soft-expired timers now
+ *
+ * hrtimer_peek_ahead_timers will peek at the timer queue of
+ * the current cpu and check if there are any timers for which
+ * the soft expires time has passed. If any such timers exist,
+ * they are run immediately and then removed from the timer queue.
+ *
+ */
+void hrtimer_peek_ahead_timers(void)
+{
+	struct tick_device *td;
+	unsigned long flags;
+
+	if (!hrtimer_hres_active())
+		return;
+
+	local_irq_save(flags);
+	td = &__get_cpu_var(tick_cpu_device);
+	if (td && td->evtdev)
+		hrtimer_interrupt(td->evtdev);
+	local_irq_restore(flags);
+}
+
 static void run_hrtimer_softirq(struct softirq_action *h)
 {
 	run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
@@ -1414,7 +1472,8 @@ void hrtimer_run_queues(void)
 			struct hrtimer *timer;
 
 			timer = rb_entry(node, struct hrtimer, node);
-			if (base->softirq_time.tv64 <= timer->expires.tv64)
+			if (base->softirq_time.tv64 <=
+			    hrtimer_get_expires_tv64(timer))
 				break;
 
 			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
@@ -1462,7 +1521,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
-		hrtimer_start(&t->timer, t->timer.expires, mode);
+		hrtimer_start_expires(&t->timer, mode);
 		if (!hrtimer_active(&t->timer))
 			t->task = NULL;
 
@@ -1484,7 +1543,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
 	struct timespec rmt;
 	ktime_t rem;
 
-	rem = ktime_sub(timer->expires, timer->base->get_time());
+	rem = hrtimer_expires_remaining(timer);
 	if (rem.tv64 <= 0)
 		return 0;
 	rmt = ktime_to_timespec(rem);
@@ -1503,7 +1562,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 
 	hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
				HRTIMER_MODE_ABS);
-	t.timer.expires.tv64 = restart->nanosleep.expires;
+	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
 
 	if (do_nanosleep(&t, HRTIMER_MODE_ABS))
 		goto out;
@@ -1528,9 +1587,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	struct restart_block *restart;
 	struct hrtimer_sleeper t;
 	int ret = 0;
+	unsigned long slack;
+
+	slack = current->timer_slack_ns;
+	if (rt_task(current))
+		slack = 0;
 
 	hrtimer_init_on_stack(&t.timer, clockid, mode);
-	t.timer.expires = timespec_to_ktime(*rqtp);
+	hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
 	if (do_nanosleep(&t, mode))
 		goto out;
 
@@ -1550,7 +1614,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	restart->fn = hrtimer_nanosleep_restart;
 	restart->nanosleep.index = t.timer.base->index;
 	restart->nanosleep.rmtp = rmtp;
-	restart->nanosleep.expires = t.timer.expires.tv64;
+	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
 
 	ret = -ERESTART_RESTARTBLOCK;
 out:
@@ -1752,3 +1816,103 @@ void __init hrtimers_init(void)
#endif
 }
 
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires:	timeout value (ktime_t)
+ * @delta:	slack in expires timeout (ktime_t)
+ * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel gives the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, though never earlier than
+ * @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired, otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+				     const enum hrtimer_mode mode)
+{
+	struct hrtimer_sleeper t;
+
+	/*
+	 * Optimize when a zero timeout value is given. It does not
+	 * matter whether this is an absolute or a relative time.
+	 */
+	if (expires && !expires->tv64) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	/*
+	 * A NULL parameter means "infinite"
+	 */
+	if (!expires) {
+		schedule();
+		__set_current_state(TASK_RUNNING);
+		return -EINTR;
+	}
+
+	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+
+	hrtimer_init_sleeper(&t, current);
+
+	hrtimer_start_expires(&t.timer, mode);
+	if (!hrtimer_active(&t.timer))
+		t.task = NULL;
+
+	if (likely(t.task))
+		schedule();
+
+	hrtimer_cancel(&t.timer);
+	destroy_hrtimer_on_stack(&t.timer);
+
+	__set_current_state(TASK_RUNNING);
+
+	return !t.task ? 0 : -EINTR;
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
+
+/**
+ * schedule_hrtimeout - sleep until timeout
+ * @expires:	timeout value (ktime_t)
+ * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired, otherwise -EINTR
+ */
+int __sched schedule_hrtimeout(ktime_t *expires,
+			       const enum hrtimer_mode mode)
+{
+	return schedule_hrtimeout_range(expires, 0, mode);
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout);
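
A typical call site for the new primitive, kernel-side (a sketch: sleep about 10 ms, letting the wakeup be coalesced within 1 ms of slack):

	ktime_t timeout = ktime_set(0, 10 * NSEC_PER_MSEC);

	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule_hrtimeout_range(&timeout, NSEC_PER_MSEC, HRTIMER_MODE_REL);

With delta = 0 this degenerates to a precise sleep, which is exactly how the schedule_hrtimeout() wrapper above is implemented.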
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4895fde4eb93..10b5092e9bfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,6 +76,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	desc->chip_data = NULL;
 	desc->handle_irq = handle_bad_irq;
 	desc->chip = &no_irq_chip;
+	desc->name = NULL;
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
@@ -127,7 +128,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 		return 0;
 
 	spin_lock_irqsave(&desc->lock, flags);
-	ret = __irq_set_trigger(desc, irq, flags);
+	ret = __irq_set_trigger(desc, irq, type);
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
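
The second hunk is a genuine bug fix, not a cleanup: at that point "flags" is the unsigned long saved by spin_lock_irqsave(), so the old call handed the saved IRQ-state word to __irq_set_trigger() where the requested IRQ_TYPE_* value belonged, silently ignoring the caller's trigger type.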
diff --git a/kernel/module.c b/kernel/module.c
index 0d8d21ee792c..1f4cc00e0c20 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -20,11 +20,13 @@
 #include <linux/moduleloader.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
+#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -42,6 +44,7 @@
 #include <linux/string.h>
 #include <linux/mutex.h>
 #include <linux/unwind.h>
+#include <linux/rculist.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <linux/license.h>
@@ -63,7 +66,7 @@
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
 /* List of modules, protected by module_mutex or preempt_disable
- * (add/delete uses stop_machine). */
+ * (delete uses stop_machine/add uses RCU list operations). */
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
@@ -132,6 +135,29 @@ static unsigned int find_sec(Elf_Ehdr *hdr,
 	return 0;
 }
 
+/* Find a module section, or NULL. */
+static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs,
+			  const char *secstrings, const char *name)
+{
+	/* Section 0 has sh_addr 0. */
+	return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr;
+}
+
+/* Find a module section, or NULL.  Fill in number of "objects" in section. */
+static void *section_objs(Elf_Ehdr *hdr,
+			  Elf_Shdr *sechdrs,
+			  const char *secstrings,
+			  const char *name,
+			  size_t object_size,
+			  unsigned int *num)
+{
+	unsigned int sec = find_sec(hdr, sechdrs, secstrings, name);
+
+	/* Section 0 has sh_addr 0 and sh_size 0. */
+	*num = sechdrs[sec].sh_size / object_size;
+	return (void *)sechdrs[sec].sh_addr;
+}
+
 /* Provided by the linker */
 extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
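
These two helpers lean on an ELF guarantee the rest of the patch exploits: find_sec() returns 0 when a section is missing, and section 0 (SHN_UNDEF) always has sh_addr == 0 and sh_size == 0. A missing section therefore comes back as NULL with a count of 0, so callers need no index-validity checks. Usage, as the later hunks apply it (a sketch):

	unsigned int num;
	struct exception_table_entry *extable;

	extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
			       sizeof(*extable), &num);
	/* missing "__ex_table" => extable == NULL && num == 0 */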
@@ -218,7 +244,7 @@ static bool each_symbol(bool (*fn)(const struct symsearch *arr,
 	if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
 		return true;
 
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		struct symsearch arr[] = {
 			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
 			  NOT_GPL_ONLY, false },
@@ -1394,17 +1420,6 @@ static void mod_kobject_remove(struct module *mod)
 }
 
 /*
- * link the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __link_module(void *_mod)
-{
-	struct module *mod = _mod;
-	list_add(&mod->list, &modules);
-	return 0;
-}
-
-/*
 * unlink the module with the whole machine is stopped with interrupts off
 * - this defends against kallsyms not taking locks
 */
@@ -1789,32 +1804,20 @@ static inline void add_kallsyms(struct module *mod,
 }
#endif /* CONFIG_KALLSYMS */
 
-#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
-static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
+static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
 {
-	struct mod_debug *debug_info;
-	unsigned long pos, end;
-	unsigned int num_verbose;
-
-	pos = sechdrs[verboseindex].sh_addr;
-	num_verbose = sechdrs[verboseindex].sh_size /
-				sizeof(struct mod_debug);
-	end = pos + (num_verbose * sizeof(struct mod_debug));
+#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
+	unsigned int i;
 
-	for (; pos < end; pos += sizeof(struct mod_debug)) {
-		debug_info = (struct mod_debug *)pos;
-		register_dynamic_debug_module(debug_info->modname,
-			debug_info->type, debug_info->logical_modname,
-			debug_info->flag_names, debug_info->hash,
-			debug_info->hash2);
+	for (i = 0; i < num; i++) {
+		register_dynamic_debug_module(debug[i].modname,
+					      debug[i].type,
+					      debug[i].logical_modname,
+					      debug[i].flag_names,
+					      debug[i].hash, debug[i].hash2);
 	}
-}
-#else
-static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
-					unsigned int verboseindex)
-{
-}
#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
+}
 
 static void *module_alloc_update_bounds(unsigned long size)
 {
@@ -1843,37 +1846,14 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int i;
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
-	unsigned int setupindex;
-	unsigned int exindex;
-	unsigned int exportindex;
-	unsigned int modindex;
-	unsigned int obsparmindex;
-	unsigned int infoindex;
-	unsigned int gplindex;
-	unsigned int crcindex;
-	unsigned int gplcrcindex;
-	unsigned int versindex;
-	unsigned int pcpuindex;
-	unsigned int gplfutureindex;
-	unsigned int gplfuturecrcindex;
+	unsigned int modindex, versindex, infoindex, pcpuindex;
 	unsigned int unwindex = 0;
-#ifdef CONFIG_UNUSED_SYMBOLS
-	unsigned int unusedindex;
-	unsigned int unusedcrcindex;
-	unsigned int unusedgplindex;
-	unsigned int unusedgplcrcindex;
-#endif
-	unsigned int markersindex;
-	unsigned int markersstringsindex;
-	unsigned int verboseindex;
-	unsigned int tracepointsindex;
-	unsigned int tracepointsstringsindex;
-	unsigned int mcountindex;
+	unsigned int num_kp, num_mcount;
+	struct kernel_param *kp;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-	void *mseg;
-	struct exception_table_entry *extable;
+	unsigned long *mseg;
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -1937,6 +1917,7 @@ static noinline struct module *load_module(void __user *umod,
 		err = -ENOEXEC;
 		goto free_hdr;
 	}
+	/* This is temporary: point mod into copy of data. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
 	if (symindex == 0) {
@@ -1946,22 +1927,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto free_hdr;
 	}
 
-	/* Optional sections */
-	exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
-	gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
-	gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
-	crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
-	gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
-	gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
-#ifdef CONFIG_UNUSED_SYMBOLS
-	unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
-	unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
-	unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
-	unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
-#endif
-	setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
-	exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
-	obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
 	versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
 	infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
 	pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
@@ -2117,42 +2082,57 @@ static noinline struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto cleanup;
 
-	/* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */
-	mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms);
-	mod->syms = (void *)sechdrs[exportindex].sh_addr;
-	if (crcindex)
-		mod->crcs = (void *)sechdrs[crcindex].sh_addr;
-	mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms);
-	mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr;
-	if (gplcrcindex)
-		mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
-	mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
-					sizeof(*mod->gpl_future_syms);
-	mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
-	if (gplfuturecrcindex)
-		mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+	/* Now we've got everything in the final locations, we can
+	 * find optional sections. */
+	kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*kp),
+			  &num_kp);
+	mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
+				 sizeof(*mod->syms), &mod->num_syms);
+	mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
+	mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
+				     sizeof(*mod->gpl_syms),
+				     &mod->num_gpl_syms);
+	mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
+	mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
+					    "__ksymtab_gpl_future",
+					    sizeof(*mod->gpl_future_syms),
+					    &mod->num_gpl_future_syms);
+	mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
+					    "__kcrctab_gpl_future");
 
#ifdef CONFIG_UNUSED_SYMBOLS
-	mod->num_unused_syms = sechdrs[unusedindex].sh_size /
-					sizeof(*mod->unused_syms);
-	mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
-					sizeof(*mod->unused_gpl_syms);
-	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
-	if (unusedcrcindex)
-		mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
-	mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
-	if (unusedgplcrcindex)
-		mod->unused_gpl_crcs
-			= (void *)sechdrs[unusedgplcrcindex].sh_addr;
+	mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
+					"__ksymtab_unused",
+					sizeof(*mod->unused_syms),
+					&mod->num_unused_syms);
+	mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
+					"__kcrctab_unused");
+	mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
+					    "__ksymtab_unused_gpl",
+					    sizeof(*mod->unused_gpl_syms),
+					    &mod->num_unused_gpl_syms);
+	mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
+					    "__kcrctab_unused_gpl");
+#endif
+
+#ifdef CONFIG_MARKERS
+	mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
+				    sizeof(*mod->markers), &mod->num_markers);
+#endif
+#ifdef CONFIG_TRACEPOINTS
+	mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
+					"__tracepoints",
+					sizeof(*mod->tracepoints),
+					&mod->num_tracepoints);
#endif
 
#ifdef CONFIG_MODVERSIONS
-	if ((mod->num_syms && !crcindex)
-	    || (mod->num_gpl_syms && !gplcrcindex)
-	    || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+	if ((mod->num_syms && !mod->crcs)
+	    || (mod->num_gpl_syms && !mod->gpl_crcs)
+	    || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
#ifdef CONFIG_UNUSED_SYMBOLS
-	    || (mod->num_unused_syms && !unusedcrcindex)
-	    || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+	    || (mod->num_unused_syms && !mod->unused_crcs)
+	    || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
#endif
	    ) {
 		printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
@@ -2161,16 +2141,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto cleanup;
 	}
#endif
-	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
-	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
-					"__markers_strings");
-	verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
-	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
-	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
-					"__tracepoints_strings");
-
-	mcountindex = find_sec(hdr, sechdrs, secstrings,
-			       "__mcount_loc");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2193,28 +2163,16 @@ static noinline struct module *load_module(void __user *umod,
 		if (err < 0)
 			goto cleanup;
 	}
-#ifdef CONFIG_MARKERS
-	mod->markers = (void *)sechdrs[markersindex].sh_addr;
-	mod->num_markers =
-		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
-#endif
-#ifdef CONFIG_TRACEPOINTS
-	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
-	mod->num_tracepoints =
-		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
-#endif
-
 
 	/* Find duplicate symbols */
 	err = verify_export_symbols(mod);
-
 	if (err < 0)
 		goto cleanup;
 
 	/* Set up and sort exception table */
-	mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
-	mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
-	sort_extable(extable, extable + mod->num_exentries);
+	mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
+				    sizeof(*mod->extable), &mod->num_exentries);
+	sort_extable(mod->extable, mod->extable + mod->num_exentries);
 
 	/* Finally, copy percpu area over. */
 	percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
@@ -2223,11 +2181,17 @@ static noinline struct module *load_module(void __user *umod,
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
 	if (!mod->taints) {
+		struct mod_debug *debug;
+		unsigned int num_debug;
+
#ifdef CONFIG_MARKERS
 		marker_update_probe_range(mod->markers,
 			mod->markers + mod->num_markers);
#endif
-		dynamic_printk_setup(sechdrs, verboseindex);
+		debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
+				     sizeof(*debug), &num_debug);
+		dynamic_printk_setup(debug, num_debug);
+
#ifdef CONFIG_TRACEPOINTS
 		tracepoint_update_probe_range(mod->tracepoints,
 			mod->tracepoints + mod->num_tracepoints);
@@ -2235,8 +2199,9 @@ static noinline struct module *load_module(void __user *umod,
 	}
 
 	/* sechdrs[0].sh_size is always zero */
-	mseg = (void *)sechdrs[mcountindex].sh_addr;
-	ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
+			    sizeof(*mseg), &num_mcount);
+	ftrace_init_module(mseg, mseg + num_mcount);
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2261,30 +2226,24 @@ static noinline struct module *load_module(void __user *umod,
 	set_fs(old_fs);
 
 	mod->args = args;
-	if (obsparmindex)
+	if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
 		printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
		       mod->name);
 
 	/* Now sew it into the lists so we can get lockdep and oops
-	 * info during argument parsing. Noone should access us, since
-	 * strong_try_module_get() will fail. */
-	stop_machine(__link_module, mod, NULL);
-
-	/* Size of section 0 is 0, so this works well if no params */
-	err = parse_args(mod->name, mod->args,
-			 (struct kernel_param *)
-			 sechdrs[setupindex].sh_addr,
-			 sechdrs[setupindex].sh_size
-			 / sizeof(struct kernel_param),
-			 NULL);
+	 * info during argument parsing. No one should access us, since
+	 * strong_try_module_get() will fail.
+	 * lockdep/oops can run asynchronously, so use the RCU list insertion
+	 * function to insert in a way safe to concurrent readers.
+	 * The mutex protects against concurrent writers.
+	 */
+	list_add_rcu(&mod->list, &modules);
+
+	err = parse_args(mod->name, mod->args, kp, num_kp, NULL);
 	if (err < 0)
 		goto unlink;
 
-	err = mod_sysfs_setup(mod,
-			      (struct kernel_param *)
-			      sechdrs[setupindex].sh_addr,
-			      sechdrs[setupindex].sh_size
-			      / sizeof(struct kernel_param));
+	err = mod_sysfs_setup(mod, kp, num_kp);
 	if (err < 0)
 		goto unlink;
 	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
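
The list_add_rcu() above pairs with the reader-side conversions in the hunks that follow: lockless walkers run under preempt_disable() (a Classic-RCU read-side critical section per the updated comment at the top of the file) and must use the _rcu list primitives. The reader pattern, in isolation:

	struct module *mod;

	preempt_disable();
	list_for_each_entry_rcu(mod, &modules, list) {
		/* mod may be concurrently added but never freed under us,
		 * because deletion still goes through stop_machine() */
	}
	preempt_enable();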
@@ -2473,7 +2432,7 @@ const char *module_address_lookup(unsigned long addr, | |||
2473 | const char *ret = NULL; | 2432 | const char *ret = NULL; |
2474 | 2433 | ||
2475 | preempt_disable(); | 2434 | preempt_disable(); |
2476 | list_for_each_entry(mod, &modules, list) { | 2435 | list_for_each_entry_rcu(mod, &modules, list) { |
2477 | if (within(addr, mod->module_init, mod->init_size) | 2436 | if (within(addr, mod->module_init, mod->init_size) |
2478 | || within(addr, mod->module_core, mod->core_size)) { | 2437 | || within(addr, mod->module_core, mod->core_size)) { |
2479 | if (modname) | 2438 | if (modname) |
@@ -2496,7 +2455,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) | |||
2496 | struct module *mod; | 2455 | struct module *mod; |
2497 | 2456 | ||
2498 | preempt_disable(); | 2457 | preempt_disable(); |
2499 | list_for_each_entry(mod, &modules, list) { | 2458 | list_for_each_entry_rcu(mod, &modules, list) { |
2500 | if (within(addr, mod->module_init, mod->init_size) || | 2459 | if (within(addr, mod->module_init, mod->init_size) || |
2501 | within(addr, mod->module_core, mod->core_size)) { | 2460 | within(addr, mod->module_core, mod->core_size)) { |
2502 | const char *sym; | 2461 | const char *sym; |
@@ -2520,7 +2479,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | |||
2520 | struct module *mod; | 2479 | struct module *mod; |
2521 | 2480 | ||
2522 | preempt_disable(); | 2481 | preempt_disable(); |
2523 | list_for_each_entry(mod, &modules, list) { | 2482 | list_for_each_entry_rcu(mod, &modules, list) { |
2524 | if (within(addr, mod->module_init, mod->init_size) || | 2483 | if (within(addr, mod->module_init, mod->init_size) || |
2525 | within(addr, mod->module_core, mod->core_size)) { | 2484 | within(addr, mod->module_core, mod->core_size)) { |
2526 | const char *sym; | 2485 | const char *sym; |
@@ -2547,7 +2506,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | |||
2547 | struct module *mod; | 2506 | struct module *mod; |
2548 | 2507 | ||
2549 | preempt_disable(); | 2508 | preempt_disable(); |
2550 | list_for_each_entry(mod, &modules, list) { | 2509 | list_for_each_entry_rcu(mod, &modules, list) { |
2551 | if (symnum < mod->num_symtab) { | 2510 | if (symnum < mod->num_symtab) { |
2552 | *value = mod->symtab[symnum].st_value; | 2511 | *value = mod->symtab[symnum].st_value; |
2553 | *type = mod->symtab[symnum].st_info; | 2512 | *type = mod->symtab[symnum].st_info; |
@@ -2590,7 +2549,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
2590 | ret = mod_find_symname(mod, colon+1); | 2549 | ret = mod_find_symname(mod, colon+1); |
2591 | *colon = ':'; | 2550 | *colon = ':'; |
2592 | } else { | 2551 | } else { |
2593 | list_for_each_entry(mod, &modules, list) | 2552 | list_for_each_entry_rcu(mod, &modules, list) |
2594 | if ((ret = mod_find_symname(mod, name)) != 0) | 2553 | if ((ret = mod_find_symname(mod, name)) != 0) |
2595 | break; | 2554 | break; |
2596 | } | 2555 | } |
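The list_for_each_entry_rcu() conversions here (and in the extable and address lookups further down) are the matching reader half. Under the classic, non-preemptible RCU of this era, preempt_disable() is a valid read-side critical section, so these paths need no lock. A hedged reader sketch to pair with the publisher above:

    #include <linux/errno.h>
    #include <linux/rculist.h>

    static int find_payload(int key, int *out)
    {
        struct item *it;
        int ret = -ENOENT;

        preempt_disable();               /* classic-RCU read-side section */
        list_for_each_entry_rcu(it, &items, list) {
            if (it->payload == key) {
                *out = it->payload;
                ret = 0;
                break;
            }
        }
        preempt_enable();
        return ret;
    }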
@@ -2599,23 +2558,6 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
2599 | } | 2558 | } |
2600 | #endif /* CONFIG_KALLSYMS */ | 2559 | #endif /* CONFIG_KALLSYMS */ |
2601 | 2560 | ||
2602 | /* Called by the /proc file system to return a list of modules. */ | ||
2603 | static void *m_start(struct seq_file *m, loff_t *pos) | ||
2604 | { | ||
2605 | mutex_lock(&module_mutex); | ||
2606 | return seq_list_start(&modules, *pos); | ||
2607 | } | ||
2608 | |||
2609 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | ||
2610 | { | ||
2611 | return seq_list_next(p, &modules, pos); | ||
2612 | } | ||
2613 | |||
2614 | static void m_stop(struct seq_file *m, void *p) | ||
2615 | { | ||
2616 | mutex_unlock(&module_mutex); | ||
2617 | } | ||
2618 | |||
2619 | static char *module_flags(struct module *mod, char *buf) | 2561 | static char *module_flags(struct module *mod, char *buf) |
2620 | { | 2562 | { |
2621 | int bx = 0; | 2563 | int bx = 0; |
@@ -2649,6 +2591,24 @@ static char *module_flags(struct module *mod, char *buf) | |||
2649 | return buf; | 2591 | return buf; |
2650 | } | 2592 | } |
2651 | 2593 | ||
2594 | #ifdef CONFIG_PROC_FS | ||
2595 | /* Called by the /proc file system to return a list of modules. */ | ||
2596 | static void *m_start(struct seq_file *m, loff_t *pos) | ||
2597 | { | ||
2598 | mutex_lock(&module_mutex); | ||
2599 | return seq_list_start(&modules, *pos); | ||
2600 | } | ||
2601 | |||
2602 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | ||
2603 | { | ||
2604 | return seq_list_next(p, &modules, pos); | ||
2605 | } | ||
2606 | |||
2607 | static void m_stop(struct seq_file *m, void *p) | ||
2608 | { | ||
2609 | mutex_unlock(&module_mutex); | ||
2610 | } | ||
2611 | |||
2652 | static int m_show(struct seq_file *m, void *p) | 2612 | static int m_show(struct seq_file *m, void *p) |
2653 | { | 2613 | { |
2654 | struct module *mod = list_entry(p, struct module, list); | 2614 | struct module *mod = list_entry(p, struct module, list); |
@@ -2679,13 +2639,33 @@ static int m_show(struct seq_file *m, void *p) | |||
2679 | Where refcount is a number or -, and deps is a comma-separated list | 2639 | Where refcount is a number or -, and deps is a comma-separated list |
2680 | of depends or -. | 2640 | of depends or -. |
2681 | */ | 2641 | */ |
2682 | const struct seq_operations modules_op = { | 2642 | static const struct seq_operations modules_op = { |
2683 | .start = m_start, | 2643 | .start = m_start, |
2684 | .next = m_next, | 2644 | .next = m_next, |
2685 | .stop = m_stop, | 2645 | .stop = m_stop, |
2686 | .show = m_show | 2646 | .show = m_show |
2687 | }; | 2647 | }; |
2688 | 2648 | ||
2649 | static int modules_open(struct inode *inode, struct file *file) | ||
2650 | { | ||
2651 | return seq_open(file, &modules_op); | ||
2652 | } | ||
2653 | |||
2654 | static const struct file_operations proc_modules_operations = { | ||
2655 | .open = modules_open, | ||
2656 | .read = seq_read, | ||
2657 | .llseek = seq_lseek, | ||
2658 | .release = seq_release, | ||
2659 | }; | ||
2660 | |||
2661 | static int __init proc_modules_init(void) | ||
2662 | { | ||
2663 | proc_create("modules", 0, NULL, &proc_modules_operations); | ||
2664 | return 0; | ||
2665 | } | ||
2666 | module_init(proc_modules_init); | ||
2667 | #endif | ||
2668 | |||
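Registering /proc/modules from module.c itself follows the stock seq_file recipe: seq_operations (or single_open for one-shot output), file_operations wired to seq_read, and proc_create() at init time. A generic sketch with hypothetical names:

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v)
    {
        seq_printf(m, "hello from /proc\n");
        return 0;
    }

    static int demo_open(struct inode *inode, struct file *file)
    {
        return single_open(file, demo_show, NULL);
    }

    static const struct file_operations demo_fops = {
        .open    = demo_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
    };

    static int __init demo_proc_init(void)
    {
        proc_create("demo", 0, NULL, &demo_fops);
        return 0;
    }
    module_init(demo_proc_init);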
2689 | /* Given an address, look for it in the module exception tables. */ | 2669 | /* Given an address, look for it in the module exception tables. */ |
2690 | const struct exception_table_entry *search_module_extables(unsigned long addr) | 2670 | const struct exception_table_entry *search_module_extables(unsigned long addr) |
2691 | { | 2671 | { |
@@ -2693,7 +2673,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
2693 | struct module *mod; | 2673 | struct module *mod; |
2694 | 2674 | ||
2695 | preempt_disable(); | 2675 | preempt_disable(); |
2696 | list_for_each_entry(mod, &modules, list) { | 2676 | list_for_each_entry_rcu(mod, &modules, list) { |
2697 | if (mod->num_exentries == 0) | 2677 | if (mod->num_exentries == 0) |
2698 | continue; | 2678 | continue; |
2699 | 2679 | ||
@@ -2719,7 +2699,7 @@ int is_module_address(unsigned long addr) | |||
2719 | 2699 | ||
2720 | preempt_disable(); | 2700 | preempt_disable(); |
2721 | 2701 | ||
2722 | list_for_each_entry(mod, &modules, list) { | 2702 | list_for_each_entry_rcu(mod, &modules, list) { |
2723 | if (within(addr, mod->module_core, mod->core_size)) { | 2703 | if (within(addr, mod->module_core, mod->core_size)) { |
2724 | preempt_enable(); | 2704 | preempt_enable(); |
2725 | return 1; | 2705 | return 1; |
@@ -2740,7 +2720,7 @@ struct module *__module_text_address(unsigned long addr) | |||
2740 | if (addr < module_addr_min || addr > module_addr_max) | 2720 | if (addr < module_addr_min || addr > module_addr_max) |
2741 | return NULL; | 2721 | return NULL; |
2742 | 2722 | ||
2743 | list_for_each_entry(mod, &modules, list) | 2723 | list_for_each_entry_rcu(mod, &modules, list) |
2744 | if (within(addr, mod->module_init, mod->init_text_size) | 2724 | if (within(addr, mod->module_init, mod->init_text_size) |
2745 | || within(addr, mod->module_core, mod->core_text_size)) | 2725 | || within(addr, mod->module_core, mod->core_text_size)) |
2746 | return mod; | 2726 | return mod; |
@@ -2765,8 +2745,11 @@ void print_modules(void) | |||
2765 | char buf[8]; | 2745 | char buf[8]; |
2766 | 2746 | ||
2767 | printk("Modules linked in:"); | 2747 | printk("Modules linked in:"); |
2768 | list_for_each_entry(mod, &modules, list) | 2748 | /* Most callers should already have preempt disabled, but make sure */ |
2749 | preempt_disable(); | ||
2750 | list_for_each_entry_rcu(mod, &modules, list) | ||
2769 | printk(" %s%s", mod->name, module_flags(mod, buf)); | 2751 | printk(" %s%s", mod->name, module_flags(mod, buf)); |
2752 | preempt_enable(); | ||
2770 | if (last_unloaded_module[0]) | 2753 | if (last_unloaded_module[0]) |
2771 | printk(" [last unloaded: %s]", last_unloaded_module); | 2754 | printk(" [last unloaded: %s]", last_unloaded_module); |
2772 | printk("\n"); | 2755 | printk("\n"); |
diff --git a/kernel/panic.c b/kernel/panic.c index bda561ef3cdf..6513aac8e992 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -34,13 +34,6 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); | |||
34 | 34 | ||
35 | EXPORT_SYMBOL(panic_notifier_list); | 35 | EXPORT_SYMBOL(panic_notifier_list); |
36 | 36 | ||
37 | static int __init panic_setup(char *str) | ||
38 | { | ||
39 | panic_timeout = simple_strtoul(str, NULL, 0); | ||
40 | return 1; | ||
41 | } | ||
42 | __setup("panic=", panic_setup); | ||
43 | |||
44 | static long no_blink(long time) | 37 | static long no_blink(long time) |
45 | { | 38 | { |
46 | return 0; | 39 | return 0; |
@@ -218,13 +211,6 @@ void add_taint(unsigned flag) | |||
218 | } | 211 | } |
219 | EXPORT_SYMBOL(add_taint); | 212 | EXPORT_SYMBOL(add_taint); |
220 | 213 | ||
221 | static int __init pause_on_oops_setup(char *str) | ||
222 | { | ||
223 | pause_on_oops = simple_strtoul(str, NULL, 0); | ||
224 | return 1; | ||
225 | } | ||
226 | __setup("pause_on_oops=", pause_on_oops_setup); | ||
227 | |||
228 | static void spin_msec(int msecs) | 214 | static void spin_msec(int msecs) |
229 | { | 215 | { |
230 | int i; | 216 | int i; |
@@ -384,3 +370,6 @@ void __stack_chk_fail(void) | |||
384 | } | 370 | } |
385 | EXPORT_SYMBOL(__stack_chk_fail); | 371 | EXPORT_SYMBOL(__stack_chk_fail); |
386 | #endif | 372 | #endif |
373 | |||
374 | core_param(panic, panic_timeout, int, 0644); | ||
375 | core_param(pause_on_oops, pause_on_oops, int, 0644); | ||
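core_param() subsumes the deleted __setup() handlers: one line both parses panic=N from the command line and exposes a writable /sys/module/kernel/parameters/panic. A sketch of the idiom with an illustrative parameter name:

    #include <linux/moduleparam.h>

    static int demo_timeout;

    /*
     * Parses "demo_timeout=N" on the kernel command line and also creates
     * /sys/module/kernel/parameters/demo_timeout (mode 0644), replacing a
     * manual __setup("demo_timeout=", ...) handler and its simple_strtoul().
     */
    core_param(demo_timeout, demo_timeout, int, 0644);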
diff --git a/kernel/params.c b/kernel/params.c index afc46a23eb6d..a1e3025b19a9 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -373,6 +373,8 @@ int param_get_string(char *buffer, struct kernel_param *kp) | |||
373 | } | 373 | } |
374 | 374 | ||
375 | /* sysfs output in /sys/modules/XYZ/parameters/ */ | 375 | /* sysfs output in /sys/modules/XYZ/parameters/ */ |
376 | #define to_module_attr(n) container_of(n, struct module_attribute, attr); | ||
377 | #define to_module_kobject(n) container_of(n, struct module_kobject, kobj); | ||
376 | 378 | ||
377 | extern struct kernel_param __start___param[], __stop___param[]; | 379 | extern struct kernel_param __start___param[], __stop___param[]; |
378 | 380 | ||
@@ -384,6 +386,7 @@ struct param_attribute | |||
384 | 386 | ||
385 | struct module_param_attrs | 387 | struct module_param_attrs |
386 | { | 388 | { |
389 | unsigned int num; | ||
387 | struct attribute_group grp; | 390 | struct attribute_group grp; |
388 | struct param_attribute attrs[0]; | 391 | struct param_attribute attrs[0]; |
389 | }; | 392 | }; |
@@ -434,93 +437,120 @@ static ssize_t param_attr_store(struct module_attribute *mattr, | |||
434 | 437 | ||
435 | #ifdef CONFIG_SYSFS | 438 | #ifdef CONFIG_SYSFS |
436 | /* | 439 | /* |
437 | * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME | 440 | * add_sysfs_param - add a parameter to sysfs |
438 | * @mk: struct module_kobject (contains parent kobject) | 441 | * @mk: struct module_kobject |
439 | * @kparam: array of struct kernel_param, the actual parameter definitions | 442 | * @kp: the actual parameter definition to add to sysfs |
440 | * @num_params: number of entries in array | 443 | * @name: name of parameter |
441 | * @name_skip: offset where the parameter name starts in kparam[].name. Needed for built-in "modules" | ||
442 | * | 444 | * |
443 | * Create a kobject for a (per-module) group of parameters, and create files | 445 | * Create a kobject for a (per-module) parameter if mk->mp is NULL, and |
444 | * in sysfs. A pointer to the param_kobject is returned on success, | 446 | * create the file in sysfs. Returns an error on out of memory. Always cleans up |
445 | * NULL if there's no parameter to export, or other ERR_PTR(err). | 447 | * if there's an error. |
446 | */ | 448 | */ |
447 | static __modinit struct module_param_attrs * | 449 | static __modinit int add_sysfs_param(struct module_kobject *mk, |
448 | param_sysfs_setup(struct module_kobject *mk, | 450 | struct kernel_param *kp, |
449 | struct kernel_param *kparam, | 451 | const char *name) |
450 | unsigned int num_params, | ||
451 | unsigned int name_skip) | ||
452 | { | 452 | { |
453 | struct module_param_attrs *mp; | 453 | struct module_param_attrs *new; |
454 | unsigned int valid_attrs = 0; | 454 | struct attribute **attrs; |
455 | unsigned int i, size[2]; | 455 | int err, num; |
456 | struct param_attribute *pattr; | 456 | |
457 | struct attribute **gattr; | 457 | /* We don't bother calling this with invisible parameters. */ |
458 | int err; | 458 | BUG_ON(!kp->perm); |
459 | 459 | ||
460 | for (i=0; i<num_params; i++) { | 460 | if (!mk->mp) { |
461 | if (kparam[i].perm) | 461 | num = 0; |
462 | valid_attrs++; | 462 | attrs = NULL; |
463 | } else { | ||
464 | num = mk->mp->num; | ||
465 | attrs = mk->mp->grp.attrs; | ||
463 | } | 466 | } |
464 | 467 | ||
465 | if (!valid_attrs) | 468 | /* Enlarge. */ |
466 | return NULL; | 469 | new = krealloc(mk->mp, |
467 | 470 | sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1), | |
468 | size[0] = ALIGN(sizeof(*mp) + | 471 | GFP_KERNEL); |
469 | valid_attrs * sizeof(mp->attrs[0]), | 472 | if (!new) { |
470 | sizeof(mp->grp.attrs[0])); | 473 | kfree(mk->mp); |
471 | size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]); | 474 | err = -ENOMEM; |
472 | 475 | goto fail; | |
473 | mp = kzalloc(size[0] + size[1], GFP_KERNEL); | ||
474 | if (!mp) | ||
475 | return ERR_PTR(-ENOMEM); | ||
476 | |||
477 | mp->grp.name = "parameters"; | ||
478 | mp->grp.attrs = (void *)mp + size[0]; | ||
479 | |||
480 | pattr = &mp->attrs[0]; | ||
481 | gattr = &mp->grp.attrs[0]; | ||
482 | for (i = 0; i < num_params; i++) { | ||
483 | struct kernel_param *kp = &kparam[i]; | ||
484 | if (kp->perm) { | ||
485 | pattr->param = kp; | ||
486 | pattr->mattr.show = param_attr_show; | ||
487 | pattr->mattr.store = param_attr_store; | ||
488 | pattr->mattr.attr.name = (char *)&kp->name[name_skip]; | ||
489 | pattr->mattr.attr.mode = kp->perm; | ||
490 | *(gattr++) = &(pattr++)->mattr.attr; | ||
491 | } | ||
492 | } | 476 | } |
493 | *gattr = NULL; | 477 | attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL); |
494 | 478 | if (!attrs) { | |
495 | if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) { | 479 | err = -ENOMEM; |
496 | kfree(mp); | 480 | goto fail_free_new; |
497 | return ERR_PTR(err); | ||
498 | } | 481 | } |
499 | return mp; | 482 | |
483 | /* Sysfs wants everything zeroed. */ | ||
484 | memset(new, 0, sizeof(*new)); | ||
485 | memset(&new->attrs[num], 0, sizeof(new->attrs[num])); | ||
486 | memset(&attrs[num], 0, sizeof(attrs[num])); | ||
487 | new->grp.name = "parameters"; | ||
488 | new->grp.attrs = attrs; | ||
489 | |||
490 | /* Tack new one on the end. */ | ||
491 | new->attrs[num].param = kp; | ||
492 | new->attrs[num].mattr.show = param_attr_show; | ||
493 | new->attrs[num].mattr.store = param_attr_store; | ||
494 | new->attrs[num].mattr.attr.name = (char *)name; | ||
495 | new->attrs[num].mattr.attr.mode = kp->perm; | ||
496 | new->num = num+1; | ||
497 | |||
498 | /* Fix up all the pointers, since krealloc can move us */ | ||
499 | for (num = 0; num < new->num; num++) | ||
500 | new->grp.attrs[num] = &new->attrs[num].mattr.attr; | ||
501 | new->grp.attrs[num] = NULL; | ||
502 | |||
503 | mk->mp = new; | ||
504 | return 0; | ||
505 | |||
506 | fail_free_new: | ||
507 | kfree(new); | ||
508 | fail: | ||
509 | mk->mp = NULL; | ||
510 | return err; | ||
500 | } | 511 | } |
501 | 512 | ||
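The krealloc() dance above is worth seeing in isolation: grow the flexible-array block, grow the pointer array that sysfs consumes (plus room for the NULL terminator), then rebuild every derived pointer because either block may have moved. A reduced sketch under hypothetical types:

    #include <linux/slab.h>

    struct entry { int val; };

    struct table {
        unsigned int num;
        struct entry **ptrs;    /* stands in for grp.attrs */
        struct entry entries[0]; /* stands in for attrs[] */
    };

    /* Grow @t by one entry; on failure the old table is freed,
     * mirroring add_sysfs_param()'s cleanup. */
    static struct table *table_add(struct table *t, int val)
    {
        unsigned int num = t ? t->num : 0;
        struct entry **ptrs = t ? t->ptrs : NULL;
        struct table *new;
        unsigned int i;

        new = krealloc(t, sizeof(*new) + sizeof(new->entries[0]) * (num + 1),
                       GFP_KERNEL);
        if (!new) {
            kfree(t);           /* krealloc() failure leaves @t intact */
            return NULL;
        }
        ptrs = krealloc(ptrs, sizeof(*ptrs) * (num + 2), GFP_KERNEL);
        if (!ptrs) {
            kfree(new);
            return NULL;
        }

        new->entries[num].val = val;
        new->num = num + 1;
        new->ptrs = ptrs;

        /* Either block may have moved: rebuild every derived pointer,
         * and keep the array NULL-terminated as sysfs expects. */
        for (i = 0; i < new->num; i++)
            new->ptrs[i] = &new->entries[i];
        new->ptrs[i] = NULL;

        return new;
    }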
502 | #ifdef CONFIG_MODULES | 513 | #ifdef CONFIG_MODULES |
514 | static void free_module_param_attrs(struct module_kobject *mk) | ||
515 | { | ||
516 | kfree(mk->mp->grp.attrs); | ||
517 | kfree(mk->mp); | ||
518 | mk->mp = NULL; | ||
519 | } | ||
520 | |||
503 | /* | 521 | /* |
504 | * module_param_sysfs_setup - setup sysfs support for one module | 522 | * module_param_sysfs_setup - setup sysfs support for one module |
505 | * @mod: module | 523 | * @mod: module |
506 | * @kparam: module parameters (array) | 524 | * @kparam: module parameters (array) |
507 | * @num_params: number of module parameters | 525 | * @num_params: number of module parameters |
508 | * | 526 | * |
509 | * Adds sysfs entries for module parameters, and creates a link from | 527 | * Adds sysfs entries for module parameters under |
510 | * /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/ | 528 | * /sys/module/[mod->name]/parameters/ |
511 | */ | 529 | */ |
512 | int module_param_sysfs_setup(struct module *mod, | 530 | int module_param_sysfs_setup(struct module *mod, |
513 | struct kernel_param *kparam, | 531 | struct kernel_param *kparam, |
514 | unsigned int num_params) | 532 | unsigned int num_params) |
515 | { | 533 | { |
516 | struct module_param_attrs *mp; | 534 | int i, err; |
535 | bool params = false; | ||
536 | |||
537 | for (i = 0; i < num_params; i++) { | ||
538 | if (kparam[i].perm == 0) | ||
539 | continue; | ||
540 | err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name); | ||
541 | if (err) | ||
542 | return err; | ||
543 | params = true; | ||
544 | } | ||
517 | 545 | ||
518 | mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0); | 546 | if (!params) |
519 | if (IS_ERR(mp)) | 547 | return 0; |
520 | return PTR_ERR(mp); | ||
521 | 548 | ||
522 | mod->param_attrs = mp; | 549 | /* Create the param group. */ |
523 | return 0; | 550 | err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); |
551 | if (err) | ||
552 | free_module_param_attrs(&mod->mkobj); | ||
553 | return err; | ||
524 | } | 554 | } |
525 | 555 | ||
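Note how the loop above keys everything off perm: a zero mode means the parameter is parsed at module load time but never shown in sysfs. A hypothetical module fragment showing both cases:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    static int verbose = 1;
    module_param(verbose, int, 0644);  /* appears as
                                        * /sys/module/<name>/parameters/verbose */

    static int bootonly;
    module_param(bootonly, int, 0);    /* perm 0: parsed at load time, but the
                                        * setup loop above skips it entirely */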
526 | /* | 556 | /* |
@@ -532,43 +562,55 @@ int module_param_sysfs_setup(struct module *mod, | |||
532 | */ | 562 | */ |
533 | void module_param_sysfs_remove(struct module *mod) | 563 | void module_param_sysfs_remove(struct module *mod) |
534 | { | 564 | { |
535 | if (mod->param_attrs) { | 565 | if (mod->mkobj.mp) { |
536 | sysfs_remove_group(&mod->mkobj.kobj, | 566 | sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); |
537 | &mod->param_attrs->grp); | ||
538 | /* We are positive that no one is using any param | 567 | /* We are positive that no one is using any param |
539 | * attrs at this point. Deallocate immediately. */ | 568 | * attrs at this point. Deallocate immediately. */ |
540 | kfree(mod->param_attrs); | 569 | free_module_param_attrs(&mod->mkobj); |
541 | mod->param_attrs = NULL; | ||
542 | } | 570 | } |
543 | } | 571 | } |
544 | #endif | 572 | #endif |
545 | 573 | ||
546 | /* | 574 | static void __init kernel_add_sysfs_param(const char *name, |
547 | * kernel_param_sysfs_setup - wrapper for built-in params support | 575 | struct kernel_param *kparam, |
548 | */ | 576 | unsigned int name_skip) |
549 | static void __init kernel_param_sysfs_setup(const char *name, | ||
550 | struct kernel_param *kparam, | ||
551 | unsigned int num_params, | ||
552 | unsigned int name_skip) | ||
553 | { | 577 | { |
554 | struct module_kobject *mk; | 578 | struct module_kobject *mk; |
555 | int ret; | 579 | struct kobject *kobj; |
580 | int err; | ||
556 | 581 | ||
557 | mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); | 582 | kobj = kset_find_obj(module_kset, name); |
558 | BUG_ON(!mk); | 583 | if (kobj) { |
559 | 584 | /* We already have one. Remove params so we can add more. */ | |
560 | mk->mod = THIS_MODULE; | 585 | mk = to_module_kobject(kobj); |
561 | mk->kobj.kset = module_kset; | 586 | /* We need to remove it before adding parameters. */ |
562 | ret = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); | 587 | sysfs_remove_group(&mk->kobj, &mk->mp->grp); |
563 | if (ret) { | 588 | } else { |
564 | kobject_put(&mk->kobj); | 589 | mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); |
565 | printk(KERN_ERR "Module '%s' failed to be added to sysfs, " | 590 | BUG_ON(!mk); |
566 | "error number %d\n", name, ret); | 591 | |
567 | printk(KERN_ERR "The system will be unstable now.\n"); | 592 | mk->mod = THIS_MODULE; |
568 | return; | 593 | mk->kobj.kset = module_kset; |
594 | err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, | ||
595 | "%s", name); | ||
596 | if (err) { | ||
597 | kobject_put(&mk->kobj); | ||
598 | printk(KERN_ERR "Module '%s' failed to be added to sysfs, " ||
599 | "error number %d\n", name, err); | ||
600 | printk(KERN_ERR "The system will be unstable now.\n"); | ||
601 | return; | ||
602 | } | ||
603 | /* So that the kobject_put() on the exit path is balanced. */ ||
604 | kobject_get(&mk->kobj); | ||
569 | } | 605 | } |
570 | param_sysfs_setup(mk, kparam, num_params, name_skip); | 606 | |
607 | /* These should not fail at boot. */ | ||
608 | err = add_sysfs_param(mk, kparam, kparam->name + name_skip); | ||
609 | BUG_ON(err); | ||
610 | err = sysfs_create_group(&mk->kobj, &mk->mp->grp); | ||
611 | BUG_ON(err); | ||
571 | kobject_uevent(&mk->kobj, KOBJ_ADD); | 612 | kobject_uevent(&mk->kobj, KOBJ_ADD); |
613 | kobject_put(&mk->kobj); | ||
572 | } | 614 | } |
573 | 615 | ||
574 | /* | 616 | /* |
@@ -579,60 +621,36 @@ static void __init kernel_param_sysfs_setup(const char *name, | |||
579 | * The "module" name (KBUILD_MODNAME) is stored before a dot, the | 621 | * The "module" name (KBUILD_MODNAME) is stored before a dot, the |
580 | * "parameter" name is stored behind a dot in kernel_param->name. So, | 622 | * "parameter" name is stored behind a dot in kernel_param->name. So, |
581 | * extract the "module" name for all built-in kernel_param-eters, | 623 | * extract the "module" name for all built-in kernel_param-eters, |
582 | * and for all who have the same, call kernel_param_sysfs_setup. | 624 | * and for all who have the same, call kernel_add_sysfs_param. |
583 | */ | 625 | */ |
584 | static void __init param_sysfs_builtin(void) | 626 | static void __init param_sysfs_builtin(void) |
585 | { | 627 | { |
586 | struct kernel_param *kp, *kp_begin = NULL; | 628 | struct kernel_param *kp; |
587 | unsigned int i, name_len, count = 0; | 629 | unsigned int name_len; |
588 | char modname[MODULE_NAME_LEN + 1] = ""; | 630 | char modname[MODULE_NAME_LEN]; |
589 | 631 | ||
590 | for (i=0; i < __stop___param - __start___param; i++) { | 632 | for (kp = __start___param; kp < __stop___param; kp++) { |
591 | char *dot; | 633 | char *dot; |
592 | size_t max_name_len; | ||
593 | 634 | ||
594 | kp = &__start___param[i]; | 635 | if (kp->perm == 0) |
595 | max_name_len = | 636 | continue; |
596 | min_t(size_t, MODULE_NAME_LEN, strlen(kp->name)); | ||
597 | 637 | ||
598 | dot = memchr(kp->name, '.', max_name_len); | 638 | dot = strchr(kp->name, '.'); |
599 | if (!dot) { | 639 | if (!dot) { |
600 | DEBUGP("couldn't find period in first %d characters " | 640 | /* This happens for core_param() */ |
601 | "of %s\n", MODULE_NAME_LEN, kp->name); | 641 | strcpy(modname, "kernel"); |
602 | continue; | 642 | name_len = 0; |
603 | } | 643 | } else { |
604 | name_len = dot - kp->name; | 644 | name_len = dot - kp->name + 1; |
605 | 645 | strlcpy(modname, kp->name, name_len); | |
606 | /* new kbuild_modname? */ | ||
607 | if (strlen(modname) != name_len | ||
608 | || strncmp(modname, kp->name, name_len) != 0) { | ||
609 | /* add a new kobject for previous kernel_params. */ | ||
610 | if (count) | ||
611 | kernel_param_sysfs_setup(modname, | ||
612 | kp_begin, | ||
613 | count, | ||
614 | strlen(modname)+1); | ||
615 | |||
616 | strncpy(modname, kp->name, name_len); | ||
617 | modname[name_len] = '\0'; | ||
618 | count = 0; | ||
619 | kp_begin = kp; | ||
620 | } | 646 | } |
621 | count++; | 647 | kernel_add_sysfs_param(modname, kp, name_len); |
622 | } | 648 | } |
623 | |||
624 | /* last kernel_params need to be registered as well */ | ||
625 | if (count) | ||
626 | kernel_param_sysfs_setup(modname, kp_begin, count, | ||
627 | strlen(modname)+1); | ||
628 | } | 649 | } |
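The rewritten loop leans on strlcpy() size semantics: it copies at most size - 1 bytes and NUL-terminates, so name_len = dot - kp->name + 1 yields exactly the text before the dot, and dot-less core_param() names fall back to the "kernel" pseudo-module. A small sketch of that split (the helper name is made up):

    #include <linux/kernel.h>
    #include <linux/string.h>

    /* "usbcore.autosuspend": dot at offset 7, so len 8 leaves "usbcore".
     * "panic" (a core_param): no dot, so it lands under "kernel",
     * giving /sys/module/kernel/parameters/panic. */
    static void split_modname(const char *pname, char *modname, size_t len)
    {
        const char *dot = strchr(pname, '.');

        if (!dot)
            strlcpy(modname, "kernel", len);
        else
            strlcpy(modname, pname,
                    min_t(size_t, (size_t)(dot - pname + 1), len));
    }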
629 | 650 | ||
630 | 651 | ||
631 | /* module-related sysfs stuff */ | 652 | /* module-related sysfs stuff */ |
632 | 653 | ||
633 | #define to_module_attr(n) container_of(n, struct module_attribute, attr); | ||
634 | #define to_module_kobject(n) container_of(n, struct module_kobject, kobj); | ||
635 | |||
636 | static ssize_t module_attr_show(struct kobject *kobj, | 654 | static ssize_t module_attr_show(struct kobject *kobj, |
637 | struct attribute *attr, | 655 | struct attribute *attr, |
638 | char *buf) | 656 | char *buf) |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index b931d7cedbfa..5e79c662294b 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -639,7 +639,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) | |||
639 | (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) | 639 | (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) |
640 | timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); | 640 | timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); |
641 | 641 | ||
642 | remaining = ktime_sub(timer->expires, now); | 642 | remaining = ktime_sub(hrtimer_get_expires(timer), now); |
643 | /* Return 0 only, when the timer is expired and not pending */ | 643 | /* Return 0 only, when the timer is expired and not pending */ |
644 | if (remaining.tv64 <= 0) { | 644 | if (remaining.tv64 <= 0) { |
645 | /* | 645 | /* |
@@ -733,7 +733,7 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
733 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); | 733 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); |
734 | timr->it.real.timer.function = posix_timer_fn; | 734 | timr->it.real.timer.function = posix_timer_fn; |
735 | 735 | ||
736 | timer->expires = timespec_to_ktime(new_setting->it_value); | 736 | hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value)); |
737 | 737 | ||
738 | /* Convert interval */ | 738 | /* Convert interval */ |
739 | timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); | 739 | timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); |
@@ -742,14 +742,12 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
742 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { | 742 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { |
743 | /* Setup correct expiry time for relative timers */ | 743 | /* Setup correct expiry time for relative timers */ |
744 | if (mode == HRTIMER_MODE_REL) { | 744 | if (mode == HRTIMER_MODE_REL) { |
745 | timer->expires = | 745 | hrtimer_add_expires(timer, timer->base->get_time()); |
746 | ktime_add_safe(timer->expires, | ||
747 | timer->base->get_time()); | ||
748 | } | 746 | } |
749 | return 0; | 747 | return 0; |
750 | } | 748 | } |
751 | 749 | ||
752 | hrtimer_start(timer, timer->expires, mode); | 750 | hrtimer_start_expires(timer, mode); |
753 | return 0; | 751 | return 0; |
754 | } | 752 | } |
755 | 753 | ||
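These hunks are part of a tree-wide move from poking timer->expires directly to accessor helpers, which lets the hrtimer core track both a soft and a hard expiry (the basis for the timer-slack support added elsewhere in this merge). The accessor idiom, sketched against an assumed, already-initialized timer:

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static void demo_arm(struct hrtimer *t, s64 delay_ns)
    {
        ktime_t when = ktime_add_ns(t->base->get_time(), delay_ns);

        hrtimer_set_expires(t, when);           /* not: t->expires = when */
        hrtimer_start_expires(t, HRTIMER_MODE_ABS);
    }

    static ktime_t demo_remaining(struct hrtimer *t)
    {
        /* not: ktime_sub(t->expires, now) */
        return ktime_sub(hrtimer_get_expires(t), t->base->get_time());
    }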
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 331f9836383f..c9d74083746f 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -651,7 +651,7 @@ static int software_resume(void) | |||
651 | pr_debug("PM: Preparing processes for restore.\n"); | 651 | pr_debug("PM: Preparing processes for restore.\n"); |
652 | error = prepare_processes(); | 652 | error = prepare_processes(); |
653 | if (error) { | 653 | if (error) { |
654 | swsusp_close(); | 654 | swsusp_close(FMODE_READ); |
655 | goto Done; | 655 | goto Done; |
656 | } | 656 | } |
657 | 657 | ||
diff --git a/kernel/power/power.h b/kernel/power/power.h index acc0c101dbd5..46b5ec7a3afb 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -153,7 +153,7 @@ extern int swsusp_shrink_memory(void); | |||
153 | extern void swsusp_free(void); | 153 | extern void swsusp_free(void); |
154 | extern int swsusp_read(unsigned int *flags_p); | 154 | extern int swsusp_read(unsigned int *flags_p); |
155 | extern int swsusp_write(unsigned int flags); | 155 | extern int swsusp_write(unsigned int flags); |
156 | extern void swsusp_close(void); | 156 | extern void swsusp_close(fmode_t); |
157 | 157 | ||
158 | struct timeval; | 158 | struct timeval; |
159 | /* kernel/power/swsusp.c */ | 159 | /* kernel/power/swsusp.c */ |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 80ccac849e46..b7713b53d07a 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -172,13 +172,13 @@ static int swsusp_swap_check(void) /* This is called before saving image */ | |||
172 | return res; | 172 | return res; |
173 | 173 | ||
174 | root_swap = res; | 174 | root_swap = res; |
175 | res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR); | 175 | res = blkdev_get(resume_bdev, FMODE_WRITE); |
176 | if (res) | 176 | if (res) |
177 | return res; | 177 | return res; |
178 | 178 | ||
179 | res = set_blocksize(resume_bdev, PAGE_SIZE); | 179 | res = set_blocksize(resume_bdev, PAGE_SIZE); |
180 | if (res < 0) | 180 | if (res < 0) |
181 | blkdev_put(resume_bdev); | 181 | blkdev_put(resume_bdev, FMODE_WRITE); |
182 | 182 | ||
183 | return res; | 183 | return res; |
184 | } | 184 | } |
@@ -426,7 +426,7 @@ int swsusp_write(unsigned int flags) | |||
426 | 426 | ||
427 | release_swap_writer(&handle); | 427 | release_swap_writer(&handle); |
428 | out: | 428 | out: |
429 | swsusp_close(); | 429 | swsusp_close(FMODE_WRITE); |
430 | return error; | 430 | return error; |
431 | } | 431 | } |
432 | 432 | ||
@@ -574,7 +574,7 @@ int swsusp_read(unsigned int *flags_p) | |||
574 | error = load_image(&handle, &snapshot, header->pages - 1); | 574 | error = load_image(&handle, &snapshot, header->pages - 1); |
575 | release_swap_reader(&handle); | 575 | release_swap_reader(&handle); |
576 | 576 | ||
577 | blkdev_put(resume_bdev); | 577 | blkdev_put(resume_bdev, FMODE_READ); |
578 | 578 | ||
579 | if (!error) | 579 | if (!error) |
580 | pr_debug("PM: Image successfully loaded\n"); | 580 | pr_debug("PM: Image successfully loaded\n"); |
@@ -609,7 +609,7 @@ int swsusp_check(void) | |||
609 | return -EINVAL; | 609 | return -EINVAL; |
610 | } | 610 | } |
611 | if (error) | 611 | if (error) |
612 | blkdev_put(resume_bdev); | 612 | blkdev_put(resume_bdev, FMODE_READ); |
613 | else | 613 | else |
614 | pr_debug("PM: Signature found, resuming\n"); | 614 | pr_debug("PM: Signature found, resuming\n"); |
615 | } else { | 615 | } else { |
@@ -626,14 +626,14 @@ int swsusp_check(void) | |||
626 | * swsusp_close - close swap device. | 626 | * swsusp_close - close swap device. |
627 | */ | 627 | */ |
628 | 628 | ||
629 | void swsusp_close(void) | 629 | void swsusp_close(fmode_t mode) |
630 | { | 630 | { |
631 | if (IS_ERR(resume_bdev)) { | 631 | if (IS_ERR(resume_bdev)) { |
632 | pr_debug("PM: Image device not initialised\n"); | 632 | pr_debug("PM: Image device not initialised\n"); |
633 | return; | 633 | return; |
634 | } | 634 | } |
635 | 635 | ||
636 | blkdev_put(resume_bdev); | 636 | blkdev_put(resume_bdev, mode);
637 | } | 637 | } |
638 | 638 | ||
639 | static int swsusp_header_init(void) | 639 | static int swsusp_header_init(void) |
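The fmode_t threading exists because blkdev_put() now needs to know the mode the device was opened with, so open and close must stay paired. A trimmed sketch of the pairing:

    #include <linux/fs.h>

    static int demo_touch_bdev(struct block_device *bdev)
    {
        int err = blkdev_get(bdev, FMODE_READ); /* mode chosen at open... */

        if (err)
            return err;

        /* ... read from the device ... */

        blkdev_put(bdev, FMODE_READ);           /* ...must match at release */
        return 0;
    }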
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 467d5940f624..ad63af8b2521 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -119,18 +119,19 @@ static void _rcu_barrier(enum rcu_barrier type) | |||
119 | /* Take cpucontrol mutex to protect against CPU hotplug */ | 119 | /* Take cpucontrol mutex to protect against CPU hotplug */ |
120 | mutex_lock(&rcu_barrier_mutex); | 120 | mutex_lock(&rcu_barrier_mutex); |
121 | init_completion(&rcu_barrier_completion); | 121 | init_completion(&rcu_barrier_completion); |
122 | atomic_set(&rcu_barrier_cpu_count, 0); | ||
123 | /* | 122 | /* |
124 | * The queueing of callbacks in all CPUs must be atomic with | 123 | * Initialize rcu_barrier_cpu_count to 1, then invoke |
125 | * respect to RCU, otherwise one CPU may queue a callback, | 124 | * rcu_barrier_func() on each CPU, so that each CPU also has |
126 | * wait for a grace period, decrement barrier count and call | 125 | * incremented rcu_barrier_cpu_count. Only then is it safe to |
127 | * complete(), while other CPUs have not yet queued anything. | 126 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU |
128 | * So, we need to make sure that grace periods cannot complete | 127 | * might complete its grace period before all of the other CPUs |
129 | * until all the callbacks are queued. | 128 | * did their increment, causing this function to return too |
129 | * early. | ||
130 | */ | 130 | */ |
131 | rcu_read_lock(); | 131 | atomic_set(&rcu_barrier_cpu_count, 1); |
132 | on_each_cpu(rcu_barrier_func, (void *)type, 1); | 132 | on_each_cpu(rcu_barrier_func, (void *)type, 1); |
133 | rcu_read_unlock(); | 133 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
134 | complete(&rcu_barrier_completion); | ||
134 | wait_for_completion(&rcu_barrier_completion); | 135 | wait_for_completion(&rcu_barrier_completion); |
135 | mutex_unlock(&rcu_barrier_mutex); | 136 | mutex_unlock(&rcu_barrier_mutex); |
136 | } | 137 | } |
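The fix is the classic count-from-one barrier shape: start the counter at 1 so the completion cannot fire before every participant has incremented it, then have the initiator drop its own reference last. The same shape in isolation, with hypothetical names:

    #include <asm/atomic.h>
    #include <linux/completion.h>

    static atomic_t pending;
    static struct completion done;

    static void worker_finished(void)           /* called once per worker */
    {
        if (atomic_dec_and_test(&pending))
            complete(&done);
    }

    static void wait_for_workers(int nr_workers)
    {
        int i;

        init_completion(&done);
        atomic_set(&pending, 1);                /* the initiator's own ref */
        for (i = 0; i < nr_workers; i++) {
            atomic_inc(&pending);
            /* ... kick worker i; it calls worker_finished() ... */
        }
        if (atomic_dec_and_test(&pending))      /* drop the initiator's ref */
            complete(&done);
        wait_for_completion(&done);
    }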
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 6522ae5b14a2..69d9cb921ffa 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -631,8 +631,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
631 | 631 | ||
632 | /* Setup the timer, when timeout != NULL */ | 632 | /* Setup the timer, when timeout != NULL */ |
633 | if (unlikely(timeout)) { | 633 | if (unlikely(timeout)) { |
634 | hrtimer_start(&timeout->timer, timeout->timer.expires, | 634 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); |
635 | HRTIMER_MODE_ABS); | ||
636 | if (!hrtimer_active(&timeout->timer)) | 635 | if (!hrtimer_active(&timeout->timer)) |
637 | timeout->task = NULL; | 636 | timeout->task = NULL; |
638 | } | 637 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index d906f72b42d2..6625c3c4b10d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/cpuset.h> | 55 | #include <linux/cpuset.h> |
56 | #include <linux/percpu.h> | 56 | #include <linux/percpu.h> |
57 | #include <linux/kthread.h> | 57 | #include <linux/kthread.h> |
58 | #include <linux/proc_fs.h> | ||
58 | #include <linux/seq_file.h> | 59 | #include <linux/seq_file.h> |
59 | #include <linux/sysctl.h> | 60 | #include <linux/sysctl.h> |
60 | #include <linux/syscalls.h> | 61 | #include <linux/syscalls.h> |
@@ -227,9 +228,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
227 | 228 | ||
228 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); | 229 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); |
229 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); | 230 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); |
230 | hrtimer_start(&rt_b->rt_period_timer, | 231 | hrtimer_start_expires(&rt_b->rt_period_timer, |
231 | rt_b->rt_period_timer.expires, | 232 | HRTIMER_MODE_ABS); |
232 | HRTIMER_MODE_ABS); | ||
233 | } | 233 | } |
234 | spin_unlock(&rt_b->rt_runtime_lock); | 234 | spin_unlock(&rt_b->rt_runtime_lock); |
235 | } | 235 | } |
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
819 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 819 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
820 | 820 | ||
821 | /* | 821 | /* |
822 | * Inject some fuzziness into changing the per-cpu group shares; ||
823 | * this avoids remote rq-locks at the expense of fairness. | ||
824 | * default: 4 | ||
825 | */ | ||
826 | unsigned int sysctl_sched_shares_thresh = 4; | ||
827 | |||
828 | /* | ||
822 | * period over which we measure -rt task cpu usage in us. | 829 | * period over which we measure -rt task cpu usage in us. |
823 | * default: 1s | 830 | * default: 1s |
824 | */ | 831 | */ |
@@ -1064,7 +1071,7 @@ static void hrtick_start(struct rq *rq, u64 delay) | |||
1064 | struct hrtimer *timer = &rq->hrtick_timer; | 1071 | struct hrtimer *timer = &rq->hrtick_timer; |
1065 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); | 1072 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); |
1066 | 1073 | ||
1067 | timer->expires = time; | 1074 | hrtimer_set_expires(timer, time); |
1068 | 1075 | ||
1069 | if (rq == this_rq()) { | 1076 | if (rq == this_rq()) { |
1070 | hrtimer_restart(timer); | 1077 | hrtimer_restart(timer); |
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); | |||
1454 | * Calculate and set the cpu's group shares. | 1461 | * Calculate and set the cpu's group shares. |
1455 | */ | 1462 | */ |
1456 | static void | 1463 | static void |
1457 | __update_group_shares_cpu(struct task_group *tg, int cpu, | 1464 | update_group_shares_cpu(struct task_group *tg, int cpu, |
1458 | unsigned long sd_shares, unsigned long sd_rq_weight) | 1465 | unsigned long sd_shares, unsigned long sd_rq_weight) |
1459 | { | 1466 | { |
1460 | int boost = 0; | 1467 | int boost = 0; |
1461 | unsigned long shares; | 1468 | unsigned long shares; |
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1486 | * | 1493 | * |
1487 | */ | 1494 | */ |
1488 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); | 1495 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); |
1496 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | ||
1489 | 1497 | ||
1490 | /* | 1498 | if (abs(shares - tg->se[cpu]->load.weight) > |
1491 | * record the actual number of shares, not the boosted amount. | 1499 | sysctl_sched_shares_thresh) { |
1492 | */ | 1500 | struct rq *rq = cpu_rq(cpu); |
1493 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | 1501 | unsigned long flags; |
1494 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1495 | 1502 | ||
1496 | if (shares < MIN_SHARES) | 1503 | spin_lock_irqsave(&rq->lock, flags); |
1497 | shares = MIN_SHARES; | 1504 | /* |
1498 | else if (shares > MAX_SHARES) | 1505 | * record the actual number of shares, not the boosted amount. |
1499 | shares = MAX_SHARES; | 1506 | */ |
1507 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
1508 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1500 | 1509 | ||
1501 | __set_se_shares(tg->se[cpu], shares); | 1510 | __set_se_shares(tg->se[cpu], shares); |
1511 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1512 | } | ||
1502 | } | 1513 | } |
1503 | 1514 | ||
1504 | /* | 1515 | /* |
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1527 | if (!rq_weight) | 1538 | if (!rq_weight) |
1528 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; | 1539 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; |
1529 | 1540 | ||
1530 | for_each_cpu_mask(i, sd->span) { | 1541 | for_each_cpu_mask(i, sd->span) |
1531 | struct rq *rq = cpu_rq(i); | 1542 | update_group_shares_cpu(tg, i, shares, rq_weight); |
1532 | unsigned long flags; | ||
1533 | |||
1534 | spin_lock_irqsave(&rq->lock, flags); | ||
1535 | __update_group_shares_cpu(tg, i, shares, rq_weight); | ||
1536 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1537 | } | ||
1538 | 1543 | ||
1539 | return 0; | 1544 | return 0; |
1540 | } | 1545 | } |
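sysctl_sched_shares_thresh turns the shares update into a deadband controller: the remote runqueue lock is only taken when the recomputed value differs from the current weight by more than the threshold. The control shape, reduced to essentials with illustrative names:

    #include <linux/kernel.h>

    /* Apply an expensive update only when the change clears a threshold. */
    static void maybe_update(long *cur, long target, long thresh)
    {
        if (abs(target - *cur) <= thresh)
            return;             /* inside the deadband: skip the remote lock */

        /* only now take the remote rq->lock equivalent and write it back */
        *cur = target;
    }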
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible: | |||
4443 | if (sched_feat(HRTICK)) | 4448 | if (sched_feat(HRTICK)) |
4444 | hrtick_clear(rq); | 4449 | hrtick_clear(rq); |
4445 | 4450 | ||
4446 | /* | 4451 | spin_lock_irq(&rq->lock); |
4447 | * Do the rq-clock update outside the rq lock: | ||
4448 | */ | ||
4449 | local_irq_disable(); | ||
4450 | update_rq_clock(rq); | 4452 | update_rq_clock(rq); |
4451 | spin_lock(&rq->lock); | ||
4452 | clear_tsk_need_resched(prev); | 4453 | clear_tsk_need_resched(prev); |
4453 | 4454 | ||
4454 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4455 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f604dae71316..9573c33688b8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL; | |||
73 | 73 | ||
74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
75 | 75 | ||
76 | static const struct sched_class fair_sched_class; | ||
77 | |||
76 | /************************************************************** | 78 | /************************************************************** |
77 | * CFS operations on generic schedulable entities: | 79 | * CFS operations on generic schedulable entities: |
78 | */ | 80 | */ |
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
334 | #endif | 336 | #endif |
335 | 337 | ||
336 | /* | 338 | /* |
337 | * delta *= w / rw | 339 | * delta *= P[w / rw] |
338 | */ | 340 | */ |
339 | static inline unsigned long | 341 | static inline unsigned long |
340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | 342 | calc_delta_weight(unsigned long delta, struct sched_entity *se) |
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se) | |||
348 | } | 350 | } |
349 | 351 | ||
350 | /* | 352 | /* |
351 | * delta *= rw / w | 353 | * delta /= w |
352 | */ | 354 | */ |
353 | static inline unsigned long | 355 | static inline unsigned long |
354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | 356 | calc_delta_fair(unsigned long delta, struct sched_entity *se) |
355 | { | 357 | { |
356 | for_each_sched_entity(se) { | 358 | if (unlikely(se->load.weight != NICE_0_LOAD)) |
357 | delta = calc_delta_mine(delta, | 359 | delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load); |
358 | cfs_rq_of(se)->load.weight, &se->load); | ||
359 | } | ||
360 | 360 | ||
361 | return delta; | 361 | return delta; |
362 | } | 362 | } |
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running) | |||
386 | * We calculate the wall-time slice from the period by taking a part | 386 | * We calculate the wall-time slice from the period by taking a part |
387 | * proportional to the weight. | 387 | * proportional to the weight. |
388 | * | 388 | * |
389 | * s = p*w/rw | 389 | * s = p*P[w/rw] |
390 | */ | 390 | */ |
391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
392 | { | 392 | { |
393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 393 | unsigned long nr_running = cfs_rq->nr_running; |
394 | |||
395 | if (unlikely(!se->on_rq)) | ||
396 | nr_running++; | ||
397 | |||
398 | return calc_delta_weight(__sched_period(nr_running), se); | ||
394 | } | 399 | } |
395 | 400 | ||
396 | /* | 401 | /* |
397 | * We calculate the vruntime slice of a to be inserted task | 402 | * We calculate the vruntime slice of a to be inserted task |
398 | * | 403 | * |
399 | * vs = s*rw/w = p | 404 | * vs = s/w |
400 | */ | 405 | */ |
401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 406 | static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
402 | { | 407 | { |
403 | unsigned long nr_running = cfs_rq->nr_running; | 408 | return calc_delta_fair(sched_slice(cfs_rq, se), se); |
404 | |||
405 | if (!se->on_rq) | ||
406 | nr_running++; | ||
407 | |||
408 | return __sched_period(nr_running); | ||
409 | } | 409 | } |
410 | 410 | ||
411 | /* | 411 | /* |
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
628 | * stays open at the end. | 628 | * stays open at the end. |
629 | */ | 629 | */ |
630 | if (initial && sched_feat(START_DEBIT)) | 630 | if (initial && sched_feat(START_DEBIT)) |
631 | vruntime += sched_vslice_add(cfs_rq, se); | 631 | vruntime += sched_vslice(cfs_rq, se); |
632 | 632 | ||
633 | if (!initial) { | 633 | if (!initial) { |
634 | /* sleeps up to a single latency don't count. */ | 634 | /* sleeps up to a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
748 | struct rq *rq = rq_of(cfs_rq); | 748 | struct rq *rq = rq_of(cfs_rq); |
749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; | 749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; |
750 | 750 | ||
751 | if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) { | 751 | if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) { |
752 | cfs_rq->pair_start = rq->clock; | 752 | cfs_rq->pair_start = rq->clock; |
753 | return se; | 753 | return se; |
754 | } | 754 | } |
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |||
849 | hrtick_start(rq, delta); | 849 | hrtick_start(rq, delta); |
850 | } | 850 | } |
851 | } | 851 | } |
852 | |||
853 | /* | ||
854 | * called from enqueue/dequeue and updates the hrtick when the | ||
855 | * current task is from our class and nr_running is low enough | ||
856 | * to matter. | ||
857 | */ | ||
858 | static void hrtick_update(struct rq *rq) | ||
859 | { | ||
860 | struct task_struct *curr = rq->curr; | ||
861 | |||
862 | if (curr->sched_class != &fair_sched_class) | ||
863 | return; | ||
864 | |||
865 | if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency) | ||
866 | hrtick_start_fair(rq, curr); | ||
867 | } | ||
852 | #else /* !CONFIG_SCHED_HRTICK */ | 868 | #else /* !CONFIG_SCHED_HRTICK */ |
853 | static inline void | 869 | static inline void |
854 | hrtick_start_fair(struct rq *rq, struct task_struct *p) | 870 | hrtick_start_fair(struct rq *rq, struct task_struct *p) |
855 | { | 871 | { |
856 | } | 872 | } |
873 | |||
874 | static inline void hrtick_update(struct rq *rq) | ||
875 | { | ||
876 | } | ||
857 | #endif | 877 | #endif |
858 | 878 | ||
859 | /* | 879 | /* |
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | |||
874 | wakeup = 1; | 894 | wakeup = 1; |
875 | } | 895 | } |
876 | 896 | ||
877 | hrtick_start_fair(rq, rq->curr); | 897 | hrtick_update(rq); |
878 | } | 898 | } |
879 | 899 | ||
880 | /* | 900 | /* |
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) | |||
896 | sleep = 1; | 916 | sleep = 1; |
897 | } | 917 | } |
898 | 918 | ||
899 | hrtick_start_fair(rq, rq->curr); | 919 | hrtick_update(rq); |
900 | } | 920 | } |
901 | 921 | ||
902 | /* | 922 | /* |
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p) | |||
1002 | 1022 | ||
1003 | #ifdef CONFIG_SMP | 1023 | #ifdef CONFIG_SMP |
1004 | 1024 | ||
1005 | static const struct sched_class fair_sched_class; | ||
1006 | |||
1007 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1025 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1008 | /* | 1026 | /* |
1009 | * effective_load() calculates the load change as seen from the root_task_group | 1027 | * effective_load() calculates the load change as seen from the root_task_group |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 7c9e8f4a049f..fda016218296 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1) | |||
5 | SCHED_FEAT(AFFINE_WAKEUPS, 1) | 5 | SCHED_FEAT(AFFINE_WAKEUPS, 1) |
6 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) | 6 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) |
7 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 7 | SCHED_FEAT(SYNC_WAKEUPS, 1) |
8 | SCHED_FEAT(HRTICK, 1) | 8 | SCHED_FEAT(HRTICK, 0) |
9 | SCHED_FEAT(DOUBLE_TICK, 0) | 9 | SCHED_FEAT(DOUBLE_TICK, 0) |
10 | SCHED_FEAT(ASYM_GRAN, 1) | 10 | SCHED_FEAT(ASYM_GRAN, 1) |
11 | SCHED_FEAT(LB_BIAS, 1) | 11 | SCHED_FEAT(LB_BIAS, 1) |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index b8c156979cf2..ee71bec1da66 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -9,7 +9,7 @@ | |||
9 | static int show_schedstat(struct seq_file *seq, void *v) | 9 | static int show_schedstat(struct seq_file *seq, void *v) |
10 | { | 10 | { |
11 | int cpu; | 11 | int cpu; |
12 | int mask_len = NR_CPUS/32 * 9; | 12 | int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; |
13 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); | 13 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); |
14 | 14 | ||
15 | if (mask_str == NULL) | 15 | if (mask_str == NULL) |
@@ -90,13 +90,20 @@ static int schedstat_open(struct inode *inode, struct file *file) | |||
90 | return res; | 90 | return res; |
91 | } | 91 | } |
92 | 92 | ||
93 | const struct file_operations proc_schedstat_operations = { | 93 | static const struct file_operations proc_schedstat_operations = { |
94 | .open = schedstat_open, | 94 | .open = schedstat_open, |
95 | .read = seq_read, | 95 | .read = seq_read, |
96 | .llseek = seq_lseek, | 96 | .llseek = seq_lseek, |
97 | .release = single_release, | 97 | .release = single_release, |
98 | }; | 98 | }; |
99 | 99 | ||
100 | static int __init proc_schedstat_init(void) | ||
101 | { | ||
102 | proc_create("schedstat", 0, NULL, &proc_schedstat_operations); | ||
103 | return 0; | ||
104 | } | ||
105 | module_init(proc_schedstat_init); | ||
106 | |||
100 | /* | 107 | /* |
101 | * Expects runqueue lock to be held for atomicity of update | 108 | * Expects runqueue lock to be held for atomicity of update |
102 | */ | 109 | */ |
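The mask_len change matters whenever NR_CPUS is not a multiple of 32, since plain division rounds down and undersizes the buffer (each 32-bit mask word prints as 9 characters). For instance:

    #include <linux/kernel.h>

    /* DIV_ROUND_UP(n, d) expands to ((n) + (d) - 1) / (d):
     *   NR_CPUS = 64:  64/32 * 9 = 18 and DIV_ROUND_UP(64, 32) * 9 = 18
     *   NR_CPUS = 40:  40/32 * 9 =  9 (one mask word short), but
     *                  DIV_ROUND_UP(40, 32) * 9 = 18 (room for both words)
     */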
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index af3c7cea258b..9bc4c00872c9 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -37,9 +37,13 @@ struct stop_machine_data { | |||
37 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | 37 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ |
38 | static unsigned int num_threads; | 38 | static unsigned int num_threads; |
39 | static atomic_t thread_ack; | 39 | static atomic_t thread_ack; |
40 | static struct completion finished; | ||
41 | static DEFINE_MUTEX(lock); | 40 | static DEFINE_MUTEX(lock); |
42 | 41 | ||
42 | static struct workqueue_struct *stop_machine_wq; | ||
43 | static struct stop_machine_data active, idle; | ||
44 | static const cpumask_t *active_cpus; | ||
45 | static void *stop_machine_work; | ||
46 | |||
43 | static void set_state(enum stopmachine_state newstate) | 47 | static void set_state(enum stopmachine_state newstate) |
44 | { | 48 | { |
45 | /* Reset ack counter. */ | 49 | /* Reset ack counter. */ |
@@ -51,21 +55,26 @@ static void set_state(enum stopmachine_state newstate) | |||
51 | /* Last one to ack a state moves to the next state. */ | 55 | /* Last one to ack a state moves to the next state. */ |
52 | static void ack_state(void) | 56 | static void ack_state(void) |
53 | { | 57 | { |
54 | if (atomic_dec_and_test(&thread_ack)) { | 58 | if (atomic_dec_and_test(&thread_ack)) |
55 | /* If we're the last one to ack the EXIT, we're finished. */ | 59 | set_state(state + 1); |
56 | if (state == STOPMACHINE_EXIT) | ||
57 | complete(&finished); | ||
58 | else | ||
59 | set_state(state + 1); | ||
60 | } | ||
61 | } | 60 | } |
62 | 61 | ||
63 | /* This is the actual thread which stops the CPU. It exits by itself rather | 62 | /* This is the actual function which stops the CPU. It runs |
64 | * than waiting for kthread_stop(), because it's easier for hotplug CPU. */ | 63 | * in the context of a dedicated stopmachine workqueue. */ |
65 | static int stop_cpu(struct stop_machine_data *smdata) | 64 | static void stop_cpu(struct work_struct *unused) |
66 | { | 65 | { |
67 | enum stopmachine_state curstate = STOPMACHINE_NONE; | 66 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
68 | 67 | struct stop_machine_data *smdata = &idle; | |
68 | int cpu = smp_processor_id(); | ||
69 | int err; | ||
70 | |||
71 | if (!active_cpus) { | ||
72 | if (cpu == first_cpu(cpu_online_map)) | ||
73 | smdata = &active; | ||
74 | } else { | ||
75 | if (cpu_isset(cpu, *active_cpus)) | ||
76 | smdata = &active; | ||
77 | } | ||
69 | /* Simple state machine */ | 78 | /* Simple state machine */ |
70 | do { | 79 | do { |
71 | /* Chill out and ensure we re-read stopmachine_state. */ | 80 | /* Chill out and ensure we re-read stopmachine_state. */ |
@@ -78,9 +87,11 @@ static int stop_cpu(struct stop_machine_data *smdata) | |||
78 | hard_irq_disable(); | 87 | hard_irq_disable(); |
79 | break; | 88 | break; |
80 | case STOPMACHINE_RUN: | 89 | case STOPMACHINE_RUN: |
81 | /* |= allows error detection if functions on | 90 | /* On multiple CPUs only a single error code |
82 | * multiple CPUs. */ | 91 | * is needed to tell that something failed. */ |
83 | smdata->fnret |= smdata->fn(smdata->data); | 92 | err = smdata->fn(smdata->data); |
93 | if (err) | ||
94 | smdata->fnret = err; | ||
84 | break; | 95 | break; |
85 | default: | 96 | default: |
86 | break; | 97 | break; |
@@ -90,7 +101,6 @@ static int stop_cpu(struct stop_machine_data *smdata) | |||
90 | } while (curstate != STOPMACHINE_EXIT); | 101 | } while (curstate != STOPMACHINE_EXIT); |
91 | 102 | ||
92 | local_irq_enable(); | 103 | local_irq_enable(); |
93 | do_exit(0); | ||
94 | } | 104 | } |
95 | 105 | ||
96 | /* Callback for CPUs which aren't supposed to do anything. */ | 106 | /* Callback for CPUs which aren't supposed to do anything. */ |
@@ -101,78 +111,34 @@ static int chill(void *unused) | |||
101 | 111 | ||
102 | int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) | 112 | int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) |
103 | { | 113 | { |
104 | int i, err; | 114 | struct work_struct *sm_work; |
105 | struct stop_machine_data active, idle; | 115 | int i; |
106 | struct task_struct **threads; | ||
107 | 116 | ||
117 | /* Set up initial state. */ | ||
118 | mutex_lock(&lock); | ||
119 | num_threads = num_online_cpus(); | ||
120 | active_cpus = cpus; | ||
108 | active.fn = fn; | 121 | active.fn = fn; |
109 | active.data = data; | 122 | active.data = data; |
110 | active.fnret = 0; | 123 | active.fnret = 0; |
111 | idle.fn = chill; | 124 | idle.fn = chill; |
112 | idle.data = NULL; | 125 | idle.data = NULL; |
113 | 126 | ||
114 | /* This could be too big for stack on large machines. */ | ||
115 | threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL); | ||
116 | if (!threads) | ||
117 | return -ENOMEM; | ||
118 | |||
119 | /* Set up initial state. */ | ||
120 | mutex_lock(&lock); | ||
121 | init_completion(&finished); | ||
122 | num_threads = num_online_cpus(); | ||
123 | set_state(STOPMACHINE_PREPARE); | 127 | set_state(STOPMACHINE_PREPARE); |
124 | 128 | ||
125 | for_each_online_cpu(i) { | 129 | /* Schedule the stop_cpu work on all cpus: hold this CPU so one |
126 | struct stop_machine_data *smdata = &idle; | ||
127 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
128 | |||
129 | if (!cpus) { | ||
130 | if (i == first_cpu(cpu_online_map)) | ||
131 | smdata = &active; | ||
132 | } else { | ||
133 | if (cpu_isset(i, *cpus)) | ||
134 | smdata = &active; | ||
135 | } | ||
136 | |||
137 | threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u", | ||
138 | i); | ||
139 | if (IS_ERR(threads[i])) { | ||
140 | err = PTR_ERR(threads[i]); | ||
141 | threads[i] = NULL; | ||
142 | goto kill_threads; | ||
143 | } | ||
144 | |||
145 | /* Place it onto correct cpu. */ | ||
146 | kthread_bind(threads[i], i); | ||
147 | |||
148 | /* Make it highest prio. */ | ||
149 | if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, ¶m)) | ||
150 | BUG(); | ||
151 | } | ||
152 | |||
153 | /* We've created all the threads. Wake them all: hold this CPU so one | ||
154 | * doesn't hit this CPU until we're ready. */ | 130 | * doesn't hit this CPU until we're ready. */ |
155 | get_cpu(); | 131 | get_cpu(); |
156 | for_each_online_cpu(i) | 132 | for_each_online_cpu(i) { |
157 | wake_up_process(threads[i]); | 133 | sm_work = percpu_ptr(stop_machine_work, i); |
158 | 134 | INIT_WORK(sm_work, stop_cpu); | |
135 | queue_work_on(i, stop_machine_wq, sm_work); | ||
136 | } | ||
159 | /* This will release the thread on our CPU. */ | 137 | /* This will release the thread on our CPU. */ |
160 | put_cpu(); | 138 | put_cpu(); |
161 | wait_for_completion(&finished); | 139 | flush_workqueue(stop_machine_wq); |
162 | mutex_unlock(&lock); | 140 | mutex_unlock(&lock); |
163 | |||
164 | kfree(threads); | ||
165 | |||
166 | return active.fnret; | 141 | return active.fnret; |
167 | |||
168 | kill_threads: | ||
169 | for_each_online_cpu(i) | ||
170 | if (threads[i]) | ||
171 | kthread_stop(threads[i]); | ||
172 | mutex_unlock(&lock); | ||
173 | |||
174 | kfree(threads); | ||
175 | return err; | ||
176 | } | 142 | } |
177 | 143 | ||
178 | int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) | 144 | int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) |
@@ -187,3 +153,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) | |||
187 | return ret; | 153 | return ret; |
188 | } | 154 | } |
189 | EXPORT_SYMBOL_GPL(stop_machine); | 155 | EXPORT_SYMBOL_GPL(stop_machine); |
156 | |||
157 | static int __init stop_machine_init(void) | ||
158 | { | ||
159 | stop_machine_wq = create_rt_workqueue("kstop"); | ||
160 | stop_machine_work = alloc_percpu(struct work_struct); | ||
161 | return 0; | ||
162 | } | ||
163 | core_initcall(stop_machine_init); | ||
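The rewrite swaps per-call RT kthreads for a single pre-created RT workqueue plus per-cpu work items, removing the allocation and thread-creation failure paths from __stop_machine(). The per-cpu fan-out it uses, sketched generically with made-up names:

    #include <linux/cpu.h>
    #include <linux/errno.h>
    #include <linux/kernel.h>
    #include <linux/percpu.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *demo_wq;
    static void *demo_work;                     /* per-cpu work_structs */

    static void demo_fn(struct work_struct *unused)
    {
        pr_info("demo running on cpu %d\n", smp_processor_id());
    }

    static int __init demo_init(void)
    {
        demo_wq = create_rt_workqueue("demo");  /* RT-priority workers */
        demo_work = alloc_percpu(struct work_struct);
        return (demo_wq && demo_work) ? 0 : -ENOMEM;
    }

    static void demo_run_on_all_cpus(void)
    {
        int cpu;

        get_cpu();                              /* hold this CPU while queueing */
        for_each_online_cpu(cpu) {
            struct work_struct *w = percpu_ptr(demo_work, cpu);

            INIT_WORK(w, demo_fn);
            queue_work_on(cpu, demo_wq, w);     /* pinned to that CPU */
        }
        put_cpu();
        flush_workqueue(demo_wq);               /* wait for every CPU */
    }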
diff --git a/kernel/sys.c b/kernel/sys.c index 53879cdae483..31deba8f7d16 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1716,6 +1716,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 		case PR_SET_TSC:
 			error = SET_TSC_CTL(arg2);
 			break;
+		case PR_GET_TIMERSLACK:
+			error = current->timer_slack_ns;
+			break;
+		case PR_SET_TIMERSLACK:
+			if (arg2 <= 0)
+				current->timer_slack_ns =
+					current->default_timer_slack_ns;
+			else
+				current->timer_slack_ns = arg2;
+			break;
 		default:
 			error = -EINVAL;
 			break;
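
[Editor's note] From userspace, the new pair is driven through prctl(2): GET returns the current slack in nanoseconds as the syscall return value, and SET with 0 restores the task's default. A minimal sketch; the PR_* numbers are the ones this series assigns, guarded here in case the installed headers predate 2.6.28:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_TIMERSLACK		/* headers older than 2.6.28 */
#define PR_SET_TIMERSLACK	29
#define PR_GET_TIMERSLACK	30
#endif

int main(void)
{
	printf("default timer slack: %d ns\n",
	       prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));

	/* Ask for 1 ms of slack; passing 0 would restore the default. */
	prctl(PR_SET_TIMERSLACK, 1000000, 0, 0, 0);
	printf("new timer slack: %d ns\n",
	       prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));
	return 0;
}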
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index edb1075f80d2..9d048fa2d902 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_shares_thresh",
+		.data		= &sysctl_sched_shares_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
 		.maxlen		= sizeof(unsigned int),
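
[Editor's note] A CTL_UNNUMBERED entry in kern_table is exported by procname only, so this tunable should surface as /proc/sys/kernel/sched_shares_thresh (path inferred from the table above). A hedged userspace sketch for reading and setting it; proc_dointvec_minmax with .extra1 = &zero rejects negative writes:

#include <stdio.h>

int main(void)
{
	unsigned int val;
	FILE *f = fopen("/proc/sys/kernel/sched_shares_thresh", "r+");

	if (!f)
		return 1;			/* pre-2.6.28 kernel, or no root */
	if (fscanf(f, "%u", &val) == 1)
		printf("sched_shares_thresh = %u\n", val);

	rewind(f);
	fprintf(f, "%u\n", 4);			/* any value >= 0 is accepted */
	fclose(f);
	return 0;
}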
diff --git a/kernel/time.c b/kernel/time.c
index 6a08660b4fac..d63a4336fad6 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64);
 #endif
 
 EXPORT_SYMBOL(jiffies);
+
+/*
+ * Add two timespec values and do a safety check for overflow.
+ * It's assumed that both values are valid (>= 0)
+ */
+struct timespec timespec_add_safe(const struct timespec lhs,
+				  const struct timespec rhs)
+{
+	struct timespec res;
+
+	set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
+				lhs.tv_nsec + rhs.tv_nsec);
+
+	if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
+		res.tv_sec = TIME_T_MAX;
+
+	return res;
+}
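
[Editor's note] The saturation check is easy to exercise in isolation: if tv_sec wraps, the result is clamped to TIME_T_MAX instead of becoming a time in the past. A userspace sketch of the same logic, with set_normalized_timespec() and TIME_T_MAX reimplemented inline for illustration (the wrap-around of signed addition is assumed, as in the kernel build):

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000L
/* Same definition the kernel uses for the largest positive time_t. */
#define TIME_T_MAX	(time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)

static struct timespec timespec_add_safe(struct timespec lhs,
					 struct timespec rhs)
{
	struct timespec res;

	res.tv_sec  = lhs.tv_sec + rhs.tv_sec;
	res.tv_nsec = lhs.tv_nsec + rhs.tv_nsec;
	while (res.tv_nsec >= NSEC_PER_SEC) {	/* normalize */
		res.tv_nsec -= NSEC_PER_SEC;
		res.tv_sec++;
	}
	/* Wrapped around?  Saturate instead of returning a past time. */
	if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
		res.tv_sec = TIME_T_MAX;

	return res;
}

int main(void)
{
	struct timespec a = { TIME_T_MAX - 1, 500000000L };
	struct timespec b = { 10, 700000000L };
	struct timespec sum = timespec_add_safe(a, b);

	/* Prints TIME_T_MAX.999999999: the overflowed sum was saturated. */
	printf("%ld.%09ld\n", (long)sum.tv_sec, sum.tv_nsec);
	return 0;
}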
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 1a20715bfd6e..8ff15e5d486b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -142,8 +142,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 		time_state = TIME_OOP;
 		printk(KERN_NOTICE "Clock: "
 		       "inserting leap second 23:59:60 UTC\n");
-		leap_timer.expires = ktime_add_ns(leap_timer.expires,
-						  NSEC_PER_SEC);
+		hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
 		res = HRTIMER_RESTART;
 		break;
 	case TIME_DEL:
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0581c11fe6c6..5bbb1044f847 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -300,7 +300,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 			goto out;
 		}
 
-		ts->idle_tick = ts->sched_timer.expires;
+		ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
 		ts->idle_jiffies = last_jiffies;
 		rcu_enter_nohz();
@@ -380,21 +380,21 @@ ktime_t tick_nohz_get_sleep_length(void)
 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 {
 	hrtimer_cancel(&ts->sched_timer);
-	ts->sched_timer.expires = ts->idle_tick;
+	hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
 
 	while (1) {
 		/* Forward the time to expire in the future */
 		hrtimer_forward(&ts->sched_timer, now, tick_period);
 
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-			hrtimer_start(&ts->sched_timer,
-				      ts->sched_timer.expires,
-				      HRTIMER_MODE_ABS);
+			hrtimer_start_expires(&ts->sched_timer,
+					      HRTIMER_MODE_ABS);
 			/* Check, if the timer was already in the past */
 			if (hrtimer_active(&ts->sched_timer))
 				break;
 		} else {
-			if (!tick_program_event(ts->sched_timer.expires, 0))
+			if (!tick_program_event(
+				hrtimer_get_expires(&ts->sched_timer), 0))
 				break;
 		}
 		/* Update jiffies and reread time */
@@ -456,14 +456,16 @@ void tick_nohz_restart_sched_tick(void)
 	 */
 	ts->tick_stopped = 0;
 	ts->idle_exittime = now;
+
 	tick_nohz_restart(ts, now);
+
 	local_irq_enable();
 }
 
 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
 {
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
-	return tick_program_event(ts->sched_timer.expires, 0);
+	return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
 }
 
 /*
@@ -542,7 +544,7 @@ static void tick_nohz_switch_to_nohz(void)
 	next = tick_init_jiffy_update();
 
 	for (;;) {
-		ts->sched_timer.expires = next;
+		hrtimer_set_expires(&ts->sched_timer, next);
 		if (!tick_program_event(next, 0))
 			break;
 		next = ktime_add(next, tick_period);
@@ -567,11 +569,21 @@ static void tick_nohz_switch_to_nohz(void)
 static void tick_nohz_kick_tick(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t delta, now;
 
 	if (!ts->tick_stopped)
 		return;
 
-	tick_nohz_restart(ts, ktime_get());
+	/*
+	 * Do not touch the tick device, when the next expiry is either
+	 * already reached or less/equal than the tick period.
+	 */
+	now = ktime_get();
+	delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
+	if (delta.tv64 <= tick_period.tv64)
+		return;
+
+	tick_nohz_restart(ts, now);
 }
 
 #else
@@ -668,16 +680,15 @@ void tick_setup_sched_timer(void)
 	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	/* Get the next period (per cpu) */
-	ts->sched_timer.expires = tick_init_jiffy_update();
+	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
 	offset = ktime_to_ns(tick_period) >> 1;
 	do_div(offset, num_possible_cpus());
 	offset *= smp_processor_id();
-	ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
+	hrtimer_add_expires_ns(&ts->sched_timer, offset);
 
 	for (;;) {
 		hrtimer_forward(&ts->sched_timer, now, tick_period);
-		hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
-			      HRTIMER_MODE_ABS);
+		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
 		/* Check, if the timer was already in the past */
 		if (hrtimer_active(&ts->sched_timer))
 			break;
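
[Editor's note] Every hunk in this file is the same mechanical conversion: with range hrtimers the expiry becomes a soft/hard pair, so direct reads and writes of timer->expires give way to accessors. A sketch collecting the old-to-new mapping in one hypothetical helper (accessor names as introduced by this series):

#include <linux/hrtimer.h>

static void expires_accessors_demo(struct hrtimer *t, ktime_t next, u64 ns)
{
	ktime_t exp;

	/* old: t->expires = next; */
	hrtimer_set_expires(t, next);

	/* old: t->expires = ktime_add_ns(t->expires, ns); */
	hrtimer_add_expires_ns(t, ns);

	/* old: exp = t->expires; */
	exp = hrtimer_get_expires(t);

	/* old: hrtimer_start(t, t->expires, HRTIMER_MODE_ABS); */
	hrtimer_start_expires(t, HRTIMER_MODE_ABS);

	(void)exp;
}

Going through the accessors keeps callers correct whichever of the soft or hard expiry a future implementation decides such code should see.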
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index f6426911e35a..a999b92a1277 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -66,9 +66,11 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
 		SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
 #endif
 	SEQ_printf(m, "\n");
-	SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n",
-		(unsigned long long)ktime_to_ns(timer->expires),
-		(long long)(ktime_to_ns(timer->expires) - now));
+	SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
+		(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
+		(unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)),
+		(long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now),
+		(long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now));
 }
 
 static void
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 714afad46539..f928f2a87b9b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -62,6 +62,7 @@ struct workqueue_struct {
 	const char *name;
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
+	int rt;
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
 #endif
@@ -766,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
 
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 	struct workqueue_struct *wq = cwq->wq;
 	const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
 	struct task_struct *p;
@@ -781,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 	 */
 	if (IS_ERR(p))
 		return PTR_ERR(p);
-
+	if (cwq->wq->rt)
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
 	cwq->thread = p;
 
 	return 0;
@@ -801,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 struct workqueue_struct *__create_workqueue_key(const char *name,
 						int singlethread,
 						int freezeable,
+						int rt,
 						struct lock_class_key *key,
 						const char *lock_name)
 {
@@ -822,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	wq->singlethread = singlethread;
 	wq->freezeable = freezeable;
+	wq->rt = rt;
 	INIT_LIST_HEAD(&wq->list);
 
 	if (singlethread) {
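
[Editor's note] With the rt flag plumbed through __create_workqueue_key(), a workqueue's worker threads are promoted to SCHED_FIFO at MAX_RT_PRIO-1 when they are created. The create_rt_workqueue() wrapper is what stop_machine_init() above uses; a hedged usage sketch for a hypothetical driver:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *urgent_wq;
static struct work_struct urgent_work;

static void urgent_fn(struct work_struct *work)
{
	/* Runs in a worker already promoted to SCHED_FIFO, MAX_RT_PRIO-1. */
}

static int __init urgent_init(void)
{
	urgent_wq = create_rt_workqueue("urgent");	/* passes rt = 1 */
	if (!urgent_wq)
		return -ENOMEM;

	INIT_WORK(&urgent_work, urgent_fn);
	queue_work(urgent_wq, &urgent_work);
	return 0;
}
core_initcall(urgent_init);

Setting the policy once at thread creation keeps the priority decision out of every work handler, which is why the flag lives in struct workqueue_struct rather than in the individual work items.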