aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-10-27 05:50:54 -0400
committerIngo Molnar <mingo@elte.hu>2008-10-27 05:50:54 -0400
commit4944dd62de21230af039eda7cd218e9a09021d11 (patch)
treebac70f7bab8506c7e1b0408bacbdb0b1d77262e9 /kernel
parentf17845e5d97ead8fbdadfd40039e058ec7cf4a42 (diff)
parent0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
Merge commit 'v2.6.28-rc2' into tracing/urgent
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_tree.c48
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/exec_domain.c33
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/futex.c11
-rw-r--r--kernel/hrtimer.c206
-rw-r--r--kernel/irq/chip.c3
-rw-r--r--kernel/module.c343
-rw-r--r--kernel/panic.c17
-rw-r--r--kernel/params.c276
-rw-r--r--kernel/posix-timers.c10
-rw-r--r--kernel/power/disk.c2
-rw-r--r--kernel/power/power.h2
-rw-r--r--kernel/power/swap.c14
-rw-r--r--kernel/rcupdate.c19
-rw-r--r--kernel/rtmutex.c3
-rw-r--r--kernel/sched.c59
-rw-r--r--kernel/sched_fair.c62
-rw-r--r--kernel/sched_features.h2
-rw-r--r--kernel/sched_stats.h11
-rw-r--r--kernel/stop_machine.c120
-rw-r--r--kernel/sys.c10
-rw-r--r--kernel/sysctl.c10
-rw-r--r--kernel/time.c18
-rw-r--r--kernel/time/ntp.c3
-rw-r--r--kernel/time/tick-sched.c35
-rw-r--r--kernel/time/timer_list.c8
-rw-r--r--kernel/workqueue.c7
28 files changed, 785 insertions, 551 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f7921a2ecf16..8ba0e0d934f2 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -532,7 +532,7 @@ void audit_trim_trees(void)
532 list_add(&cursor, &tree_list); 532 list_add(&cursor, &tree_list);
533 while (cursor.next != &tree_list) { 533 while (cursor.next != &tree_list) {
534 struct audit_tree *tree; 534 struct audit_tree *tree;
535 struct nameidata nd; 535 struct path path;
536 struct vfsmount *root_mnt; 536 struct vfsmount *root_mnt;
537 struct node *node; 537 struct node *node;
538 struct list_head list; 538 struct list_head list;
@@ -544,12 +544,12 @@ void audit_trim_trees(void)
544 list_add(&cursor, &tree->list); 544 list_add(&cursor, &tree->list);
545 mutex_unlock(&audit_filter_mutex); 545 mutex_unlock(&audit_filter_mutex);
546 546
547 err = path_lookup(tree->pathname, 0, &nd); 547 err = kern_path(tree->pathname, 0, &path);
548 if (err) 548 if (err)
549 goto skip_it; 549 goto skip_it;
550 550
551 root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry); 551 root_mnt = collect_mounts(path.mnt, path.dentry);
552 path_put(&nd.path); 552 path_put(&path);
553 if (!root_mnt) 553 if (!root_mnt)
554 goto skip_it; 554 goto skip_it;
555 555
@@ -580,19 +580,19 @@ skip_it:
580} 580}
581 581
582static int is_under(struct vfsmount *mnt, struct dentry *dentry, 582static int is_under(struct vfsmount *mnt, struct dentry *dentry,
583 struct nameidata *nd) 583 struct path *path)
584{ 584{
585 if (mnt != nd->path.mnt) { 585 if (mnt != path->mnt) {
586 for (;;) { 586 for (;;) {
587 if (mnt->mnt_parent == mnt) 587 if (mnt->mnt_parent == mnt)
588 return 0; 588 return 0;
589 if (mnt->mnt_parent == nd->path.mnt) 589 if (mnt->mnt_parent == path->mnt)
590 break; 590 break;
591 mnt = mnt->mnt_parent; 591 mnt = mnt->mnt_parent;
592 } 592 }
593 dentry = mnt->mnt_mountpoint; 593 dentry = mnt->mnt_mountpoint;
594 } 594 }
595 return is_subdir(dentry, nd->path.dentry); 595 return is_subdir(dentry, path->dentry);
596} 596}
597 597
598int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) 598int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
@@ -618,7 +618,7 @@ void audit_put_tree(struct audit_tree *tree)
618int audit_add_tree_rule(struct audit_krule *rule) 618int audit_add_tree_rule(struct audit_krule *rule)
619{ 619{
620 struct audit_tree *seed = rule->tree, *tree; 620 struct audit_tree *seed = rule->tree, *tree;
621 struct nameidata nd; 621 struct path path;
622 struct vfsmount *mnt, *p; 622 struct vfsmount *mnt, *p;
623 struct list_head list; 623 struct list_head list;
624 int err; 624 int err;
@@ -637,11 +637,11 @@ int audit_add_tree_rule(struct audit_krule *rule)
637 /* do not set rule->tree yet */ 637 /* do not set rule->tree yet */
638 mutex_unlock(&audit_filter_mutex); 638 mutex_unlock(&audit_filter_mutex);
639 639
640 err = path_lookup(tree->pathname, 0, &nd); 640 err = kern_path(tree->pathname, 0, &path);
641 if (err) 641 if (err)
642 goto Err; 642 goto Err;
643 mnt = collect_mounts(nd.path.mnt, nd.path.dentry); 643 mnt = collect_mounts(path.mnt, path.dentry);
644 path_put(&nd.path); 644 path_put(&path);
645 if (!mnt) { 645 if (!mnt) {
646 err = -ENOMEM; 646 err = -ENOMEM;
647 goto Err; 647 goto Err;
@@ -690,29 +690,29 @@ int audit_tag_tree(char *old, char *new)
690{ 690{
691 struct list_head cursor, barrier; 691 struct list_head cursor, barrier;
692 int failed = 0; 692 int failed = 0;
693 struct nameidata nd; 693 struct path path;
694 struct vfsmount *tagged; 694 struct vfsmount *tagged;
695 struct list_head list; 695 struct list_head list;
696 struct vfsmount *mnt; 696 struct vfsmount *mnt;
697 struct dentry *dentry; 697 struct dentry *dentry;
698 int err; 698 int err;
699 699
700 err = path_lookup(new, 0, &nd); 700 err = kern_path(new, 0, &path);
701 if (err) 701 if (err)
702 return err; 702 return err;
703 tagged = collect_mounts(nd.path.mnt, nd.path.dentry); 703 tagged = collect_mounts(path.mnt, path.dentry);
704 path_put(&nd.path); 704 path_put(&path);
705 if (!tagged) 705 if (!tagged)
706 return -ENOMEM; 706 return -ENOMEM;
707 707
708 err = path_lookup(old, 0, &nd); 708 err = kern_path(old, 0, &path);
709 if (err) { 709 if (err) {
710 drop_collected_mounts(tagged); 710 drop_collected_mounts(tagged);
711 return err; 711 return err;
712 } 712 }
713 mnt = mntget(nd.path.mnt); 713 mnt = mntget(path.mnt);
714 dentry = dget(nd.path.dentry); 714 dentry = dget(path.dentry);
715 path_put(&nd.path); 715 path_put(&path);
716 716
717 if (dentry == tagged->mnt_root && dentry == mnt->mnt_root) 717 if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
718 follow_up(&mnt, &dentry); 718 follow_up(&mnt, &dentry);
@@ -733,7 +733,7 @@ int audit_tag_tree(char *old, char *new)
733 list_add(&cursor, &tree->list); 733 list_add(&cursor, &tree->list);
734 mutex_unlock(&audit_filter_mutex); 734 mutex_unlock(&audit_filter_mutex);
735 735
736 err = path_lookup(tree->pathname, 0, &nd); 736 err = kern_path(tree->pathname, 0, &path);
737 if (err) { 737 if (err) {
738 put_tree(tree); 738 put_tree(tree);
739 mutex_lock(&audit_filter_mutex); 739 mutex_lock(&audit_filter_mutex);
@@ -741,15 +741,15 @@ int audit_tag_tree(char *old, char *new)
741 } 741 }
742 742
743 spin_lock(&vfsmount_lock); 743 spin_lock(&vfsmount_lock);
744 if (!is_under(mnt, dentry, &nd)) { 744 if (!is_under(mnt, dentry, &path)) {
745 spin_unlock(&vfsmount_lock); 745 spin_unlock(&vfsmount_lock);
746 path_put(&nd.path); 746 path_put(&path);
747 put_tree(tree); 747 put_tree(tree);
748 mutex_lock(&audit_filter_mutex); 748 mutex_lock(&audit_filter_mutex);
749 continue; 749 continue;
750 } 750 }
751 spin_unlock(&vfsmount_lock); 751 spin_unlock(&vfsmount_lock);
752 path_put(&nd.path); 752 path_put(&path);
753 753
754 list_for_each_entry(p, &list, mnt_list) { 754 list_for_each_entry(p, &list, mnt_list) {
755 failed = tag_chunk(p->mnt_root->d_inode, tree); 755 failed = tag_chunk(p->mnt_root->d_inode, tree);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 046c1609606b..35eebd5510c2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2104,7 +2104,7 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
2104 down_read(&cgrp->pids_mutex); 2104 down_read(&cgrp->pids_mutex);
2105 if (pid) { 2105 if (pid) {
2106 int end = cgrp->pids_length; 2106 int end = cgrp->pids_length;
2107 int i; 2107
2108 while (index < end) { 2108 while (index < end) {
2109 int mid = (index + end) / 2; 2109 int mid = (index + end) / 2;
2110 if (cgrp->tasks_pids[mid] == pid) { 2110 if (cgrp->tasks_pids[mid] == pid) {
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0d407e886735..0511716e9424 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -12,7 +12,9 @@
12#include <linux/kmod.h> 12#include <linux/kmod.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/personality.h> 14#include <linux/personality.h>
15#include <linux/proc_fs.h>
15#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/seq_file.h>
16#include <linux/syscalls.h> 18#include <linux/syscalls.h>
17#include <linux/sysctl.h> 19#include <linux/sysctl.h>
18#include <linux/types.h> 20#include <linux/types.h>
@@ -173,20 +175,39 @@ __set_personality(u_long personality)
173 return 0; 175 return 0;
174} 176}
175 177
176int 178#ifdef CONFIG_PROC_FS
177get_exec_domain_list(char *page) 179static int execdomains_proc_show(struct seq_file *m, void *v)
178{ 180{
179 struct exec_domain *ep; 181 struct exec_domain *ep;
180 int len = 0;
181 182
182 read_lock(&exec_domains_lock); 183 read_lock(&exec_domains_lock);
183 for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next) 184 for (ep = exec_domains; ep; ep = ep->next)
184 len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n", 185 seq_printf(m, "%d-%d\t%-16s\t[%s]\n",
185 ep->pers_low, ep->pers_high, ep->name, 186 ep->pers_low, ep->pers_high, ep->name,
186 module_name(ep->module)); 187 module_name(ep->module));
187 read_unlock(&exec_domains_lock); 188 read_unlock(&exec_domains_lock);
188 return (len); 189 return 0;
190}
191
192static int execdomains_proc_open(struct inode *inode, struct file *file)
193{
194 return single_open(file, execdomains_proc_show, NULL);
195}
196
197static const struct file_operations execdomains_proc_fops = {
198 .open = execdomains_proc_open,
199 .read = seq_read,
200 .llseek = seq_lseek,
201 .release = single_release,
202};
203
204static int __init proc_execdomains_init(void)
205{
206 proc_create("execdomains", 0, NULL, &execdomains_proc_fops);
207 return 0;
189} 208}
209module_init(proc_execdomains_init);
210#endif
190 211
191asmlinkage long 212asmlinkage long
192sys_personality(u_long personality) 213sys_personality(u_long personality)
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d093552dd6e..f6083561dfe0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1018,6 +1018,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1018 p->prev_utime = cputime_zero; 1018 p->prev_utime = cputime_zero;
1019 p->prev_stime = cputime_zero; 1019 p->prev_stime = cputime_zero;
1020 1020
1021 p->default_timer_slack_ns = current->timer_slack_ns;
1022
1021#ifdef CONFIG_DETECT_SOFTLOCKUP 1023#ifdef CONFIG_DETECT_SOFTLOCKUP
1022 p->last_switch_count = 0; 1024 p->last_switch_count = 0;
1023 p->last_switch_timestamp = 0; 1025 p->last_switch_timestamp = 0;
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..8af10027514b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1296,13 +1296,16 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1296 if (!abs_time) 1296 if (!abs_time)
1297 schedule(); 1297 schedule();
1298 else { 1298 else {
1299 unsigned long slack;
1300 slack = current->timer_slack_ns;
1301 if (rt_task(current))
1302 slack = 0;
1299 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, 1303 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
1300 HRTIMER_MODE_ABS); 1304 HRTIMER_MODE_ABS);
1301 hrtimer_init_sleeper(&t, current); 1305 hrtimer_init_sleeper(&t, current);
1302 t.timer.expires = *abs_time; 1306 hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
1303 1307
1304 hrtimer_start(&t.timer, t.timer.expires, 1308 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
1305 HRTIMER_MODE_ABS);
1306 if (!hrtimer_active(&t.timer)) 1309 if (!hrtimer_active(&t.timer))
1307 t.task = NULL; 1310 t.task = NULL;
1308 1311
@@ -1404,7 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1404 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, 1407 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
1405 HRTIMER_MODE_ABS); 1408 HRTIMER_MODE_ABS);
1406 hrtimer_init_sleeper(to, current); 1409 hrtimer_init_sleeper(to, current);
1407 to->timer.expires = *time; 1410 hrtimer_set_expires(&to->timer, *time);
1408 } 1411 }
1409 1412
1410 q.pi_state = NULL; 1413 q.pi_state = NULL;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 95978f48e039..2b465dfde426 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -517,7 +517,7 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
517 if (!base->first) 517 if (!base->first)
518 continue; 518 continue;
519 timer = rb_entry(base->first, struct hrtimer, node); 519 timer = rb_entry(base->first, struct hrtimer, node);
520 expires = ktime_sub(timer->expires, base->offset); 520 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
521 if (expires.tv64 < cpu_base->expires_next.tv64) 521 if (expires.tv64 < cpu_base->expires_next.tv64)
522 cpu_base->expires_next = expires; 522 cpu_base->expires_next = expires;
523 } 523 }
@@ -539,10 +539,10 @@ static int hrtimer_reprogram(struct hrtimer *timer,
539 struct hrtimer_clock_base *base) 539 struct hrtimer_clock_base *base)
540{ 540{
541 ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; 541 ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
542 ktime_t expires = ktime_sub(timer->expires, base->offset); 542 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
543 int res; 543 int res;
544 544
545 WARN_ON_ONCE(timer->expires.tv64 < 0); 545 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
546 546
547 /* 547 /*
548 * When the callback is running, we do not reprogram the clock event 548 * When the callback is running, we do not reprogram the clock event
@@ -795,7 +795,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
795 u64 orun = 1; 795 u64 orun = 1;
796 ktime_t delta; 796 ktime_t delta;
797 797
798 delta = ktime_sub(now, timer->expires); 798 delta = ktime_sub(now, hrtimer_get_expires(timer));
799 799
800 if (delta.tv64 < 0) 800 if (delta.tv64 < 0)
801 return 0; 801 return 0;
@@ -807,8 +807,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
807 s64 incr = ktime_to_ns(interval); 807 s64 incr = ktime_to_ns(interval);
808 808
809 orun = ktime_divns(delta, incr); 809 orun = ktime_divns(delta, incr);
810 timer->expires = ktime_add_ns(timer->expires, incr * orun); 810 hrtimer_add_expires_ns(timer, incr * orun);
811 if (timer->expires.tv64 > now.tv64) 811 if (hrtimer_get_expires_tv64(timer) > now.tv64)
812 return orun; 812 return orun;
813 /* 813 /*
814 * This (and the ktime_add() below) is the 814 * This (and the ktime_add() below) is the
@@ -816,7 +816,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
816 */ 816 */
817 orun++; 817 orun++;
818 } 818 }
819 timer->expires = ktime_add_safe(timer->expires, interval); 819 hrtimer_add_expires(timer, interval);
820 820
821 return orun; 821 return orun;
822} 822}
@@ -848,7 +848,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
848 * We dont care about collisions. Nodes with 848 * We dont care about collisions. Nodes with
849 * the same expiry time stay together. 849 * the same expiry time stay together.
850 */ 850 */
851 if (timer->expires.tv64 < entry->expires.tv64) { 851 if (hrtimer_get_expires_tv64(timer) <
852 hrtimer_get_expires_tv64(entry)) {
852 link = &(*link)->rb_left; 853 link = &(*link)->rb_left;
853 } else { 854 } else {
854 link = &(*link)->rb_right; 855 link = &(*link)->rb_right;
@@ -945,9 +946,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
945} 946}
946 947
947/** 948/**
948 * hrtimer_start - (re)start an relative timer on the current CPU 949 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
949 * @timer: the timer to be added 950 * @timer: the timer to be added
950 * @tim: expiry time 951 * @tim: expiry time
952 * @delta_ns: "slack" range for the timer
951 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) 953 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
952 * 954 *
953 * Returns: 955 * Returns:
@@ -955,7 +957,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
955 * 1 when the timer was active 957 * 1 when the timer was active
956 */ 958 */
957int 959int
958hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) 960hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
961 const enum hrtimer_mode mode)
959{ 962{
960 struct hrtimer_clock_base *base, *new_base; 963 struct hrtimer_clock_base *base, *new_base;
961 unsigned long flags; 964 unsigned long flags;
@@ -983,7 +986,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
983#endif 986#endif
984 } 987 }
985 988
986 timer->expires = tim; 989 hrtimer_set_expires_range_ns(timer, tim, delta_ns);
987 990
988 timer_stats_hrtimer_set_start_info(timer); 991 timer_stats_hrtimer_set_start_info(timer);
989 992
@@ -1016,8 +1019,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1016 1019
1017 return ret; 1020 return ret;
1018} 1021}
1022EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1023
1024/**
1025 * hrtimer_start - (re)start an hrtimer on the current CPU
1026 * @timer: the timer to be added
1027 * @tim: expiry time
1028 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
1029 *
1030 * Returns:
1031 * 0 on success
1032 * 1 when the timer was active
1033 */
1034int
1035hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1036{
1037 return hrtimer_start_range_ns(timer, tim, 0, mode);
1038}
1019EXPORT_SYMBOL_GPL(hrtimer_start); 1039EXPORT_SYMBOL_GPL(hrtimer_start);
1020 1040
1041
1021/** 1042/**
1022 * hrtimer_try_to_cancel - try to deactivate a timer 1043 * hrtimer_try_to_cancel - try to deactivate a timer
1023 * @timer: hrtimer to stop 1044 * @timer: hrtimer to stop
@@ -1077,7 +1098,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1077 ktime_t rem; 1098 ktime_t rem;
1078 1099
1079 base = lock_hrtimer_base(timer, &flags); 1100 base = lock_hrtimer_base(timer, &flags);
1080 rem = ktime_sub(timer->expires, base->get_time()); 1101 rem = hrtimer_expires_remaining(timer);
1081 unlock_hrtimer_base(timer, &flags); 1102 unlock_hrtimer_base(timer, &flags);
1082 1103
1083 return rem; 1104 return rem;
@@ -1109,7 +1130,7 @@ ktime_t hrtimer_get_next_event(void)
1109 continue; 1130 continue;
1110 1131
1111 timer = rb_entry(base->first, struct hrtimer, node); 1132 timer = rb_entry(base->first, struct hrtimer, node);
1112 delta.tv64 = timer->expires.tv64; 1133 delta.tv64 = hrtimer_get_expires_tv64(timer);
1113 delta = ktime_sub(delta, base->get_time()); 1134 delta = ktime_sub(delta, base->get_time());
1114 if (delta.tv64 < mindelta.tv64) 1135 if (delta.tv64 < mindelta.tv64)
1115 mindelta.tv64 = delta.tv64; 1136 mindelta.tv64 = delta.tv64;
@@ -1310,10 +1331,23 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1310 1331
1311 timer = rb_entry(node, struct hrtimer, node); 1332 timer = rb_entry(node, struct hrtimer, node);
1312 1333
1313 if (basenow.tv64 < timer->expires.tv64) { 1334 /*
1335 * The immediate goal for using the softexpires is
1336 * minimizing wakeups, not running timers at the
1337 * earliest interrupt after their soft expiration.
1338 * This allows us to avoid using a Priority Search
1339 * Tree, which can answer a stabbing query for
1340 * overlapping intervals and instead use the simple
1341 * BST we already have.
1342 * We don't add extra wakeups by delaying timers that
1343 * are right-of a not yet expired timer, because that
1344 * timer will have to trigger a wakeup anyway.
1345 */
1346
1347 if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
1314 ktime_t expires; 1348 ktime_t expires;
1315 1349
1316 expires = ktime_sub(timer->expires, 1350 expires = ktime_sub(hrtimer_get_expires(timer),
1317 base->offset); 1351 base->offset);
1318 if (expires.tv64 < expires_next.tv64) 1352 if (expires.tv64 < expires_next.tv64)
1319 expires_next = expires; 1353 expires_next = expires;
@@ -1349,6 +1383,30 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1349 raise_softirq(HRTIMER_SOFTIRQ); 1383 raise_softirq(HRTIMER_SOFTIRQ);
1350} 1384}
1351 1385
1386/**
1387 * hrtimer_peek_ahead_timers -- run soft-expired timers now
1388 *
1389 * hrtimer_peek_ahead_timers will peek at the timer queue of
1390 * the current cpu and check if there are any timers for which
1391 * the soft expires time has passed. If any such timers exist,
1392 * they are run immediately and then removed from the timer queue.
1393 *
1394 */
1395void hrtimer_peek_ahead_timers(void)
1396{
1397 struct tick_device *td;
1398 unsigned long flags;
1399
1400 if (!hrtimer_hres_active())
1401 return;
1402
1403 local_irq_save(flags);
1404 td = &__get_cpu_var(tick_cpu_device);
1405 if (td && td->evtdev)
1406 hrtimer_interrupt(td->evtdev);
1407 local_irq_restore(flags);
1408}
1409
1352static void run_hrtimer_softirq(struct softirq_action *h) 1410static void run_hrtimer_softirq(struct softirq_action *h)
1353{ 1411{
1354 run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); 1412 run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
@@ -1414,7 +1472,8 @@ void hrtimer_run_queues(void)
1414 struct hrtimer *timer; 1472 struct hrtimer *timer;
1415 1473
1416 timer = rb_entry(node, struct hrtimer, node); 1474 timer = rb_entry(node, struct hrtimer, node);
1417 if (base->softirq_time.tv64 <= timer->expires.tv64) 1475 if (base->softirq_time.tv64 <=
1476 hrtimer_get_expires_tv64(timer))
1418 break; 1477 break;
1419 1478
1420 if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { 1479 if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
@@ -1462,7 +1521,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
1462 1521
1463 do { 1522 do {
1464 set_current_state(TASK_INTERRUPTIBLE); 1523 set_current_state(TASK_INTERRUPTIBLE);
1465 hrtimer_start(&t->timer, t->timer.expires, mode); 1524 hrtimer_start_expires(&t->timer, mode);
1466 if (!hrtimer_active(&t->timer)) 1525 if (!hrtimer_active(&t->timer))
1467 t->task = NULL; 1526 t->task = NULL;
1468 1527
@@ -1484,7 +1543,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1484 struct timespec rmt; 1543 struct timespec rmt;
1485 ktime_t rem; 1544 ktime_t rem;
1486 1545
1487 rem = ktime_sub(timer->expires, timer->base->get_time()); 1546 rem = hrtimer_expires_remaining(timer);
1488 if (rem.tv64 <= 0) 1547 if (rem.tv64 <= 0)
1489 return 0; 1548 return 0;
1490 rmt = ktime_to_timespec(rem); 1549 rmt = ktime_to_timespec(rem);
@@ -1503,7 +1562,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1503 1562
1504 hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, 1563 hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
1505 HRTIMER_MODE_ABS); 1564 HRTIMER_MODE_ABS);
1506 t.timer.expires.tv64 = restart->nanosleep.expires; 1565 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
1507 1566
1508 if (do_nanosleep(&t, HRTIMER_MODE_ABS)) 1567 if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1509 goto out; 1568 goto out;
@@ -1528,9 +1587,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1528 struct restart_block *restart; 1587 struct restart_block *restart;
1529 struct hrtimer_sleeper t; 1588 struct hrtimer_sleeper t;
1530 int ret = 0; 1589 int ret = 0;
1590 unsigned long slack;
1591
1592 slack = current->timer_slack_ns;
1593 if (rt_task(current))
1594 slack = 0;
1531 1595
1532 hrtimer_init_on_stack(&t.timer, clockid, mode); 1596 hrtimer_init_on_stack(&t.timer, clockid, mode);
1533 t.timer.expires = timespec_to_ktime(*rqtp); 1597 hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
1534 if (do_nanosleep(&t, mode)) 1598 if (do_nanosleep(&t, mode))
1535 goto out; 1599 goto out;
1536 1600
@@ -1550,7 +1614,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1550 restart->fn = hrtimer_nanosleep_restart; 1614 restart->fn = hrtimer_nanosleep_restart;
1551 restart->nanosleep.index = t.timer.base->index; 1615 restart->nanosleep.index = t.timer.base->index;
1552 restart->nanosleep.rmtp = rmtp; 1616 restart->nanosleep.rmtp = rmtp;
1553 restart->nanosleep.expires = t.timer.expires.tv64; 1617 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1554 1618
1555 ret = -ERESTART_RESTARTBLOCK; 1619 ret = -ERESTART_RESTARTBLOCK;
1556out: 1620out:
@@ -1752,3 +1816,103 @@ void __init hrtimers_init(void)
1752#endif 1816#endif
1753} 1817}
1754 1818
1819/**
1820 * schedule_hrtimeout_range - sleep until timeout
1821 * @expires: timeout value (ktime_t)
1822 * @delta: slack in expires timeout (ktime_t)
1823 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1824 *
1825 * Make the current task sleep until the given expiry time has
1826 * elapsed. The routine will return immediately unless
1827 * the current task state has been set (see set_current_state()).
1828 *
1829 * The @delta argument gives the kernel the freedom to schedule the
1830 * actual wakeup to a time that is both power and performance friendly.
1831 * The kernel gives the normal best effort behavior for "@expires+@delta",
1832 * but may decide to fire the timer earlier, but no earlier than @expires.
1833 *
1834 * You can set the task state as follows -
1835 *
1836 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1837 * pass before the routine returns.
1838 *
1839 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1840 * delivered to the current task.
1841 *
1842 * The current task state is guaranteed to be TASK_RUNNING when this
1843 * routine returns.
1844 *
1845 * Returns 0 when the timer has expired otherwise -EINTR
1846 */
1847int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1848 const enum hrtimer_mode mode)
1849{
1850 struct hrtimer_sleeper t;
1851
1852 /*
1853 * Optimize when a zero timeout value is given. It does not
1854 * matter whether this is an absolute or a relative time.
1855 */
1856 if (expires && !expires->tv64) {
1857 __set_current_state(TASK_RUNNING);
1858 return 0;
1859 }
1860
1861 /*
1862 * A NULL parameter means "inifinte"
1863 */
1864 if (!expires) {
1865 schedule();
1866 __set_current_state(TASK_RUNNING);
1867 return -EINTR;
1868 }
1869
1870 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
1871 hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1872
1873 hrtimer_init_sleeper(&t, current);
1874
1875 hrtimer_start_expires(&t.timer, mode);
1876 if (!hrtimer_active(&t.timer))
1877 t.task = NULL;
1878
1879 if (likely(t.task))
1880 schedule();
1881
1882 hrtimer_cancel(&t.timer);
1883 destroy_hrtimer_on_stack(&t.timer);
1884
1885 __set_current_state(TASK_RUNNING);
1886
1887 return !t.task ? 0 : -EINTR;
1888}
1889EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
1890
1891/**
1892 * schedule_hrtimeout - sleep until timeout
1893 * @expires: timeout value (ktime_t)
1894 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1895 *
1896 * Make the current task sleep until the given expiry time has
1897 * elapsed. The routine will return immediately unless
1898 * the current task state has been set (see set_current_state()).
1899 *
1900 * You can set the task state as follows -
1901 *
1902 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1903 * pass before the routine returns.
1904 *
1905 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1906 * delivered to the current task.
1907 *
1908 * The current task state is guaranteed to be TASK_RUNNING when this
1909 * routine returns.
1910 *
1911 * Returns 0 when the timer has expired otherwise -EINTR
1912 */
1913int __sched schedule_hrtimeout(ktime_t *expires,
1914 const enum hrtimer_mode mode)
1915{
1916 return schedule_hrtimeout_range(expires, 0, mode);
1917}
1918EXPORT_SYMBOL_GPL(schedule_hrtimeout);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4895fde4eb93..10b5092e9bfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,6 +76,7 @@ void dynamic_irq_cleanup(unsigned int irq)
76 desc->chip_data = NULL; 76 desc->chip_data = NULL;
77 desc->handle_irq = handle_bad_irq; 77 desc->handle_irq = handle_bad_irq;
78 desc->chip = &no_irq_chip; 78 desc->chip = &no_irq_chip;
79 desc->name = NULL;
79 spin_unlock_irqrestore(&desc->lock, flags); 80 spin_unlock_irqrestore(&desc->lock, flags);
80} 81}
81 82
@@ -127,7 +128,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
127 return 0; 128 return 0;
128 129
129 spin_lock_irqsave(&desc->lock, flags); 130 spin_lock_irqsave(&desc->lock, flags);
130 ret = __irq_set_trigger(desc, irq, flags); 131 ret = __irq_set_trigger(desc, irq, type);
131 spin_unlock_irqrestore(&desc->lock, flags); 132 spin_unlock_irqrestore(&desc->lock, flags);
132 return ret; 133 return ret;
133} 134}
diff --git a/kernel/module.c b/kernel/module.c
index 0d8d21ee792c..1f4cc00e0c20 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -20,11 +20,13 @@
20#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/kallsyms.h> 22#include <linux/kallsyms.h>
23#include <linux/fs.h>
23#include <linux/sysfs.h> 24#include <linux/sysfs.h>
24#include <linux/kernel.h> 25#include <linux/kernel.h>
25#include <linux/slab.h> 26#include <linux/slab.h>
26#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
27#include <linux/elf.h> 28#include <linux/elf.h>
29#include <linux/proc_fs.h>
28#include <linux/seq_file.h> 30#include <linux/seq_file.h>
29#include <linux/syscalls.h> 31#include <linux/syscalls.h>
30#include <linux/fcntl.h> 32#include <linux/fcntl.h>
@@ -42,6 +44,7 @@
42#include <linux/string.h> 44#include <linux/string.h>
43#include <linux/mutex.h> 45#include <linux/mutex.h>
44#include <linux/unwind.h> 46#include <linux/unwind.h>
47#include <linux/rculist.h>
45#include <asm/uaccess.h> 48#include <asm/uaccess.h>
46#include <asm/cacheflush.h> 49#include <asm/cacheflush.h>
47#include <linux/license.h> 50#include <linux/license.h>
@@ -63,7 +66,7 @@
63#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) 66#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
64 67
65/* List of modules, protected by module_mutex or preempt_disable 68/* List of modules, protected by module_mutex or preempt_disable
66 * (add/delete uses stop_machine). */ 69 * (delete uses stop_machine/add uses RCU list operations). */
67static DEFINE_MUTEX(module_mutex); 70static DEFINE_MUTEX(module_mutex);
68static LIST_HEAD(modules); 71static LIST_HEAD(modules);
69 72
@@ -132,6 +135,29 @@ static unsigned int find_sec(Elf_Ehdr *hdr,
132 return 0; 135 return 0;
133} 136}
134 137
138/* Find a module section, or NULL. */
139static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs,
140 const char *secstrings, const char *name)
141{
142 /* Section 0 has sh_addr 0. */
143 return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr;
144}
145
146/* Find a module section, or NULL. Fill in number of "objects" in section. */
147static void *section_objs(Elf_Ehdr *hdr,
148 Elf_Shdr *sechdrs,
149 const char *secstrings,
150 const char *name,
151 size_t object_size,
152 unsigned int *num)
153{
154 unsigned int sec = find_sec(hdr, sechdrs, secstrings, name);
155
156 /* Section 0 has sh_addr 0 and sh_size 0. */
157 *num = sechdrs[sec].sh_size / object_size;
158 return (void *)sechdrs[sec].sh_addr;
159}
160
135/* Provided by the linker */ 161/* Provided by the linker */
136extern const struct kernel_symbol __start___ksymtab[]; 162extern const struct kernel_symbol __start___ksymtab[];
137extern const struct kernel_symbol __stop___ksymtab[]; 163extern const struct kernel_symbol __stop___ksymtab[];
@@ -218,7 +244,7 @@ static bool each_symbol(bool (*fn)(const struct symsearch *arr,
218 if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) 244 if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
219 return true; 245 return true;
220 246
221 list_for_each_entry(mod, &modules, list) { 247 list_for_each_entry_rcu(mod, &modules, list) {
222 struct symsearch arr[] = { 248 struct symsearch arr[] = {
223 { mod->syms, mod->syms + mod->num_syms, mod->crcs, 249 { mod->syms, mod->syms + mod->num_syms, mod->crcs,
224 NOT_GPL_ONLY, false }, 250 NOT_GPL_ONLY, false },
@@ -1394,17 +1420,6 @@ static void mod_kobject_remove(struct module *mod)
1394} 1420}
1395 1421
1396/* 1422/*
1397 * link the module with the whole machine is stopped with interrupts off
1398 * - this defends against kallsyms not taking locks
1399 */
1400static int __link_module(void *_mod)
1401{
1402 struct module *mod = _mod;
1403 list_add(&mod->list, &modules);
1404 return 0;
1405}
1406
1407/*
1408 * unlink the module with the whole machine is stopped with interrupts off 1423 * unlink the module with the whole machine is stopped with interrupts off
1409 * - this defends against kallsyms not taking locks 1424 * - this defends against kallsyms not taking locks
1410 */ 1425 */
@@ -1789,32 +1804,20 @@ static inline void add_kallsyms(struct module *mod,
1789} 1804}
1790#endif /* CONFIG_KALLSYMS */ 1805#endif /* CONFIG_KALLSYMS */
1791 1806
1792#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG 1807static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
1793static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
1794{ 1808{
1795 struct mod_debug *debug_info; 1809#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
1796 unsigned long pos, end; 1810 unsigned int i;
1797 unsigned int num_verbose;
1798
1799 pos = sechdrs[verboseindex].sh_addr;
1800 num_verbose = sechdrs[verboseindex].sh_size /
1801 sizeof(struct mod_debug);
1802 end = pos + (num_verbose * sizeof(struct mod_debug));
1803 1811
1804 for (; pos < end; pos += sizeof(struct mod_debug)) { 1812 for (i = 0; i < num; i++) {
1805 debug_info = (struct mod_debug *)pos; 1813 register_dynamic_debug_module(debug[i].modname,
1806 register_dynamic_debug_module(debug_info->modname, 1814 debug[i].type,
1807 debug_info->type, debug_info->logical_modname, 1815 debug[i].logical_modname,
1808 debug_info->flag_names, debug_info->hash, 1816 debug[i].flag_names,
1809 debug_info->hash2); 1817 debug[i].hash, debug[i].hash2);
1810 } 1818 }
1811}
1812#else
1813static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
1814 unsigned int verboseindex)
1815{
1816}
1817#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */ 1819#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
1820}
1818 1821
1819static void *module_alloc_update_bounds(unsigned long size) 1822static void *module_alloc_update_bounds(unsigned long size)
1820{ 1823{
@@ -1843,37 +1846,14 @@ static noinline struct module *load_module(void __user *umod,
1843 unsigned int i; 1846 unsigned int i;
1844 unsigned int symindex = 0; 1847 unsigned int symindex = 0;
1845 unsigned int strindex = 0; 1848 unsigned int strindex = 0;
1846 unsigned int setupindex; 1849 unsigned int modindex, versindex, infoindex, pcpuindex;
1847 unsigned int exindex;
1848 unsigned int exportindex;
1849 unsigned int modindex;
1850 unsigned int obsparmindex;
1851 unsigned int infoindex;
1852 unsigned int gplindex;
1853 unsigned int crcindex;
1854 unsigned int gplcrcindex;
1855 unsigned int versindex;
1856 unsigned int pcpuindex;
1857 unsigned int gplfutureindex;
1858 unsigned int gplfuturecrcindex;
1859 unsigned int unwindex = 0; 1850 unsigned int unwindex = 0;
1860#ifdef CONFIG_UNUSED_SYMBOLS 1851 unsigned int num_kp, num_mcount;
1861 unsigned int unusedindex; 1852 struct kernel_param *kp;
1862 unsigned int unusedcrcindex;
1863 unsigned int unusedgplindex;
1864 unsigned int unusedgplcrcindex;
1865#endif
1866 unsigned int markersindex;
1867 unsigned int markersstringsindex;
1868 unsigned int verboseindex;
1869 unsigned int tracepointsindex;
1870 unsigned int tracepointsstringsindex;
1871 unsigned int mcountindex;
1872 struct module *mod; 1853 struct module *mod;
1873 long err = 0; 1854 long err = 0;
1874 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1855 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
1875 void *mseg; 1856 unsigned long *mseg;
1876 struct exception_table_entry *extable;
1877 mm_segment_t old_fs; 1857 mm_segment_t old_fs;
1878 1858
1879 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", 1859 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -1937,6 +1917,7 @@ static noinline struct module *load_module(void __user *umod,
1937 err = -ENOEXEC; 1917 err = -ENOEXEC;
1938 goto free_hdr; 1918 goto free_hdr;
1939 } 1919 }
1920 /* This is temporary: point mod into copy of data. */
1940 mod = (void *)sechdrs[modindex].sh_addr; 1921 mod = (void *)sechdrs[modindex].sh_addr;
1941 1922
1942 if (symindex == 0) { 1923 if (symindex == 0) {
@@ -1946,22 +1927,6 @@ static noinline struct module *load_module(void __user *umod,
1946 goto free_hdr; 1927 goto free_hdr;
1947 } 1928 }
1948 1929
1949 /* Optional sections */
1950 exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
1951 gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
1952 gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
1953 crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
1954 gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
1955 gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
1956#ifdef CONFIG_UNUSED_SYMBOLS
1957 unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
1958 unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
1959 unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
1960 unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
1961#endif
1962 setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
1963 exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
1964 obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
1965 versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); 1930 versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
1966 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); 1931 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
1967 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); 1932 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
@@ -2117,42 +2082,57 @@ static noinline struct module *load_module(void __user *umod,
2117 if (err < 0) 2082 if (err < 0)
2118 goto cleanup; 2083 goto cleanup;
2119 2084
2120 /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ 2085 /* Now we've got everything in the final locations, we can
2121 mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); 2086 * find optional sections. */
2122 mod->syms = (void *)sechdrs[exportindex].sh_addr; 2087 kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*kp),
2123 if (crcindex) 2088 &num_kp);
2124 mod->crcs = (void *)sechdrs[crcindex].sh_addr; 2089 mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
2125 mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); 2090 sizeof(*mod->syms), &mod->num_syms);
2126 mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; 2091 mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
2127 if (gplcrcindex) 2092 mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
2128 mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; 2093 sizeof(*mod->gpl_syms),
2129 mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / 2094 &mod->num_gpl_syms);
2130 sizeof(*mod->gpl_future_syms); 2095 mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
2131 mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; 2096 mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
2132 if (gplfuturecrcindex) 2097 "__ksymtab_gpl_future",
2133 mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; 2098 sizeof(*mod->gpl_future_syms),
2099 &mod->num_gpl_future_syms);
2100 mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
2101 "__kcrctab_gpl_future");
2134 2102
2135#ifdef CONFIG_UNUSED_SYMBOLS 2103#ifdef CONFIG_UNUSED_SYMBOLS
2136 mod->num_unused_syms = sechdrs[unusedindex].sh_size / 2104 mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
2137 sizeof(*mod->unused_syms); 2105 "__ksymtab_unused",
2138 mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size / 2106 sizeof(*mod->unused_syms),
2139 sizeof(*mod->unused_gpl_syms); 2107 &mod->num_unused_syms);
2140 mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr; 2108 mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
2141 if (unusedcrcindex) 2109 "__kcrctab_unused");
2142 mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; 2110 mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
2143 mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; 2111 "__ksymtab_unused_gpl",
2144 if (unusedgplcrcindex) 2112 sizeof(*mod->unused_gpl_syms),
2145 mod->unused_gpl_crcs 2113 &mod->num_unused_gpl_syms);
2146 = (void *)sechdrs[unusedgplcrcindex].sh_addr; 2114 mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
2115 "__kcrctab_unused_gpl");
2116#endif
2117
2118#ifdef CONFIG_MARKERS
2119 mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
2120 sizeof(*mod->markers), &mod->num_markers);
2121#endif
2122#ifdef CONFIG_TRACEPOINTS
2123 mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
2124 "__tracepoints",
2125 sizeof(*mod->tracepoints),
2126 &mod->num_tracepoints);
2147#endif 2127#endif
2148 2128
2149#ifdef CONFIG_MODVERSIONS 2129#ifdef CONFIG_MODVERSIONS
2150 if ((mod->num_syms && !crcindex) 2130 if ((mod->num_syms && !mod->crcs)
2151 || (mod->num_gpl_syms && !gplcrcindex) 2131 || (mod->num_gpl_syms && !mod->gpl_crcs)
2152 || (mod->num_gpl_future_syms && !gplfuturecrcindex) 2132 || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
2153#ifdef CONFIG_UNUSED_SYMBOLS 2133#ifdef CONFIG_UNUSED_SYMBOLS
2154 || (mod->num_unused_syms && !unusedcrcindex) 2134 || (mod->num_unused_syms && !mod->unused_crcs)
2155 || (mod->num_unused_gpl_syms && !unusedgplcrcindex) 2135 || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
2156#endif 2136#endif
2157 ) { 2137 ) {
2158 printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name); 2138 printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
@@ -2161,16 +2141,6 @@ static noinline struct module *load_module(void __user *umod,
2161 goto cleanup; 2141 goto cleanup;
2162 } 2142 }
2163#endif 2143#endif
2164 markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
2165 markersstringsindex = find_sec(hdr, sechdrs, secstrings,
2166 "__markers_strings");
2167 verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
2168 tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
2169 tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
2170 "__tracepoints_strings");
2171
2172 mcountindex = find_sec(hdr, sechdrs, secstrings,
2173 "__mcount_loc");
2174 2144
2175 /* Now do relocations. */ 2145 /* Now do relocations. */
2176 for (i = 1; i < hdr->e_shnum; i++) { 2146 for (i = 1; i < hdr->e_shnum; i++) {
@@ -2193,28 +2163,16 @@ static noinline struct module *load_module(void __user *umod,
2193 if (err < 0) 2163 if (err < 0)
2194 goto cleanup; 2164 goto cleanup;
2195 } 2165 }
2196#ifdef CONFIG_MARKERS
2197 mod->markers = (void *)sechdrs[markersindex].sh_addr;
2198 mod->num_markers =
2199 sechdrs[markersindex].sh_size / sizeof(*mod->markers);
2200#endif
2201#ifdef CONFIG_TRACEPOINTS
2202 mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
2203 mod->num_tracepoints =
2204 sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
2205#endif
2206
2207 2166
2208 /* Find duplicate symbols */ 2167 /* Find duplicate symbols */
2209 err = verify_export_symbols(mod); 2168 err = verify_export_symbols(mod);
2210
2211 if (err < 0) 2169 if (err < 0)
2212 goto cleanup; 2170 goto cleanup;
2213 2171
2214 /* Set up and sort exception table */ 2172 /* Set up and sort exception table */
2215 mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); 2173 mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
2216 mod->extable = extable = (void *)sechdrs[exindex].sh_addr; 2174 sizeof(*mod->extable), &mod->num_exentries);
2217 sort_extable(extable, extable + mod->num_exentries); 2175 sort_extable(mod->extable, mod->extable + mod->num_exentries);
2218 2176
2219 /* Finally, copy percpu area over. */ 2177 /* Finally, copy percpu area over. */
2220 percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, 2178 percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
@@ -2223,11 +2181,17 @@ static noinline struct module *load_module(void __user *umod,
2223 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); 2181 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
2224 2182
2225 if (!mod->taints) { 2183 if (!mod->taints) {
2184 struct mod_debug *debug;
2185 unsigned int num_debug;
2186
2226#ifdef CONFIG_MARKERS 2187#ifdef CONFIG_MARKERS
2227 marker_update_probe_range(mod->markers, 2188 marker_update_probe_range(mod->markers,
2228 mod->markers + mod->num_markers); 2189 mod->markers + mod->num_markers);
2229#endif 2190#endif
2230 dynamic_printk_setup(sechdrs, verboseindex); 2191 debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
2192 sizeof(*debug), &num_debug);
2193 dynamic_printk_setup(debug, num_debug);
2194
2231#ifdef CONFIG_TRACEPOINTS 2195#ifdef CONFIG_TRACEPOINTS
2232 tracepoint_update_probe_range(mod->tracepoints, 2196 tracepoint_update_probe_range(mod->tracepoints,
2233 mod->tracepoints + mod->num_tracepoints); 2197 mod->tracepoints + mod->num_tracepoints);
@@ -2235,8 +2199,9 @@ static noinline struct module *load_module(void __user *umod,
2235 } 2199 }
2236 2200
2237 /* sechdrs[0].sh_size is always zero */ 2201 /* sechdrs[0].sh_size is always zero */
2238 mseg = (void *)sechdrs[mcountindex].sh_addr; 2202 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
2239 ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size); 2203 sizeof(*mseg), &num_mcount);
2204 ftrace_init_module(mseg, mseg + num_mcount);
2240 2205
2241 err = module_finalize(hdr, sechdrs, mod); 2206 err = module_finalize(hdr, sechdrs, mod);
2242 if (err < 0) 2207 if (err < 0)
@@ -2261,30 +2226,24 @@ static noinline struct module *load_module(void __user *umod,
2261 set_fs(old_fs); 2226 set_fs(old_fs);
2262 2227
2263 mod->args = args; 2228 mod->args = args;
2264 if (obsparmindex) 2229 if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
2265 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", 2230 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
2266 mod->name); 2231 mod->name);
2267 2232
2268 /* Now sew it into the lists so we can get lockdep and oops 2233 /* Now sew it into the lists so we can get lockdep and oops
2269 * info during argument parsing. Noone should access us, since 2234 * info during argument parsing. Noone should access us, since
2270 * strong_try_module_get() will fail. */ 2235 * strong_try_module_get() will fail.
2271 stop_machine(__link_module, mod, NULL); 2236 * lockdep/oops can run asynchronous, so use the RCU list insertion
2272 2237 * function to insert in a way safe to concurrent readers.
2273 /* Size of section 0 is 0, so this works well if no params */ 2238 * The mutex protects against concurrent writers.
2274 err = parse_args(mod->name, mod->args, 2239 */
2275 (struct kernel_param *) 2240 list_add_rcu(&mod->list, &modules);
2276 sechdrs[setupindex].sh_addr, 2241
2277 sechdrs[setupindex].sh_size 2242 err = parse_args(mod->name, mod->args, kp, num_kp, NULL);
2278 / sizeof(struct kernel_param),
2279 NULL);
2280 if (err < 0) 2243 if (err < 0)
2281 goto unlink; 2244 goto unlink;
2282 2245
2283 err = mod_sysfs_setup(mod, 2246 err = mod_sysfs_setup(mod, kp, num_kp);
2284 (struct kernel_param *)
2285 sechdrs[setupindex].sh_addr,
2286 sechdrs[setupindex].sh_size
2287 / sizeof(struct kernel_param));
2288 if (err < 0) 2247 if (err < 0)
2289 goto unlink; 2248 goto unlink;
2290 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2249 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
@@ -2473,7 +2432,7 @@ const char *module_address_lookup(unsigned long addr,
2473 const char *ret = NULL; 2432 const char *ret = NULL;
2474 2433
2475 preempt_disable(); 2434 preempt_disable();
2476 list_for_each_entry(mod, &modules, list) { 2435 list_for_each_entry_rcu(mod, &modules, list) {
2477 if (within(addr, mod->module_init, mod->init_size) 2436 if (within(addr, mod->module_init, mod->init_size)
2478 || within(addr, mod->module_core, mod->core_size)) { 2437 || within(addr, mod->module_core, mod->core_size)) {
2479 if (modname) 2438 if (modname)
@@ -2496,7 +2455,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
2496 struct module *mod; 2455 struct module *mod;
2497 2456
2498 preempt_disable(); 2457 preempt_disable();
2499 list_for_each_entry(mod, &modules, list) { 2458 list_for_each_entry_rcu(mod, &modules, list) {
2500 if (within(addr, mod->module_init, mod->init_size) || 2459 if (within(addr, mod->module_init, mod->init_size) ||
2501 within(addr, mod->module_core, mod->core_size)) { 2460 within(addr, mod->module_core, mod->core_size)) {
2502 const char *sym; 2461 const char *sym;
@@ -2520,7 +2479,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
2520 struct module *mod; 2479 struct module *mod;
2521 2480
2522 preempt_disable(); 2481 preempt_disable();
2523 list_for_each_entry(mod, &modules, list) { 2482 list_for_each_entry_rcu(mod, &modules, list) {
2524 if (within(addr, mod->module_init, mod->init_size) || 2483 if (within(addr, mod->module_init, mod->init_size) ||
2525 within(addr, mod->module_core, mod->core_size)) { 2484 within(addr, mod->module_core, mod->core_size)) {
2526 const char *sym; 2485 const char *sym;
@@ -2547,7 +2506,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2547 struct module *mod; 2506 struct module *mod;
2548 2507
2549 preempt_disable(); 2508 preempt_disable();
2550 list_for_each_entry(mod, &modules, list) { 2509 list_for_each_entry_rcu(mod, &modules, list) {
2551 if (symnum < mod->num_symtab) { 2510 if (symnum < mod->num_symtab) {
2552 *value = mod->symtab[symnum].st_value; 2511 *value = mod->symtab[symnum].st_value;
2553 *type = mod->symtab[symnum].st_info; 2512 *type = mod->symtab[symnum].st_info;
@@ -2590,7 +2549,7 @@ unsigned long module_kallsyms_lookup_name(const char *name)
2590 ret = mod_find_symname(mod, colon+1); 2549 ret = mod_find_symname(mod, colon+1);
2591 *colon = ':'; 2550 *colon = ':';
2592 } else { 2551 } else {
2593 list_for_each_entry(mod, &modules, list) 2552 list_for_each_entry_rcu(mod, &modules, list)
2594 if ((ret = mod_find_symname(mod, name)) != 0) 2553 if ((ret = mod_find_symname(mod, name)) != 0)
2595 break; 2554 break;
2596 } 2555 }
@@ -2599,23 +2558,6 @@ unsigned long module_kallsyms_lookup_name(const char *name)
2599} 2558}
2600#endif /* CONFIG_KALLSYMS */ 2559#endif /* CONFIG_KALLSYMS */
2601 2560
2602/* Called by the /proc file system to return a list of modules. */
2603static void *m_start(struct seq_file *m, loff_t *pos)
2604{
2605 mutex_lock(&module_mutex);
2606 return seq_list_start(&modules, *pos);
2607}
2608
2609static void *m_next(struct seq_file *m, void *p, loff_t *pos)
2610{
2611 return seq_list_next(p, &modules, pos);
2612}
2613
2614static void m_stop(struct seq_file *m, void *p)
2615{
2616 mutex_unlock(&module_mutex);
2617}
2618
2619static char *module_flags(struct module *mod, char *buf) 2561static char *module_flags(struct module *mod, char *buf)
2620{ 2562{
2621 int bx = 0; 2563 int bx = 0;
@@ -2649,6 +2591,24 @@ static char *module_flags(struct module *mod, char *buf)
2649 return buf; 2591 return buf;
2650} 2592}
2651 2593
2594#ifdef CONFIG_PROC_FS
2595/* Called by the /proc file system to return a list of modules. */
2596static void *m_start(struct seq_file *m, loff_t *pos)
2597{
2598 mutex_lock(&module_mutex);
2599 return seq_list_start(&modules, *pos);
2600}
2601
2602static void *m_next(struct seq_file *m, void *p, loff_t *pos)
2603{
2604 return seq_list_next(p, &modules, pos);
2605}
2606
2607static void m_stop(struct seq_file *m, void *p)
2608{
2609 mutex_unlock(&module_mutex);
2610}
2611
2652static int m_show(struct seq_file *m, void *p) 2612static int m_show(struct seq_file *m, void *p)
2653{ 2613{
2654 struct module *mod = list_entry(p, struct module, list); 2614 struct module *mod = list_entry(p, struct module, list);
@@ -2679,13 +2639,33 @@ static int m_show(struct seq_file *m, void *p)
2679 Where refcount is a number or -, and deps is a comma-separated list 2639 Where refcount is a number or -, and deps is a comma-separated list
2680 of depends or -. 2640 of depends or -.
2681*/ 2641*/
2682const struct seq_operations modules_op = { 2642static const struct seq_operations modules_op = {
2683 .start = m_start, 2643 .start = m_start,
2684 .next = m_next, 2644 .next = m_next,
2685 .stop = m_stop, 2645 .stop = m_stop,
2686 .show = m_show 2646 .show = m_show
2687}; 2647};
2688 2648
2649static int modules_open(struct inode *inode, struct file *file)
2650{
2651 return seq_open(file, &modules_op);
2652}
2653
2654static const struct file_operations proc_modules_operations = {
2655 .open = modules_open,
2656 .read = seq_read,
2657 .llseek = seq_lseek,
2658 .release = seq_release,
2659};
2660
2661static int __init proc_modules_init(void)
2662{
2663 proc_create("modules", 0, NULL, &proc_modules_operations);
2664 return 0;
2665}
2666module_init(proc_modules_init);
2667#endif
2668
2689/* Given an address, look for it in the module exception tables. */ 2669/* Given an address, look for it in the module exception tables. */
2690const struct exception_table_entry *search_module_extables(unsigned long addr) 2670const struct exception_table_entry *search_module_extables(unsigned long addr)
2691{ 2671{
@@ -2693,7 +2673,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
2693 struct module *mod; 2673 struct module *mod;
2694 2674
2695 preempt_disable(); 2675 preempt_disable();
2696 list_for_each_entry(mod, &modules, list) { 2676 list_for_each_entry_rcu(mod, &modules, list) {
2697 if (mod->num_exentries == 0) 2677 if (mod->num_exentries == 0)
2698 continue; 2678 continue;
2699 2679
@@ -2719,7 +2699,7 @@ int is_module_address(unsigned long addr)
2719 2699
2720 preempt_disable(); 2700 preempt_disable();
2721 2701
2722 list_for_each_entry(mod, &modules, list) { 2702 list_for_each_entry_rcu(mod, &modules, list) {
2723 if (within(addr, mod->module_core, mod->core_size)) { 2703 if (within(addr, mod->module_core, mod->core_size)) {
2724 preempt_enable(); 2704 preempt_enable();
2725 return 1; 2705 return 1;
@@ -2740,7 +2720,7 @@ struct module *__module_text_address(unsigned long addr)
2740 if (addr < module_addr_min || addr > module_addr_max) 2720 if (addr < module_addr_min || addr > module_addr_max)
2741 return NULL; 2721 return NULL;
2742 2722
2743 list_for_each_entry(mod, &modules, list) 2723 list_for_each_entry_rcu(mod, &modules, list)
2744 if (within(addr, mod->module_init, mod->init_text_size) 2724 if (within(addr, mod->module_init, mod->init_text_size)
2745 || within(addr, mod->module_core, mod->core_text_size)) 2725 || within(addr, mod->module_core, mod->core_text_size))
2746 return mod; 2726 return mod;
@@ -2765,8 +2745,11 @@ void print_modules(void)
2765 char buf[8]; 2745 char buf[8];
2766 2746
2767 printk("Modules linked in:"); 2747 printk("Modules linked in:");
2768 list_for_each_entry(mod, &modules, list) 2748 /* Most callers should already have preempt disabled, but make sure */
2749 preempt_disable();
2750 list_for_each_entry_rcu(mod, &modules, list)
2769 printk(" %s%s", mod->name, module_flags(mod, buf)); 2751 printk(" %s%s", mod->name, module_flags(mod, buf));
2752 preempt_enable();
2770 if (last_unloaded_module[0]) 2753 if (last_unloaded_module[0])
2771 printk(" [last unloaded: %s]", last_unloaded_module); 2754 printk(" [last unloaded: %s]", last_unloaded_module);
2772 printk("\n"); 2755 printk("\n");
diff --git a/kernel/panic.c b/kernel/panic.c
index bda561ef3cdf..6513aac8e992 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,13 +34,6 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
34 34
35EXPORT_SYMBOL(panic_notifier_list); 35EXPORT_SYMBOL(panic_notifier_list);
36 36
37static int __init panic_setup(char *str)
38{
39 panic_timeout = simple_strtoul(str, NULL, 0);
40 return 1;
41}
42__setup("panic=", panic_setup);
43
44static long no_blink(long time) 37static long no_blink(long time)
45{ 38{
46 return 0; 39 return 0;
@@ -218,13 +211,6 @@ void add_taint(unsigned flag)
218} 211}
219EXPORT_SYMBOL(add_taint); 212EXPORT_SYMBOL(add_taint);
220 213
221static int __init pause_on_oops_setup(char *str)
222{
223 pause_on_oops = simple_strtoul(str, NULL, 0);
224 return 1;
225}
226__setup("pause_on_oops=", pause_on_oops_setup);
227
228static void spin_msec(int msecs) 214static void spin_msec(int msecs)
229{ 215{
230 int i; 216 int i;
@@ -384,3 +370,6 @@ void __stack_chk_fail(void)
384} 370}
385EXPORT_SYMBOL(__stack_chk_fail); 371EXPORT_SYMBOL(__stack_chk_fail);
386#endif 372#endif
373
374core_param(panic, panic_timeout, int, 0644);
375core_param(pause_on_oops, pause_on_oops, int, 0644);
diff --git a/kernel/params.c b/kernel/params.c
index afc46a23eb6d..a1e3025b19a9 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -373,6 +373,8 @@ int param_get_string(char *buffer, struct kernel_param *kp)
373} 373}
374 374
375/* sysfs output in /sys/modules/XYZ/parameters/ */ 375/* sysfs output in /sys/modules/XYZ/parameters/ */
376#define to_module_attr(n) container_of(n, struct module_attribute, attr);
377#define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
376 378
377extern struct kernel_param __start___param[], __stop___param[]; 379extern struct kernel_param __start___param[], __stop___param[];
378 380
@@ -384,6 +386,7 @@ struct param_attribute
384 386
385struct module_param_attrs 387struct module_param_attrs
386{ 388{
389 unsigned int num;
387 struct attribute_group grp; 390 struct attribute_group grp;
388 struct param_attribute attrs[0]; 391 struct param_attribute attrs[0];
389}; 392};
@@ -434,93 +437,120 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
434 437
435#ifdef CONFIG_SYSFS 438#ifdef CONFIG_SYSFS
436/* 439/*
437 * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME 440 * add_sysfs_param - add a parameter to sysfs
438 * @mk: struct module_kobject (contains parent kobject) 441 * @mk: struct module_kobject
439 * @kparam: array of struct kernel_param, the actual parameter definitions 442 * @kparam: the actual parameter definition to add to sysfs
440 * @num_params: number of entries in array 443 * @name: name of parameter
441 * @name_skip: offset where the parameter name start in kparam[].name. Needed for built-in "modules"
442 * 444 *
443 * Create a kobject for a (per-module) group of parameters, and create files 445 * Create a kobject if for a (per-module) parameter if mp NULL, and
444 * in sysfs. A pointer to the param_kobject is returned on success, 446 * create file in sysfs. Returns an error on out of memory. Always cleans up
445 * NULL if there's no parameter to export, or other ERR_PTR(err). 447 * if there's an error.
446 */ 448 */
447static __modinit struct module_param_attrs * 449static __modinit int add_sysfs_param(struct module_kobject *mk,
448param_sysfs_setup(struct module_kobject *mk, 450 struct kernel_param *kp,
449 struct kernel_param *kparam, 451 const char *name)
450 unsigned int num_params,
451 unsigned int name_skip)
452{ 452{
453 struct module_param_attrs *mp; 453 struct module_param_attrs *new;
454 unsigned int valid_attrs = 0; 454 struct attribute **attrs;
455 unsigned int i, size[2]; 455 int err, num;
456 struct param_attribute *pattr; 456
457 struct attribute **gattr; 457 /* We don't bother calling this with invisible parameters. */
458 int err; 458 BUG_ON(!kp->perm);
459 459
460 for (i=0; i<num_params; i++) { 460 if (!mk->mp) {
461 if (kparam[i].perm) 461 num = 0;
462 valid_attrs++; 462 attrs = NULL;
463 } else {
464 num = mk->mp->num;
465 attrs = mk->mp->grp.attrs;
463 } 466 }
464 467
465 if (!valid_attrs) 468 /* Enlarge. */
466 return NULL; 469 new = krealloc(mk->mp,
467 470 sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
468 size[0] = ALIGN(sizeof(*mp) + 471 GFP_KERNEL);
469 valid_attrs * sizeof(mp->attrs[0]), 472 if (!new) {
470 sizeof(mp->grp.attrs[0])); 473 kfree(mk->mp);
471 size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]); 474 err = -ENOMEM;
472 475 goto fail;
473 mp = kzalloc(size[0] + size[1], GFP_KERNEL);
474 if (!mp)
475 return ERR_PTR(-ENOMEM);
476
477 mp->grp.name = "parameters";
478 mp->grp.attrs = (void *)mp + size[0];
479
480 pattr = &mp->attrs[0];
481 gattr = &mp->grp.attrs[0];
482 for (i = 0; i < num_params; i++) {
483 struct kernel_param *kp = &kparam[i];
484 if (kp->perm) {
485 pattr->param = kp;
486 pattr->mattr.show = param_attr_show;
487 pattr->mattr.store = param_attr_store;
488 pattr->mattr.attr.name = (char *)&kp->name[name_skip];
489 pattr->mattr.attr.mode = kp->perm;
490 *(gattr++) = &(pattr++)->mattr.attr;
491 }
492 } 476 }
493 *gattr = NULL; 477 attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
494 478 if (!attrs) {
495 if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) { 479 err = -ENOMEM;
496 kfree(mp); 480 goto fail_free_new;
497 return ERR_PTR(err);
498 } 481 }
499 return mp; 482
483 /* Sysfs wants everything zeroed. */
484 memset(new, 0, sizeof(*new));
485 memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
486 memset(&attrs[num], 0, sizeof(attrs[num]));
487 new->grp.name = "parameters";
488 new->grp.attrs = attrs;
489
490 /* Tack new one on the end. */
491 new->attrs[num].param = kp;
492 new->attrs[num].mattr.show = param_attr_show;
493 new->attrs[num].mattr.store = param_attr_store;
494 new->attrs[num].mattr.attr.name = (char *)name;
495 new->attrs[num].mattr.attr.mode = kp->perm;
496 new->num = num+1;
497
498 /* Fix up all the pointers, since krealloc can move us */
499 for (num = 0; num < new->num; num++)
500 new->grp.attrs[num] = &new->attrs[num].mattr.attr;
501 new->grp.attrs[num] = NULL;
502
503 mk->mp = new;
504 return 0;
505
506fail_free_new:
507 kfree(new);
508fail:
509 mk->mp = NULL;
510 return err;
500} 511}
501 512
502#ifdef CONFIG_MODULES 513#ifdef CONFIG_MODULES
514static void free_module_param_attrs(struct module_kobject *mk)
515{
516 kfree(mk->mp->grp.attrs);
517 kfree(mk->mp);
518 mk->mp = NULL;
519}
520
503/* 521/*
504 * module_param_sysfs_setup - setup sysfs support for one module 522 * module_param_sysfs_setup - setup sysfs support for one module
505 * @mod: module 523 * @mod: module
506 * @kparam: module parameters (array) 524 * @kparam: module parameters (array)
507 * @num_params: number of module parameters 525 * @num_params: number of module parameters
508 * 526 *
509 * Adds sysfs entries for module parameters, and creates a link from 527 * Adds sysfs entries for module parameters under
510 * /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/ 528 * /sys/module/[mod->name]/parameters/
511 */ 529 */
512int module_param_sysfs_setup(struct module *mod, 530int module_param_sysfs_setup(struct module *mod,
513 struct kernel_param *kparam, 531 struct kernel_param *kparam,
514 unsigned int num_params) 532 unsigned int num_params)
515{ 533{
516 struct module_param_attrs *mp; 534 int i, err;
535 bool params = false;
536
537 for (i = 0; i < num_params; i++) {
538 if (kparam[i].perm == 0)
539 continue;
540 err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
541 if (err)
542 return err;
543 params = true;
544 }
517 545
518 mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0); 546 if (!params)
519 if (IS_ERR(mp)) 547 return 0;
520 return PTR_ERR(mp);
521 548
522 mod->param_attrs = mp; 549 /* Create the param group. */
523 return 0; 550 err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
551 if (err)
552 free_module_param_attrs(&mod->mkobj);
553 return err;
524} 554}
525 555
526/* 556/*
@@ -532,43 +562,55 @@ int module_param_sysfs_setup(struct module *mod,
532 */ 562 */
533void module_param_sysfs_remove(struct module *mod) 563void module_param_sysfs_remove(struct module *mod)
534{ 564{
535 if (mod->param_attrs) { 565 if (mod->mkobj.mp) {
536 sysfs_remove_group(&mod->mkobj.kobj, 566 sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
537 &mod->param_attrs->grp);
538 /* We are positive that no one is using any param 567 /* We are positive that no one is using any param
539 * attrs at this point. Deallocate immediately. */ 568 * attrs at this point. Deallocate immediately. */
540 kfree(mod->param_attrs); 569 free_module_param_attrs(&mod->mkobj);
541 mod->param_attrs = NULL;
542 } 570 }
543} 571}
544#endif 572#endif
545 573
546/* 574static void __init kernel_add_sysfs_param(const char *name,
547 * kernel_param_sysfs_setup - wrapper for built-in params support 575 struct kernel_param *kparam,
548 */ 576 unsigned int name_skip)
549static void __init kernel_param_sysfs_setup(const char *name,
550 struct kernel_param *kparam,
551 unsigned int num_params,
552 unsigned int name_skip)
553{ 577{
554 struct module_kobject *mk; 578 struct module_kobject *mk;
555 int ret; 579 struct kobject *kobj;
580 int err;
556 581
557 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); 582 kobj = kset_find_obj(module_kset, name);
558 BUG_ON(!mk); 583 if (kobj) {
559 584 /* We already have one. Remove params so we can add more. */
560 mk->mod = THIS_MODULE; 585 mk = to_module_kobject(kobj);
561 mk->kobj.kset = module_kset; 586 /* We need to remove it before adding parameters. */
562 ret = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); 587 sysfs_remove_group(&mk->kobj, &mk->mp->grp);
563 if (ret) { 588 } else {
564 kobject_put(&mk->kobj); 589 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
565 printk(KERN_ERR "Module '%s' failed to be added to sysfs, " 590 BUG_ON(!mk);
566 "error number %d\n", name, ret); 591
567 printk(KERN_ERR "The system will be unstable now.\n"); 592 mk->mod = THIS_MODULE;
568 return; 593 mk->kobj.kset = module_kset;
594 err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL,
595 "%s", name);
596 if (err) {
597 kobject_put(&mk->kobj);
598 printk(KERN_ERR "Module '%s' failed add to sysfs, "
599 "error number %d\n", name, err);
600 printk(KERN_ERR "The system will be unstable now.\n");
601 return;
602 }
603 /* So that exit path is even. */
604 kobject_get(&mk->kobj);
569 } 605 }
570 param_sysfs_setup(mk, kparam, num_params, name_skip); 606
607 /* These should not fail at boot. */
608 err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
609 BUG_ON(err);
610 err = sysfs_create_group(&mk->kobj, &mk->mp->grp);
611 BUG_ON(err);
571 kobject_uevent(&mk->kobj, KOBJ_ADD); 612 kobject_uevent(&mk->kobj, KOBJ_ADD);
613 kobject_put(&mk->kobj);
572} 614}
573 615
574/* 616/*
@@ -579,60 +621,36 @@ static void __init kernel_param_sysfs_setup(const char *name,
579 * The "module" name (KBUILD_MODNAME) is stored before a dot, the 621 * The "module" name (KBUILD_MODNAME) is stored before a dot, the
580 * "parameter" name is stored behind a dot in kernel_param->name. So, 622 * "parameter" name is stored behind a dot in kernel_param->name. So,
581 * extract the "module" name for all built-in kernel_param-eters, 623 * extract the "module" name for all built-in kernel_param-eters,
582 * and for all who have the same, call kernel_param_sysfs_setup. 624 * and for all who have the same, call kernel_add_sysfs_param.
583 */ 625 */
584static void __init param_sysfs_builtin(void) 626static void __init param_sysfs_builtin(void)
585{ 627{
586 struct kernel_param *kp, *kp_begin = NULL; 628 struct kernel_param *kp;
587 unsigned int i, name_len, count = 0; 629 unsigned int name_len;
588 char modname[MODULE_NAME_LEN + 1] = ""; 630 char modname[MODULE_NAME_LEN];
589 631
590 for (i=0; i < __stop___param - __start___param; i++) { 632 for (kp = __start___param; kp < __stop___param; kp++) {
591 char *dot; 633 char *dot;
592 size_t max_name_len;
593 634
594 kp = &__start___param[i]; 635 if (kp->perm == 0)
595 max_name_len = 636 continue;
596 min_t(size_t, MODULE_NAME_LEN, strlen(kp->name));
597 637
598 dot = memchr(kp->name, '.', max_name_len); 638 dot = strchr(kp->name, '.');
599 if (!dot) { 639 if (!dot) {
600 DEBUGP("couldn't find period in first %d characters " 640 /* This happens for core_param() */
601 "of %s\n", MODULE_NAME_LEN, kp->name); 641 strcpy(modname, "kernel");
602 continue; 642 name_len = 0;
603 } 643 } else {
604 name_len = dot - kp->name; 644 name_len = dot - kp->name + 1;
605 645 strlcpy(modname, kp->name, name_len);
606 /* new kbuild_modname? */
607 if (strlen(modname) != name_len
608 || strncmp(modname, kp->name, name_len) != 0) {
609 /* add a new kobject for previous kernel_params. */
610 if (count)
611 kernel_param_sysfs_setup(modname,
612 kp_begin,
613 count,
614 strlen(modname)+1);
615
616 strncpy(modname, kp->name, name_len);
617 modname[name_len] = '\0';
618 count = 0;
619 kp_begin = kp;
620 } 646 }
621 count++; 647 kernel_add_sysfs_param(modname, kp, name_len);
622 } 648 }
623
624 /* last kernel_params need to be registered as well */
625 if (count)
626 kernel_param_sysfs_setup(modname, kp_begin, count,
627 strlen(modname)+1);
628} 649}
629 650
630 651
631/* module-related sysfs stuff */ 652/* module-related sysfs stuff */
632 653
633#define to_module_attr(n) container_of(n, struct module_attribute, attr);
634#define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
635
636static ssize_t module_attr_show(struct kobject *kobj, 654static ssize_t module_attr_show(struct kobject *kobj,
637 struct attribute *attr, 655 struct attribute *attr,
638 char *buf) 656 char *buf)
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index b931d7cedbfa..5e79c662294b 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -639,7 +639,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
639 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) 639 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
640 timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); 640 timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
641 641
642 remaining = ktime_sub(timer->expires, now); 642 remaining = ktime_sub(hrtimer_get_expires(timer), now);
643 /* Return 0 only, when the timer is expired and not pending */ 643 /* Return 0 only, when the timer is expired and not pending */
644 if (remaining.tv64 <= 0) { 644 if (remaining.tv64 <= 0) {
645 /* 645 /*
@@ -733,7 +733,7 @@ common_timer_set(struct k_itimer *timr, int flags,
733 hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); 733 hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
734 timr->it.real.timer.function = posix_timer_fn; 734 timr->it.real.timer.function = posix_timer_fn;
735 735
736 timer->expires = timespec_to_ktime(new_setting->it_value); 736 hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value));
737 737
738 /* Convert interval */ 738 /* Convert interval */
739 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); 739 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
@@ -742,14 +742,12 @@ common_timer_set(struct k_itimer *timr, int flags,
742 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { 742 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
743 /* Setup correct expiry time for relative timers */ 743 /* Setup correct expiry time for relative timers */
744 if (mode == HRTIMER_MODE_REL) { 744 if (mode == HRTIMER_MODE_REL) {
745 timer->expires = 745 hrtimer_add_expires(timer, timer->base->get_time());
746 ktime_add_safe(timer->expires,
747 timer->base->get_time());
748 } 746 }
749 return 0; 747 return 0;
750 } 748 }
751 749
752 hrtimer_start(timer, timer->expires, mode); 750 hrtimer_start_expires(timer, mode);
753 return 0; 751 return 0;
754} 752}
755 753
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 331f9836383f..c9d74083746f 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -651,7 +651,7 @@ static int software_resume(void)
651 pr_debug("PM: Preparing processes for restore.\n"); 651 pr_debug("PM: Preparing processes for restore.\n");
652 error = prepare_processes(); 652 error = prepare_processes();
653 if (error) { 653 if (error) {
654 swsusp_close(); 654 swsusp_close(FMODE_READ);
655 goto Done; 655 goto Done;
656 } 656 }
657 657
diff --git a/kernel/power/power.h b/kernel/power/power.h
index acc0c101dbd5..46b5ec7a3afb 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -153,7 +153,7 @@ extern int swsusp_shrink_memory(void);
153extern void swsusp_free(void); 153extern void swsusp_free(void);
154extern int swsusp_read(unsigned int *flags_p); 154extern int swsusp_read(unsigned int *flags_p);
155extern int swsusp_write(unsigned int flags); 155extern int swsusp_write(unsigned int flags);
156extern void swsusp_close(void); 156extern void swsusp_close(fmode_t);
157 157
158struct timeval; 158struct timeval;
159/* kernel/power/swsusp.c */ 159/* kernel/power/swsusp.c */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 80ccac849e46..b7713b53d07a 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -172,13 +172,13 @@ static int swsusp_swap_check(void) /* This is called before saving image */
172 return res; 172 return res;
173 173
174 root_swap = res; 174 root_swap = res;
175 res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR); 175 res = blkdev_get(resume_bdev, FMODE_WRITE);
176 if (res) 176 if (res)
177 return res; 177 return res;
178 178
179 res = set_blocksize(resume_bdev, PAGE_SIZE); 179 res = set_blocksize(resume_bdev, PAGE_SIZE);
180 if (res < 0) 180 if (res < 0)
181 blkdev_put(resume_bdev); 181 blkdev_put(resume_bdev, FMODE_WRITE);
182 182
183 return res; 183 return res;
184} 184}
@@ -426,7 +426,7 @@ int swsusp_write(unsigned int flags)
426 426
427 release_swap_writer(&handle); 427 release_swap_writer(&handle);
428 out: 428 out:
429 swsusp_close(); 429 swsusp_close(FMODE_WRITE);
430 return error; 430 return error;
431} 431}
432 432
@@ -574,7 +574,7 @@ int swsusp_read(unsigned int *flags_p)
574 error = load_image(&handle, &snapshot, header->pages - 1); 574 error = load_image(&handle, &snapshot, header->pages - 1);
575 release_swap_reader(&handle); 575 release_swap_reader(&handle);
576 576
577 blkdev_put(resume_bdev); 577 blkdev_put(resume_bdev, FMODE_READ);
578 578
579 if (!error) 579 if (!error)
580 pr_debug("PM: Image successfully loaded\n"); 580 pr_debug("PM: Image successfully loaded\n");
@@ -609,7 +609,7 @@ int swsusp_check(void)
609 return -EINVAL; 609 return -EINVAL;
610 } 610 }
611 if (error) 611 if (error)
612 blkdev_put(resume_bdev); 612 blkdev_put(resume_bdev, FMODE_READ);
613 else 613 else
614 pr_debug("PM: Signature found, resuming\n"); 614 pr_debug("PM: Signature found, resuming\n");
615 } else { 615 } else {
@@ -626,14 +626,14 @@ int swsusp_check(void)
626 * swsusp_close - close swap device. 626 * swsusp_close - close swap device.
627 */ 627 */
628 628
629void swsusp_close(void) 629void swsusp_close(fmode_t mode)
630{ 630{
631 if (IS_ERR(resume_bdev)) { 631 if (IS_ERR(resume_bdev)) {
632 pr_debug("PM: Image device not initialised\n"); 632 pr_debug("PM: Image device not initialised\n");
633 return; 633 return;
634 } 634 }
635 635
636 blkdev_put(resume_bdev); 636 blkdev_put(resume_bdev, mode); /* move up */
637} 637}
638 638
639static int swsusp_header_init(void) 639static int swsusp_header_init(void)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 467d5940f624..ad63af8b2521 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -119,18 +119,19 @@ static void _rcu_barrier(enum rcu_barrier type)
119 /* Take cpucontrol mutex to protect against CPU hotplug */ 119 /* Take cpucontrol mutex to protect against CPU hotplug */
120 mutex_lock(&rcu_barrier_mutex); 120 mutex_lock(&rcu_barrier_mutex);
121 init_completion(&rcu_barrier_completion); 121 init_completion(&rcu_barrier_completion);
122 atomic_set(&rcu_barrier_cpu_count, 0);
123 /* 122 /*
124 * The queueing of callbacks in all CPUs must be atomic with 123 * Initialize rcu_barrier_cpu_count to 1, then invoke
125 * respect to RCU, otherwise one CPU may queue a callback, 124 * rcu_barrier_func() on each CPU, so that each CPU also has
126 * wait for a grace period, decrement barrier count and call 125 * incremented rcu_barrier_cpu_count. Only then is it safe to
127 * complete(), while other CPUs have not yet queued anything. 126 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
128 * So, we need to make sure that grace periods cannot complete 127 * might complete its grace period before all of the other CPUs
129 * until all the callbacks are queued. 128 * did their increment, causing this function to return too
129 * early.
130 */ 130 */
131 rcu_read_lock(); 131 atomic_set(&rcu_barrier_cpu_count, 1);
132 on_each_cpu(rcu_barrier_func, (void *)type, 1); 132 on_each_cpu(rcu_barrier_func, (void *)type, 1);
133 rcu_read_unlock(); 133 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
134 complete(&rcu_barrier_completion);
134 wait_for_completion(&rcu_barrier_completion); 135 wait_for_completion(&rcu_barrier_completion);
135 mutex_unlock(&rcu_barrier_mutex); 136 mutex_unlock(&rcu_barrier_mutex);
136} 137}
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 6522ae5b14a2..69d9cb921ffa 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -631,8 +631,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
631 631
632 /* Setup the timer, when timeout != NULL */ 632 /* Setup the timer, when timeout != NULL */
633 if (unlikely(timeout)) { 633 if (unlikely(timeout)) {
634 hrtimer_start(&timeout->timer, timeout->timer.expires, 634 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
635 HRTIMER_MODE_ABS);
636 if (!hrtimer_active(&timeout->timer)) 635 if (!hrtimer_active(&timeout->timer))
637 timeout->task = NULL; 636 timeout->task = NULL;
638 } 637 }
diff --git a/kernel/sched.c b/kernel/sched.c
index d906f72b42d2..6625c3c4b10d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -55,6 +55,7 @@
55#include <linux/cpuset.h> 55#include <linux/cpuset.h>
56#include <linux/percpu.h> 56#include <linux/percpu.h>
57#include <linux/kthread.h> 57#include <linux/kthread.h>
58#include <linux/proc_fs.h>
58#include <linux/seq_file.h> 59#include <linux/seq_file.h>
59#include <linux/sysctl.h> 60#include <linux/sysctl.h>
60#include <linux/syscalls.h> 61#include <linux/syscalls.h>
@@ -227,9 +228,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
227 228
228 now = hrtimer_cb_get_time(&rt_b->rt_period_timer); 229 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
229 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); 230 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
230 hrtimer_start(&rt_b->rt_period_timer, 231 hrtimer_start_expires(&rt_b->rt_period_timer,
231 rt_b->rt_period_timer.expires, 232 HRTIMER_MODE_ABS);
232 HRTIMER_MODE_ABS);
233 } 233 }
234 spin_unlock(&rt_b->rt_runtime_lock); 234 spin_unlock(&rt_b->rt_runtime_lock);
235} 235}
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
819unsigned int sysctl_sched_shares_ratelimit = 250000; 819unsigned int sysctl_sched_shares_ratelimit = 250000;
820 820
821/* 821/*
822 * Inject some fuzzyness into changing the per-cpu group shares
823 * this avoids remote rq-locks at the expense of fairness.
824 * default: 4
825 */
826unsigned int sysctl_sched_shares_thresh = 4;
827
828/*
822 * period over which we measure -rt task cpu usage in us. 829 * period over which we measure -rt task cpu usage in us.
823 * default: 1s 830 * default: 1s
824 */ 831 */
@@ -1064,7 +1071,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
1064 struct hrtimer *timer = &rq->hrtick_timer; 1071 struct hrtimer *timer = &rq->hrtick_timer;
1065 ktime_t time = ktime_add_ns(timer->base->get_time(), delay); 1072 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
1066 1073
1067 timer->expires = time; 1074 hrtimer_set_expires(timer, time);
1068 1075
1069 if (rq == this_rq()) { 1076 if (rq == this_rq()) {
1070 hrtimer_restart(timer); 1077 hrtimer_restart(timer);
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
1454 * Calculate and set the cpu's group shares. 1461 * Calculate and set the cpu's group shares.
1455 */ 1462 */
1456static void 1463static void
1457__update_group_shares_cpu(struct task_group *tg, int cpu, 1464update_group_shares_cpu(struct task_group *tg, int cpu,
1458 unsigned long sd_shares, unsigned long sd_rq_weight) 1465 unsigned long sd_shares, unsigned long sd_rq_weight)
1459{ 1466{
1460 int boost = 0; 1467 int boost = 0;
1461 unsigned long shares; 1468 unsigned long shares;
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
1486 * 1493 *
1487 */ 1494 */
1488 shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); 1495 shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
1496 shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
1489 1497
1490 /* 1498 if (abs(shares - tg->se[cpu]->load.weight) >
1491 * record the actual number of shares, not the boosted amount. 1499 sysctl_sched_shares_thresh) {
1492 */ 1500 struct rq *rq = cpu_rq(cpu);
1493 tg->cfs_rq[cpu]->shares = boost ? 0 : shares; 1501 unsigned long flags;
1494 tg->cfs_rq[cpu]->rq_weight = rq_weight;
1495 1502
1496 if (shares < MIN_SHARES) 1503 spin_lock_irqsave(&rq->lock, flags);
1497 shares = MIN_SHARES; 1504 /*
1498 else if (shares > MAX_SHARES) 1505 * record the actual number of shares, not the boosted amount.
1499 shares = MAX_SHARES; 1506 */
1507 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1508 tg->cfs_rq[cpu]->rq_weight = rq_weight;
1500 1509
1501 __set_se_shares(tg->se[cpu], shares); 1510 __set_se_shares(tg->se[cpu], shares);
1511 spin_unlock_irqrestore(&rq->lock, flags);
1512 }
1502} 1513}
1503 1514
1504/* 1515/*
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
1527 if (!rq_weight) 1538 if (!rq_weight)
1528 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; 1539 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
1529 1540
1530 for_each_cpu_mask(i, sd->span) { 1541 for_each_cpu_mask(i, sd->span)
1531 struct rq *rq = cpu_rq(i); 1542 update_group_shares_cpu(tg, i, shares, rq_weight);
1532 unsigned long flags;
1533
1534 spin_lock_irqsave(&rq->lock, flags);
1535 __update_group_shares_cpu(tg, i, shares, rq_weight);
1536 spin_unlock_irqrestore(&rq->lock, flags);
1537 }
1538 1543
1539 return 0; 1544 return 0;
1540} 1545}
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible:
4443 if (sched_feat(HRTICK)) 4448 if (sched_feat(HRTICK))
4444 hrtick_clear(rq); 4449 hrtick_clear(rq);
4445 4450
4446 /* 4451 spin_lock_irq(&rq->lock);
4447 * Do the rq-clock update outside the rq lock:
4448 */
4449 local_irq_disable();
4450 update_rq_clock(rq); 4452 update_rq_clock(rq);
4451 spin_lock(&rq->lock);
4452 clear_tsk_need_resched(prev); 4453 clear_tsk_need_resched(prev);
4453 4454
4454 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 4455 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f604dae71316..9573c33688b8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
73 73
74const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 74const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
75 75
76static const struct sched_class fair_sched_class;
77
76/************************************************************** 78/**************************************************************
77 * CFS operations on generic schedulable entities: 79 * CFS operations on generic schedulable entities:
78 */ 80 */
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
334#endif 336#endif
335 337
336/* 338/*
337 * delta *= w / rw 339 * delta *= P[w / rw]
338 */ 340 */
339static inline unsigned long 341static inline unsigned long
340calc_delta_weight(unsigned long delta, struct sched_entity *se) 342calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
348} 350}
349 351
350/* 352/*
351 * delta *= rw / w 353 * delta /= w
352 */ 354 */
353static inline unsigned long 355static inline unsigned long
354calc_delta_fair(unsigned long delta, struct sched_entity *se) 356calc_delta_fair(unsigned long delta, struct sched_entity *se)
355{ 357{
356 for_each_sched_entity(se) { 358 if (unlikely(se->load.weight != NICE_0_LOAD))
357 delta = calc_delta_mine(delta, 359 delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
358 cfs_rq_of(se)->load.weight, &se->load);
359 }
360 360
361 return delta; 361 return delta;
362} 362}
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
386 * We calculate the wall-time slice from the period by taking a part 386 * We calculate the wall-time slice from the period by taking a part
387 * proportional to the weight. 387 * proportional to the weight.
388 * 388 *
389 * s = p*w/rw 389 * s = p*P[w/rw]
390 */ 390 */
391static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) 391static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
392{ 392{
393 return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); 393 unsigned long nr_running = cfs_rq->nr_running;
394
395 if (unlikely(!se->on_rq))
396 nr_running++;
397
398 return calc_delta_weight(__sched_period(nr_running), se);
394} 399}
395 400
396/* 401/*
397 * We calculate the vruntime slice of a to be inserted task 402 * We calculate the vruntime slice of a to be inserted task
398 * 403 *
399 * vs = s*rw/w = p 404 * vs = s/w
400 */ 405 */
401static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) 406static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
402{ 407{
403 unsigned long nr_running = cfs_rq->nr_running; 408 return calc_delta_fair(sched_slice(cfs_rq, se), se);
404
405 if (!se->on_rq)
406 nr_running++;
407
408 return __sched_period(nr_running);
409} 409}
410 410
411/* 411/*
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
628 * stays open at the end. 628 * stays open at the end.
629 */ 629 */
630 if (initial && sched_feat(START_DEBIT)) 630 if (initial && sched_feat(START_DEBIT))
631 vruntime += sched_vslice_add(cfs_rq, se); 631 vruntime += sched_vslice(cfs_rq, se);
632 632
633 if (!initial) { 633 if (!initial) {
634 /* sleeps upto a single latency don't count. */ 634 /* sleeps upto a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
748 struct rq *rq = rq_of(cfs_rq); 748 struct rq *rq = rq_of(cfs_rq);
749 u64 pair_slice = rq->clock - cfs_rq->pair_start; 749 u64 pair_slice = rq->clock - cfs_rq->pair_start;
750 750
751 if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) { 751 if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
752 cfs_rq->pair_start = rq->clock; 752 cfs_rq->pair_start = rq->clock;
753 return se; 753 return se;
754 } 754 }
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
849 hrtick_start(rq, delta); 849 hrtick_start(rq, delta);
850 } 850 }
851} 851}
852
853/*
854 * called from enqueue/dequeue and updates the hrtick when the
855 * current task is from our class and nr_running is low enough
856 * to matter.
857 */
858static void hrtick_update(struct rq *rq)
859{
860 struct task_struct *curr = rq->curr;
861
862 if (curr->sched_class != &fair_sched_class)
863 return;
864
865 if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
866 hrtick_start_fair(rq, curr);
867}
852#else /* !CONFIG_SCHED_HRTICK */ 868#else /* !CONFIG_SCHED_HRTICK */
853static inline void 869static inline void
854hrtick_start_fair(struct rq *rq, struct task_struct *p) 870hrtick_start_fair(struct rq *rq, struct task_struct *p)
855{ 871{
856} 872}
873
874static inline void hrtick_update(struct rq *rq)
875{
876}
857#endif 877#endif
858 878
859/* 879/*
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
874 wakeup = 1; 894 wakeup = 1;
875 } 895 }
876 896
877 hrtick_start_fair(rq, rq->curr); 897 hrtick_update(rq);
878} 898}
879 899
880/* 900/*
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
896 sleep = 1; 916 sleep = 1;
897 } 917 }
898 918
899 hrtick_start_fair(rq, rq->curr); 919 hrtick_update(rq);
900} 920}
901 921
902/* 922/*
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p)
1002 1022
1003#ifdef CONFIG_SMP 1023#ifdef CONFIG_SMP
1004 1024
1005static const struct sched_class fair_sched_class;
1006
1007#ifdef CONFIG_FAIR_GROUP_SCHED 1025#ifdef CONFIG_FAIR_GROUP_SCHED
1008/* 1026/*
1009 * effective_load() calculates the load change as seen from the root_task_group 1027 * effective_load() calculates the load change as seen from the root_task_group
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 7c9e8f4a049f..fda016218296 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1)
5SCHED_FEAT(AFFINE_WAKEUPS, 1) 5SCHED_FEAT(AFFINE_WAKEUPS, 1)
6SCHED_FEAT(CACHE_HOT_BUDDY, 1) 6SCHED_FEAT(CACHE_HOT_BUDDY, 1)
7SCHED_FEAT(SYNC_WAKEUPS, 1) 7SCHED_FEAT(SYNC_WAKEUPS, 1)
8SCHED_FEAT(HRTICK, 1) 8SCHED_FEAT(HRTICK, 0)
9SCHED_FEAT(DOUBLE_TICK, 0) 9SCHED_FEAT(DOUBLE_TICK, 0)
10SCHED_FEAT(ASYM_GRAN, 1) 10SCHED_FEAT(ASYM_GRAN, 1)
11SCHED_FEAT(LB_BIAS, 1) 11SCHED_FEAT(LB_BIAS, 1)
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index b8c156979cf2..ee71bec1da66 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -9,7 +9,7 @@
9static int show_schedstat(struct seq_file *seq, void *v) 9static int show_schedstat(struct seq_file *seq, void *v)
10{ 10{
11 int cpu; 11 int cpu;
12 int mask_len = NR_CPUS/32 * 9; 12 int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
13 char *mask_str = kmalloc(mask_len, GFP_KERNEL); 13 char *mask_str = kmalloc(mask_len, GFP_KERNEL);
14 14
15 if (mask_str == NULL) 15 if (mask_str == NULL)
@@ -90,13 +90,20 @@ static int schedstat_open(struct inode *inode, struct file *file)
90 return res; 90 return res;
91} 91}
92 92
93const struct file_operations proc_schedstat_operations = { 93static const struct file_operations proc_schedstat_operations = {
94 .open = schedstat_open, 94 .open = schedstat_open,
95 .read = seq_read, 95 .read = seq_read,
96 .llseek = seq_lseek, 96 .llseek = seq_lseek,
97 .release = single_release, 97 .release = single_release,
98}; 98};
99 99
100static int __init proc_schedstat_init(void)
101{
102 proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
103 return 0;
104}
105module_init(proc_schedstat_init);
106
100/* 107/*
101 * Expects runqueue lock to be held for atomicity of update 108 * Expects runqueue lock to be held for atomicity of update
102 */ 109 */
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index af3c7cea258b..9bc4c00872c9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -37,9 +37,13 @@ struct stop_machine_data {
37/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ 37/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
38static unsigned int num_threads; 38static unsigned int num_threads;
39static atomic_t thread_ack; 39static atomic_t thread_ack;
40static struct completion finished;
41static DEFINE_MUTEX(lock); 40static DEFINE_MUTEX(lock);
42 41
42static struct workqueue_struct *stop_machine_wq;
43static struct stop_machine_data active, idle;
44static const cpumask_t *active_cpus;
45static void *stop_machine_work;
46
43static void set_state(enum stopmachine_state newstate) 47static void set_state(enum stopmachine_state newstate)
44{ 48{
45 /* Reset ack counter. */ 49 /* Reset ack counter. */
@@ -51,21 +55,26 @@ static void set_state(enum stopmachine_state newstate)
51/* Last one to ack a state moves to the next state. */ 55/* Last one to ack a state moves to the next state. */
52static void ack_state(void) 56static void ack_state(void)
53{ 57{
54 if (atomic_dec_and_test(&thread_ack)) { 58 if (atomic_dec_and_test(&thread_ack))
55 /* If we're the last one to ack the EXIT, we're finished. */ 59 set_state(state + 1);
56 if (state == STOPMACHINE_EXIT)
57 complete(&finished);
58 else
59 set_state(state + 1);
60 }
61} 60}
62 61
63/* This is the actual thread which stops the CPU. It exits by itself rather 62/* This is the actual function which stops the CPU. It runs
64 * than waiting for kthread_stop(), because it's easier for hotplug CPU. */ 63 * in the context of a dedicated stopmachine workqueue. */
65static int stop_cpu(struct stop_machine_data *smdata) 64static void stop_cpu(struct work_struct *unused)
66{ 65{
67 enum stopmachine_state curstate = STOPMACHINE_NONE; 66 enum stopmachine_state curstate = STOPMACHINE_NONE;
68 67 struct stop_machine_data *smdata = &idle;
68 int cpu = smp_processor_id();
69 int err;
70
71 if (!active_cpus) {
72 if (cpu == first_cpu(cpu_online_map))
73 smdata = &active;
74 } else {
75 if (cpu_isset(cpu, *active_cpus))
76 smdata = &active;
77 }
69 /* Simple state machine */ 78 /* Simple state machine */
70 do { 79 do {
71 /* Chill out and ensure we re-read stopmachine_state. */ 80 /* Chill out and ensure we re-read stopmachine_state. */
@@ -78,9 +87,11 @@ static int stop_cpu(struct stop_machine_data *smdata)
78 hard_irq_disable(); 87 hard_irq_disable();
79 break; 88 break;
80 case STOPMACHINE_RUN: 89 case STOPMACHINE_RUN:
81 /* |= allows error detection if functions on 90 /* On multiple CPUs only a single error code
82 * multiple CPUs. */ 91 * is needed to tell that something failed. */
83 smdata->fnret |= smdata->fn(smdata->data); 92 err = smdata->fn(smdata->data);
93 if (err)
94 smdata->fnret = err;
84 break; 95 break;
85 default: 96 default:
86 break; 97 break;
@@ -90,7 +101,6 @@ static int stop_cpu(struct stop_machine_data *smdata)
90 } while (curstate != STOPMACHINE_EXIT); 101 } while (curstate != STOPMACHINE_EXIT);
91 102
92 local_irq_enable(); 103 local_irq_enable();
93 do_exit(0);
94} 104}
95 105
96/* Callback for CPUs which aren't supposed to do anything. */ 106/* Callback for CPUs which aren't supposed to do anything. */
@@ -101,78 +111,34 @@ static int chill(void *unused)
101 111
102int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 112int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
103{ 113{
104 int i, err; 114 struct work_struct *sm_work;
105 struct stop_machine_data active, idle; 115 int i;
106 struct task_struct **threads;
107 116
117 /* Set up initial state. */
118 mutex_lock(&lock);
119 num_threads = num_online_cpus();
120 active_cpus = cpus;
108 active.fn = fn; 121 active.fn = fn;
109 active.data = data; 122 active.data = data;
110 active.fnret = 0; 123 active.fnret = 0;
111 idle.fn = chill; 124 idle.fn = chill;
112 idle.data = NULL; 125 idle.data = NULL;
113 126
114 /* This could be too big for stack on large machines. */
115 threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
116 if (!threads)
117 return -ENOMEM;
118
119 /* Set up initial state. */
120 mutex_lock(&lock);
121 init_completion(&finished);
122 num_threads = num_online_cpus();
123 set_state(STOPMACHINE_PREPARE); 127 set_state(STOPMACHINE_PREPARE);
124 128
125 for_each_online_cpu(i) { 129 /* Schedule the stop_cpu work on all cpus: hold this CPU so one
126 struct stop_machine_data *smdata = &idle;
127 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
128
129 if (!cpus) {
130 if (i == first_cpu(cpu_online_map))
131 smdata = &active;
132 } else {
133 if (cpu_isset(i, *cpus))
134 smdata = &active;
135 }
136
137 threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
138 i);
139 if (IS_ERR(threads[i])) {
140 err = PTR_ERR(threads[i]);
141 threads[i] = NULL;
142 goto kill_threads;
143 }
144
145 /* Place it onto correct cpu. */
146 kthread_bind(threads[i], i);
147
148 /* Make it highest prio. */
149 if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
150 BUG();
151 }
152
153 /* We've created all the threads. Wake them all: hold this CPU so one
154 * doesn't hit this CPU until we're ready. */ 130 * doesn't hit this CPU until we're ready. */
155 get_cpu(); 131 get_cpu();
156 for_each_online_cpu(i) 132 for_each_online_cpu(i) {
157 wake_up_process(threads[i]); 133 sm_work = percpu_ptr(stop_machine_work, i);
158 134 INIT_WORK(sm_work, stop_cpu);
135 queue_work_on(i, stop_machine_wq, sm_work);
136 }
159 /* This will release the thread on our CPU. */ 137 /* This will release the thread on our CPU. */
160 put_cpu(); 138 put_cpu();
161 wait_for_completion(&finished); 139 flush_workqueue(stop_machine_wq);
162 mutex_unlock(&lock); 140 mutex_unlock(&lock);
163
164 kfree(threads);
165
166 return active.fnret; 141 return active.fnret;
167
168kill_threads:
169 for_each_online_cpu(i)
170 if (threads[i])
171 kthread_stop(threads[i]);
172 mutex_unlock(&lock);
173
174 kfree(threads);
175 return err;
176} 142}
177 143
178int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 144int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
@@ -187,3 +153,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
187 return ret; 153 return ret;
188} 154}
189EXPORT_SYMBOL_GPL(stop_machine); 155EXPORT_SYMBOL_GPL(stop_machine);
156
157static int __init stop_machine_init(void)
158{
159 stop_machine_wq = create_rt_workqueue("kstop");
160 stop_machine_work = alloc_percpu(struct work_struct);
161 return 0;
162}
163core_initcall(stop_machine_init);
diff --git a/kernel/sys.c b/kernel/sys.c
index 53879cdae483..31deba8f7d16 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1716,6 +1716,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1716 case PR_SET_TSC: 1716 case PR_SET_TSC:
1717 error = SET_TSC_CTL(arg2); 1717 error = SET_TSC_CTL(arg2);
1718 break; 1718 break;
1719 case PR_GET_TIMERSLACK:
1720 error = current->timer_slack_ns;
1721 break;
1722 case PR_SET_TIMERSLACK:
1723 if (arg2 <= 0)
1724 current->timer_slack_ns =
1725 current->default_timer_slack_ns;
1726 else
1727 current->timer_slack_ns = arg2;
1728 break;
1719 default: 1729 default:
1720 error = -EINVAL; 1730 error = -EINVAL;
1721 break; 1731 break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index edb1075f80d2..9d048fa2d902 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
276 }, 276 },
277 { 277 {
278 .ctl_name = CTL_UNNUMBERED, 278 .ctl_name = CTL_UNNUMBERED,
279 .procname = "sched_shares_thresh",
280 .data = &sysctl_sched_shares_thresh,
281 .maxlen = sizeof(unsigned int),
282 .mode = 0644,
283 .proc_handler = &proc_dointvec_minmax,
284 .strategy = &sysctl_intvec,
285 .extra1 = &zero,
286 },
287 {
288 .ctl_name = CTL_UNNUMBERED,
279 .procname = "sched_child_runs_first", 289 .procname = "sched_child_runs_first",
280 .data = &sysctl_sched_child_runs_first, 290 .data = &sysctl_sched_child_runs_first,
281 .maxlen = sizeof(unsigned int), 291 .maxlen = sizeof(unsigned int),
diff --git a/kernel/time.c b/kernel/time.c
index 6a08660b4fac..d63a4336fad6 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64);
669#endif 669#endif
670 670
671EXPORT_SYMBOL(jiffies); 671EXPORT_SYMBOL(jiffies);
672
673/*
674 * Add two timespec values and do a safety check for overflow.
675 * It's assumed that both values are valid (>= 0)
676 */
677struct timespec timespec_add_safe(const struct timespec lhs,
678 const struct timespec rhs)
679{
680 struct timespec res;
681
682 set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
683 lhs.tv_nsec + rhs.tv_nsec);
684
685 if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
686 res.tv_sec = TIME_T_MAX;
687
688 return res;
689}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 1a20715bfd6e..8ff15e5d486b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -142,8 +142,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
142 time_state = TIME_OOP; 142 time_state = TIME_OOP;
143 printk(KERN_NOTICE "Clock: " 143 printk(KERN_NOTICE "Clock: "
144 "inserting leap second 23:59:60 UTC\n"); 144 "inserting leap second 23:59:60 UTC\n");
145 leap_timer.expires = ktime_add_ns(leap_timer.expires, 145 hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
146 NSEC_PER_SEC);
147 res = HRTIMER_RESTART; 146 res = HRTIMER_RESTART;
148 break; 147 break;
149 case TIME_DEL: 148 case TIME_DEL:
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0581c11fe6c6..5bbb1044f847 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -300,7 +300,7 @@ void tick_nohz_stop_sched_tick(int inidle)
300 goto out; 300 goto out;
301 } 301 }
302 302
303 ts->idle_tick = ts->sched_timer.expires; 303 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
304 ts->tick_stopped = 1; 304 ts->tick_stopped = 1;
305 ts->idle_jiffies = last_jiffies; 305 ts->idle_jiffies = last_jiffies;
306 rcu_enter_nohz(); 306 rcu_enter_nohz();
@@ -380,21 +380,21 @@ ktime_t tick_nohz_get_sleep_length(void)
380static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) 380static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
381{ 381{
382 hrtimer_cancel(&ts->sched_timer); 382 hrtimer_cancel(&ts->sched_timer);
383 ts->sched_timer.expires = ts->idle_tick; 383 hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
384 384
385 while (1) { 385 while (1) {
386 /* Forward the time to expire in the future */ 386 /* Forward the time to expire in the future */
387 hrtimer_forward(&ts->sched_timer, now, tick_period); 387 hrtimer_forward(&ts->sched_timer, now, tick_period);
388 388
389 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 389 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
390 hrtimer_start(&ts->sched_timer, 390 hrtimer_start_expires(&ts->sched_timer,
391 ts->sched_timer.expires,
392 HRTIMER_MODE_ABS); 391 HRTIMER_MODE_ABS);
393 /* Check, if the timer was already in the past */ 392 /* Check, if the timer was already in the past */
394 if (hrtimer_active(&ts->sched_timer)) 393 if (hrtimer_active(&ts->sched_timer))
395 break; 394 break;
396 } else { 395 } else {
397 if (!tick_program_event(ts->sched_timer.expires, 0)) 396 if (!tick_program_event(
397 hrtimer_get_expires(&ts->sched_timer), 0))
398 break; 398 break;
399 } 399 }
400 /* Update jiffies and reread time */ 400 /* Update jiffies and reread time */
@@ -456,14 +456,16 @@ void tick_nohz_restart_sched_tick(void)
456 */ 456 */
457 ts->tick_stopped = 0; 457 ts->tick_stopped = 0;
458 ts->idle_exittime = now; 458 ts->idle_exittime = now;
459
459 tick_nohz_restart(ts, now); 460 tick_nohz_restart(ts, now);
461
460 local_irq_enable(); 462 local_irq_enable();
461} 463}
462 464
463static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) 465static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
464{ 466{
465 hrtimer_forward(&ts->sched_timer, now, tick_period); 467 hrtimer_forward(&ts->sched_timer, now, tick_period);
466 return tick_program_event(ts->sched_timer.expires, 0); 468 return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
467} 469}
468 470
469/* 471/*
@@ -542,7 +544,7 @@ static void tick_nohz_switch_to_nohz(void)
542 next = tick_init_jiffy_update(); 544 next = tick_init_jiffy_update();
543 545
544 for (;;) { 546 for (;;) {
545 ts->sched_timer.expires = next; 547 hrtimer_set_expires(&ts->sched_timer, next);
546 if (!tick_program_event(next, 0)) 548 if (!tick_program_event(next, 0))
547 break; 549 break;
548 next = ktime_add(next, tick_period); 550 next = ktime_add(next, tick_period);
@@ -567,11 +569,21 @@ static void tick_nohz_switch_to_nohz(void)
567static void tick_nohz_kick_tick(int cpu) 569static void tick_nohz_kick_tick(int cpu)
568{ 570{
569 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 571 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
572 ktime_t delta, now;
570 573
571 if (!ts->tick_stopped) 574 if (!ts->tick_stopped)
572 return; 575 return;
573 576
574 tick_nohz_restart(ts, ktime_get()); 577 /*
578 * Do not touch the tick device, when the next expiry is either
579 * already reached or less/equal than the tick period.
580 */
581 now = ktime_get();
582 delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
583 if (delta.tv64 <= tick_period.tv64)
584 return;
585
586 tick_nohz_restart(ts, now);
575} 587}
576 588
577#else 589#else
@@ -668,16 +680,15 @@ void tick_setup_sched_timer(void)
668 ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; 680 ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
669 681
670 /* Get the next period (per cpu) */ 682 /* Get the next period (per cpu) */
671 ts->sched_timer.expires = tick_init_jiffy_update(); 683 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
672 offset = ktime_to_ns(tick_period) >> 1; 684 offset = ktime_to_ns(tick_period) >> 1;
673 do_div(offset, num_possible_cpus()); 685 do_div(offset, num_possible_cpus());
674 offset *= smp_processor_id(); 686 offset *= smp_processor_id();
675 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); 687 hrtimer_add_expires_ns(&ts->sched_timer, offset);
676 688
677 for (;;) { 689 for (;;) {
678 hrtimer_forward(&ts->sched_timer, now, tick_period); 690 hrtimer_forward(&ts->sched_timer, now, tick_period);
679 hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, 691 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
680 HRTIMER_MODE_ABS);
681 /* Check, if the timer was already in the past */ 692 /* Check, if the timer was already in the past */
682 if (hrtimer_active(&ts->sched_timer)) 693 if (hrtimer_active(&ts->sched_timer))
683 break; 694 break;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index f6426911e35a..a999b92a1277 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -66,9 +66,11 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
66 SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); 66 SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
67#endif 67#endif
68 SEQ_printf(m, "\n"); 68 SEQ_printf(m, "\n");
69 SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", 69 SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
70 (unsigned long long)ktime_to_ns(timer->expires), 70 (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
71 (long long)(ktime_to_ns(timer->expires) - now)); 71 (unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)),
72 (long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now),
73 (long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now));
72} 74}
73 75
74static void 76static void
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 714afad46539..f928f2a87b9b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -62,6 +62,7 @@ struct workqueue_struct {
62 const char *name; 62 const char *name;
63 int singlethread; 63 int singlethread;
64 int freezeable; /* Freeze threads during suspend */ 64 int freezeable; /* Freeze threads during suspend */
65 int rt;
65#ifdef CONFIG_LOCKDEP 66#ifdef CONFIG_LOCKDEP
66 struct lockdep_map lockdep_map; 67 struct lockdep_map lockdep_map;
67#endif 68#endif
@@ -766,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
766 767
767static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 768static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
768{ 769{
770 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
769 struct workqueue_struct *wq = cwq->wq; 771 struct workqueue_struct *wq = cwq->wq;
770 const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d"; 772 const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
771 struct task_struct *p; 773 struct task_struct *p;
@@ -781,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
781 */ 783 */
782 if (IS_ERR(p)) 784 if (IS_ERR(p))
783 return PTR_ERR(p); 785 return PTR_ERR(p);
784 786 if (cwq->wq->rt)
787 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
785 cwq->thread = p; 788 cwq->thread = p;
786 789
787 return 0; 790 return 0;
@@ -801,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
801struct workqueue_struct *__create_workqueue_key(const char *name, 804struct workqueue_struct *__create_workqueue_key(const char *name,
802 int singlethread, 805 int singlethread,
803 int freezeable, 806 int freezeable,
807 int rt,
804 struct lock_class_key *key, 808 struct lock_class_key *key,
805 const char *lock_name) 809 const char *lock_name)
806{ 810{
@@ -822,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
822 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); 826 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
823 wq->singlethread = singlethread; 827 wq->singlethread = singlethread;
824 wq->freezeable = freezeable; 828 wq->freezeable = freezeable;
829 wq->rt = rt;
825 INIT_LIST_HEAD(&wq->list); 830 INIT_LIST_HEAD(&wq->list);
826 831
827 if (singlethread) { 832 if (singlethread) {