Diffstat (limited to 'kernel')
 kernel/Makefile           |   2
 kernel/compat.c           |   3
 kernel/cpuset.c           |  52
 kernel/exit.c             |   1
 kernel/fork.c             |   1
 kernel/futex.c            | 176
 kernel/hrtimer.c          |   9
 kernel/irq/manage.c       |  49
 kernel/irq/spurious.c     |   4
 kernel/kexec.c            |   2
 kernel/kgdb.c             |   8
 kernel/kmod.c             |   1
 kernel/module.c           | 384
 kernel/posix-cpu-timers.c |  11
 kernel/ptrace.c           |   2
 kernel/relay.c            |   2
 kernel/sched.c            | 354
 kernel/sched_clock.c      | 236
 kernel/sched_debug.c      |  11
 kernel/sched_fair.c       |  50
 kernel/sched_idletask.c   |   2
 kernel/sched_rt.c         |   9
 kernel/softirq.c          |  20
 kernel/time.c             |  62
 kernel/time/clocksource.c |   4
 kernel/time/ntp.c         | 398
 kernel/time/timekeeping.c |  17
 kernel/timeconst.pl       | 120
 kernel/workqueue.c        |   6
 29 files changed, 1024 insertions(+), 972 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 188c43223f5..1c9938addb9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
-	    notifier.o ksysfs.o pm_qos_params.o
+	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/compat.c b/kernel/compat.c
index 4a856a3643b..32c254a8ab9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -955,7 +955,8 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
 	    __put_user(txc.jitcnt, &utp->jitcnt) ||
 	    __put_user(txc.calcnt, &utp->calcnt) ||
 	    __put_user(txc.errcnt, &utp->errcnt) ||
-	    __put_user(txc.stbcnt, &utp->stbcnt))
+	    __put_user(txc.stbcnt, &utp->stbcnt) ||
+	    __put_user(txc.tai, &utp->tai))
 		ret = -EFAULT;
 
 	return ret;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8da627d3380..86ea9e34e32 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1031,11 +1031,9 @@ int current_cpuset_is_being_rebound(void)
 	return task_cs(current) == cpuset_being_rebound;
 }
 
-static int update_relax_domain_level(struct cpuset *cs, char *buf)
+static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
-	int val = simple_strtol(buf, NULL, 10);
-
-	if (val < 0)
+	if ((int)val < 0)
 		val = -1;
 
 	if (val != cs->relax_domain_level) {
@@ -1280,9 +1278,6 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
 	case FILE_MEMLIST:
 		retval = update_nodemask(cs, buffer);
 		break;
-	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-		retval = update_relax_domain_level(cs, buffer);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out2;
@@ -1348,6 +1343,30 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
 	return retval;
 }
 
+static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
+{
+	int retval = 0;
+	struct cpuset *cs = cgroup_cs(cgrp);
+	cpuset_filetype_t type = cft->private;
+
+	cgroup_lock();
+
+	if (cgroup_is_removed(cgrp)) {
+		cgroup_unlock();
+		return -ENODEV;
+	}
+	switch (type) {
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		retval = update_relax_domain_level(cs, val);
+		break;
+	default:
+		retval = -EINVAL;
+		break;
+	}
+	cgroup_unlock();
+	return retval;
+}
+
 /*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map. If read in smaller
@@ -1406,9 +1425,6 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
 	case FILE_MEMLIST:
 		s += cpuset_sprintf_memlist(s, cs);
 		break;
-	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-		s += sprintf(s, "%d", cs->relax_domain_level);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out;
@@ -1449,6 +1465,18 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
 	}
 }
 
+static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuset *cs = cgroup_cs(cont);
+	cpuset_filetype_t type = cft->private;
+	switch (type) {
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		return cs->relax_domain_level;
+	default:
+		BUG();
+	}
+}
+
 
 /*
  * for the common functions, 'private' gives the type of file
@@ -1499,8 +1527,8 @@ static struct cftype files[] = {
 
 	{
 		.name = "sched_relax_domain_level",
-		.read_u64 = cpuset_read_u64,
-		.write_u64 = cpuset_write_u64,
+		.read_s64 = cpuset_read_s64,
+		.write_s64 = cpuset_write_s64,
 		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
 	},
 
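Note on the cpuset hunks above: sched_relax_domain_level moves off the generic string write path (which called simple_strtol() inside the handler) onto the cgroup read_s64/write_s64 callbacks, so the framework parses the integer once and the handler receives a typed value. A minimal standalone model of that split, offered only as a sketch -- the cftype here is a stand-in, not the real cgroup API:

	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	typedef int64_t s64;

	/* Stand-in for the cgroup control-file descriptor. */
	struct cftype {
		const char *name;
		int (*write_s64)(s64 val);	/* typed handler, as in the new code */
	};

	static s64 relax_domain_level = -1;

	static int update_relax_domain_level(s64 val)
	{
		if ((int)val < 0)		/* same clamp as the kernel hunk */
			val = -1;
		relax_domain_level = val;
		return 0;
	}

	/* The parse the framework now does once, instead of simple_strtol()
	 * in every handler. */
	static int cftype_write(const struct cftype *cft, const char *buf)
	{
		char *end;
		long long v = strtoll(buf, &end, 10);

		if (end == buf)
			return -EINVAL;
		return cft->write_s64(v);
	}

	int main(void)
	{
		const struct cftype cft = { "sched_relax_domain_level",
					    update_relax_domain_level };

		cftype_write(&cft, "-6");
		printf("%lld\n", (long long)relax_domain_level);	/* -1 */
		return 0;
	}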
diff --git a/kernel/exit.c b/kernel/exit.c
index d3ad54677f9..1510f78a0ff 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -19,6 +19,7 @@
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/binfmts.h>
 #include <linux/nsproxy.h>
 #include <linux/pid_namespace.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 2bb675af4de..933e60ebcca 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -22,6 +22,7 @@
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
diff --git a/kernel/futex.c b/kernel/futex.c
index 98092c9817f..449def8074f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -104,10 +104,6 @@ struct futex_q {
 	/* Key which the futex is hashed on: */
 	union futex_key key;
 
-	/* For fd, sigio sent using these: */
-	int fd;
-	struct file *filp;
-
 	/* Optional priority inheritance state: */
 	struct futex_pi_state *pi_state;
 	struct task_struct *task;
@@ -126,9 +122,6 @@ struct futex_hash_bucket {
 
 static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 
-/* Futex-fs vfsmount entry: */
-static struct vfsmount *futex_mnt;
-
 /*
  * Take mm->mmap_sem, when futex is shared
  */
@@ -610,8 +603,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 static void wake_futex(struct futex_q *q)
 {
 	plist_del(&q->list, &q->list.plist);
-	if (q->filp)
-		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
 	/*
 	 * The lock in wake_up_all() is a crucial memory barrier after the
 	 * plist_del() and also before assigning to q->lock_ptr.
@@ -988,14 +979,10 @@ out:
 }
 
 /* The key must be already stored in q->key. */
-static inline struct futex_hash_bucket *
-queue_lock(struct futex_q *q, int fd, struct file *filp)
+static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 {
 	struct futex_hash_bucket *hb;
 
-	q->fd = fd;
-	q->filp = filp;
-
 	init_waitqueue_head(&q->waiters);
 
 	get_futex_key_refs(&q->key);
@@ -1006,7 +993,7 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
 	return hb;
 }
 
-static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 {
 	int prio;
 
@@ -1041,15 +1028,6 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
  * exactly once. They are called with the hashed spinlock held.
  */
 
-/* The key must be already stored in q->key. */
-static void queue_me(struct futex_q *q, int fd, struct file *filp)
-{
-	struct futex_hash_bucket *hb;
-
-	hb = queue_lock(q, fd, filp);
-	__queue_me(q, hb);
-}
-
 /* Return 1 if we were still queued (ie. 0 means we were woken) */
 static int unqueue_me(struct futex_q *q)
 {
@@ -1194,7 +1172,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(ret != 0))
 		goto out_release_sem;
 
-	hb = queue_lock(&q, -1, NULL);
+	hb = queue_lock(&q);
 
 	/*
 	 * Access the page AFTER the futex is queued.
@@ -1238,7 +1216,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 		goto out_unlock_release_sem;
 
 	/* Only actually queue if *uaddr contained val. */
-	__queue_me(&q, hb);
+	queue_me(&q, hb);
 
 	/*
 	 * Now the futex is queued and we have checked the data, we
@@ -1386,7 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		goto out_release_sem;
 
  retry_unlocked:
-	hb = queue_lock(&q, -1, NULL);
+	hb = queue_lock(&q);
 
  retry_locked:
 	ret = lock_taken = 0;
@@ -1499,7 +1477,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	/*
 	 * Only actually queue now that the atomic ops are done:
 	 */
-	__queue_me(&q, hb);
+	queue_me(&q, hb);
 
 	/*
 	 * Now the futex is queued and we have checked the data, we
@@ -1746,121 +1724,6 @@ pi_faulted:
 	return ret;
 }
 
-static int futex_close(struct inode *inode, struct file *filp)
-{
-	struct futex_q *q = filp->private_data;
-
-	unqueue_me(q);
-	kfree(q);
-
-	return 0;
-}
-
-/* This is one-shot: once it's gone off you need a new fd */
-static unsigned int futex_poll(struct file *filp,
-			       struct poll_table_struct *wait)
-{
-	struct futex_q *q = filp->private_data;
-	int ret = 0;
-
-	poll_wait(filp, &q->waiters, wait);
-
-	/*
-	 * plist_node_empty() is safe here without any lock.
-	 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
-	 */
-	if (plist_node_empty(&q->list))
-		ret = POLLIN | POLLRDNORM;
-
-	return ret;
-}
-
-static const struct file_operations futex_fops = {
-	.release	= futex_close,
-	.poll		= futex_poll,
-};
-
-/*
- * Signal allows caller to avoid the race which would occur if they
- * set the sigio stuff up afterwards.
- */
-static int futex_fd(u32 __user *uaddr, int signal)
-{
-	struct futex_q *q;
-	struct file *filp;
-	int ret, err;
-	struct rw_semaphore *fshared;
-	static unsigned long printk_interval;
-
-	if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
-		printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
-		       "will be removed from the kernel in June 2007\n",
-		       current->comm);
-	}
-
-	ret = -EINVAL;
-	if (!valid_signal(signal))
-		goto out;
-
-	ret = get_unused_fd();
-	if (ret < 0)
-		goto out;
-	filp = get_empty_filp();
-	if (!filp) {
-		put_unused_fd(ret);
-		ret = -ENFILE;
-		goto out;
-	}
-	filp->f_op = &futex_fops;
-	filp->f_path.mnt = mntget(futex_mnt);
-	filp->f_path.dentry = dget(futex_mnt->mnt_root);
-	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
-
-	if (signal) {
-		err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
-		if (err < 0) {
-			goto error;
-		}
-		filp->f_owner.signum = signal;
-	}
-
-	q = kmalloc(sizeof(*q), GFP_KERNEL);
-	if (!q) {
-		err = -ENOMEM;
-		goto error;
-	}
-	q->pi_state = NULL;
-
-	fshared = &current->mm->mmap_sem;
-	down_read(fshared);
-	err = get_futex_key(uaddr, fshared, &q->key);
-
-	if (unlikely(err != 0)) {
-		up_read(fshared);
-		kfree(q);
-		goto error;
-	}
-
-	/*
-	 * queue_me() must be called before releasing mmap_sem, because
-	 * key->shared.inode needs to be referenced while holding it.
-	 */
-	filp->private_data = q;
-
-	queue_me(q, ret, filp);
-	up_read(fshared);
-
-	/* Now we map fd to filp, so userspace can access it */
-	fd_install(ret, filp);
-out:
-	return ret;
-error:
-	put_unused_fd(ret);
-	put_filp(filp);
-	ret = err;
-	goto out;
-}
-
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
@@ -2092,10 +1955,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	case FUTEX_WAKE_BITSET:
 		ret = futex_wake(uaddr, fshared, val, val3);
 		break;
-	case FUTEX_FD:
-		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
-		ret = futex_fd(uaddr, val);
-		break;
 	case FUTEX_REQUEUE:
 		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
 		break;
@@ -2156,19 +2015,6 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
 }
 
-static int futexfs_get_sb(struct file_system_type *fs_type,
-			  int flags, const char *dev_name, void *data,
-			  struct vfsmount *mnt)
-{
-	return get_sb_pseudo(fs_type, "futex", NULL, FUTEXFS_SUPER_MAGIC, mnt);
-}
-
-static struct file_system_type futex_fs_type = {
-	.name		= "futexfs",
-	.get_sb		= futexfs_get_sb,
-	.kill_sb	= kill_anon_super,
-};
-
 static int __init futex_init(void)
 {
 	u32 curval;
@@ -2193,16 +2039,6 @@ static int __init futex_init(void)
 		spin_lock_init(&futex_queues[i].lock);
 	}
 
-	i = register_filesystem(&futex_fs_type);
-	if (i)
-		return i;
-
-	futex_mnt = kern_mount(&futex_fs_type);
-	if (IS_ERR(futex_mnt)) {
-		unregister_filesystem(&futex_fs_type);
-		return PTR_ERR(futex_mnt);
-	}
-
 	return 0;
 }
 __initcall(futex_init);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9af1d6a8095..421be5fe5cc 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -154,15 +154,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 }
 
 /*
- * Helper function to check, whether the timer is running the callback
- * function
- */
-static inline int hrtimer_callback_running(struct hrtimer *timer)
-{
-	return timer->state & HRTIMER_STATE_CALLBACK;
-}
-
-/*
  * Functions and macros which are different for UP/SMP systems are kept in a
  * single place
  */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 46e4ad1723f..46d6611a33b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -150,6 +150,26 @@ void disable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(disable_irq);
 
+static void __enable_irq(struct irq_desc *desc, unsigned int irq)
+{
+	switch (desc->depth) {
+	case 0:
+		printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
+		WARN_ON(1);
+		break;
+	case 1: {
+		unsigned int status = desc->status & ~IRQ_DISABLED;
+
+		/* Prevent probing on this irq: */
+		desc->status = status | IRQ_NOPROBE;
+		check_irq_resend(desc, irq);
+		/* fall-through */
+	}
+	default:
+		desc->depth--;
+	}
+}
+
 /**
  * enable_irq - enable handling of an irq
  * @irq:	Interrupt to enable
@@ -169,22 +189,7 @@ void enable_irq(unsigned int irq)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 0:
-		printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
-		WARN_ON(1);
-		break;
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-
-		/* Prevent probing on this irq: */
-		desc->status = status | IRQ_NOPROBE;
-		check_irq_resend(desc, irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-	}
+	__enable_irq(desc, irq);
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 EXPORT_SYMBOL(enable_irq);
@@ -365,7 +370,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	compat_irq_chip_set_default_handler(desc);
 
 	desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING |
-			  IRQ_INPROGRESS);
+			  IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED);
 
 	if (!(desc->status & IRQ_NOAUTOEN)) {
 		desc->depth = 0;
@@ -381,6 +386,16 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	/* Reset broken irq detection when installing new handler */
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
+
+	/*
+	 * Check whether we disabled the irq via the spurious handler
+	 * before. Reenable it and give it another chance.
+	 */
+	if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) {
+		desc->status &= ~IRQ_SPURIOUS_DISABLED;
+		__enable_irq(desc, irq);
+	}
+
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	new->irq = irq;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 088dabbf2d6..c66d3f10e85 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -209,8 +209,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		 * Now kill the IRQ
 		 */
 		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
-		desc->status |= IRQ_DISABLED;
-		desc->depth = 1;
+		desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
+		desc->depth++;
 		desc->chip->disable(irq);
 	}
 	desc->irqs_unhandled = 0;
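Taken together, the irq/manage.c and irq/spurious.c hunks change the spurious-IRQ kill from a hard `depth = 1` disable into a flagged, nested disable that a later shared setup_irq() can undo. A standalone sketch of that depth-plus-flag pattern (illustrative C, not kernel code; only the IRQ_DISABLED/IRQ_SPURIOUS_DISABLED names and the depth counter mirror the diff):

	#include <stdio.h>

	#define IRQ_DISABLED		0x1
	#define IRQ_SPURIOUS_DISABLED	0x2

	struct irq_desc {
		unsigned int status;
		unsigned int depth;	/* nesting count of disables */
	};

	static void irq_disable(struct irq_desc *desc)
	{
		desc->status |= IRQ_DISABLED;
		desc->depth++;
	}

	static void irq_enable(struct irq_desc *desc)
	{
		if (desc->depth == 0) {
			fprintf(stderr, "unbalanced enable\n");
			return;
		}
		if (--desc->depth == 0)
			desc->status &= ~IRQ_DISABLED;
	}

	/* note_interrupt() side: disable, but record that it was the spurious
	 * detector (depth++ rather than depth = 1, so it stays balanced). */
	static void spurious_disable(struct irq_desc *desc)
	{
		desc->status |= IRQ_SPURIOUS_DISABLED;
		irq_disable(desc);
	}

	/* setup_irq() side: a new shared action clears the flag and undoes
	 * exactly that one disable. */
	static void setup_shared_action(struct irq_desc *desc)
	{
		if (desc->status & IRQ_SPURIOUS_DISABLED) {
			desc->status &= ~IRQ_SPURIOUS_DISABLED;
			irq_enable(desc);
		}
	}

	int main(void)
	{
		struct irq_desc desc = { 0, 0 };

		spurious_disable(&desc);
		setup_shared_action(&desc);
		printf("disabled=%d depth=%u\n",
		       !!(desc.status & IRQ_DISABLED), desc.depth);	/* 0 0 */
		return 0;
	}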
diff --git a/kernel/kexec.c b/kernel/kexec.c
index cb85c79989b..1c5fcacbcf3 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1217,7 +1217,7 @@ static int __init parse_crashkernel_mem(char *cmdline,
 	}
 
 	/* match ? */
-	if (system_ram >= start && system_ram <= end) {
+	if (system_ram >= start && system_ram < end) {
 		*crash_size = size;
 		break;
 	}
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 1bd0ec1c80b..39e31a036f5 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -61,7 +61,7 @@ struct kgdb_state {
 	int			err_code;
 	int			cpu;
 	int			pass_exception;
-	long			threadid;
+	unsigned long		threadid;
 	long			kgdb_usethreadid;
 	struct pt_regs		*linux_regs;
 };
@@ -146,7 +146,7 @@ atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
  * the other CPUs might interfere with your debugging context, so
  * use this with care:
  */
-int kgdb_do_roundup = 1;
+static int kgdb_do_roundup = 1;
 
 static int __init opt_nokgdbroundup(char *str)
 {
@@ -438,7 +438,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
  * While we find nice hex chars, build a long_val.
  * Return number of chars processed.
  */
-int kgdb_hex2long(char **ptr, long *long_val)
+int kgdb_hex2long(char **ptr, unsigned long *long_val)
 {
 	int hex_val;
 	int num = 0;
@@ -709,7 +709,7 @@ int kgdb_isremovedbreak(unsigned long addr)
 	return 0;
 }
 
-int remove_all_break(void)
+static int remove_all_break(void)
 {
 	unsigned long addr;
 	int error;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index e2764047ec0..8df97d3dfda 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -27,6 +27,7 @@
 #include <linux/mnt_namespace.h>
 #include <linux/completion.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/mount.h>
diff --git a/kernel/module.c b/kernel/module.c
index 8d6cccc6c3c..f5e9491ef7a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -164,131 +164,140 @@ static const struct kernel_symbol *lookup_symbol(const char *name,
164 return NULL; 164 return NULL;
165} 165}
166 166
167static void printk_unused_warning(const char *name) 167static bool always_ok(bool gplok, bool warn, const char *name)
168{ 168{
169 printk(KERN_WARNING "Symbol %s is marked as UNUSED, " 169 return true;
170 "however this module is using it.\n", name);
171 printk(KERN_WARNING "This symbol will go away in the future.\n");
172 printk(KERN_WARNING "Please evalute if this is the right api to use, "
173 "and if it really is, submit a report the linux kernel "
174 "mailinglist together with submitting your code for "
175 "inclusion.\n");
176} 170}
177 171
178/* Find a symbol, return value, crc and module which owns it */ 172static bool printk_unused_warning(bool gplok, bool warn, const char *name)
179static unsigned long __find_symbol(const char *name,
180 struct module **owner,
181 const unsigned long **crc,
182 int gplok)
183{ 173{
184 struct module *mod; 174 if (warn) {
185 const struct kernel_symbol *ks; 175 printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
186 176 "however this module is using it.\n", name);
187 /* Core kernel first. */ 177 printk(KERN_WARNING
188 *owner = NULL; 178 "This symbol will go away in the future.\n");
189 ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); 179 printk(KERN_WARNING
190 if (ks) { 180 "Please evalute if this is the right api to use and if "
191 *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); 181 "it really is, submit a report the linux kernel "
192 return ks->value; 182 "mailinglist together with submitting your code for "
193 } 183 "inclusion.\n");
194 if (gplok) {
195 ks = lookup_symbol(name, __start___ksymtab_gpl,
196 __stop___ksymtab_gpl);
197 if (ks) {
198 *crc = symversion(__start___kcrctab_gpl,
199 (ks - __start___ksymtab_gpl));
200 return ks->value;
201 }
202 } 184 }
203 ks = lookup_symbol(name, __start___ksymtab_gpl_future, 185 return true;
204 __stop___ksymtab_gpl_future); 186}
205 if (ks) { 187
206 if (!gplok) { 188static bool gpl_only_unused_warning(bool gplok, bool warn, const char *name)
207 printk(KERN_WARNING "Symbol %s is being used " 189{
208 "by a non-GPL module, which will not " 190 if (!gplok)
209 "be allowed in the future\n", name); 191 return false;
210 printk(KERN_WARNING "Please see the file " 192 return printk_unused_warning(gplok, warn, name);
211 "Documentation/feature-removal-schedule.txt " 193}
212 "in the kernel source tree for more " 194
213 "details.\n"); 195static bool gpl_only(bool gplok, bool warn, const char *name)
214 } 196{
215 *crc = symversion(__start___kcrctab_gpl_future, 197 return gplok;
216 (ks - __start___ksymtab_gpl_future)); 198}
217 return ks->value; 199
200static bool warn_if_not_gpl(bool gplok, bool warn, const char *name)
201{
202 if (!gplok && warn) {
203 printk(KERN_WARNING "Symbol %s is being used "
204 "by a non-GPL module, which will not "
205 "be allowed in the future\n", name);
206 printk(KERN_WARNING "Please see the file "
207 "Documentation/feature-removal-schedule.txt "
208 "in the kernel source tree for more details.\n");
218 } 209 }
210 return true;
211}
219 212
220 ks = lookup_symbol(name, __start___ksymtab_unused, 213struct symsearch {
221 __stop___ksymtab_unused); 214 const struct kernel_symbol *start, *stop;
222 if (ks) { 215 const unsigned long *crcs;
223 printk_unused_warning(name); 216 bool (*check)(bool gplok, bool warn, const char *name);
224 *crc = symversion(__start___kcrctab_unused, 217};
225 (ks - __start___ksymtab_unused)); 218
226 return ks->value; 219/* Look through this array of symbol tables for a symbol match which
220 * passes the check function. */
221static const struct kernel_symbol *search_symarrays(const struct symsearch *arr,
222 unsigned int num,
223 const char *name,
224 bool gplok,
225 bool warn,
226 const unsigned long **crc)
227{
228 unsigned int i;
229 const struct kernel_symbol *ks;
230
231 for (i = 0; i < num; i++) {
232 ks = lookup_symbol(name, arr[i].start, arr[i].stop);
233 if (!ks || !arr[i].check(gplok, warn, name))
234 continue;
235
236 if (crc)
237 *crc = symversion(arr[i].crcs, ks - arr[i].start);
238 return ks;
227 } 239 }
240 return NULL;
241}
228 242
229 if (gplok) 243/* Find a symbol, return value, (optional) crc and (optional) module
230 ks = lookup_symbol(name, __start___ksymtab_unused_gpl, 244 * which owns it */
231 __stop___ksymtab_unused_gpl); 245static unsigned long find_symbol(const char *name,
246 struct module **owner,
247 const unsigned long **crc,
248 bool gplok,
249 bool warn)
250{
251 struct module *mod;
252 const struct kernel_symbol *ks;
253 const struct symsearch arr[] = {
254 { __start___ksymtab, __stop___ksymtab, __start___kcrctab,
255 always_ok },
256 { __start___ksymtab_gpl, __stop___ksymtab_gpl,
257 __start___kcrctab_gpl, gpl_only },
258 { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
259 __start___kcrctab_gpl_future, warn_if_not_gpl },
260 { __start___ksymtab_unused, __stop___ksymtab_unused,
261 __start___kcrctab_unused, printk_unused_warning },
262 { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
263 __start___kcrctab_unused_gpl, gpl_only_unused_warning },
264 };
265
266 /* Core kernel first. */
267 ks = search_symarrays(arr, ARRAY_SIZE(arr), name, gplok, warn, crc);
232 if (ks) { 268 if (ks) {
233 printk_unused_warning(name); 269 if (owner)
234 *crc = symversion(__start___kcrctab_unused_gpl, 270 *owner = NULL;
235 (ks - __start___ksymtab_unused_gpl));
236 return ks->value; 271 return ks->value;
237 } 272 }
238 273
239 /* Now try modules. */ 274 /* Now try modules. */
240 list_for_each_entry(mod, &modules, list) { 275 list_for_each_entry(mod, &modules, list) {
241 *owner = mod; 276 struct symsearch arr[] = {
242 ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); 277 { mod->syms, mod->syms + mod->num_syms, mod->crcs,
278 always_ok },
279 { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
280 mod->gpl_crcs, gpl_only },
281 { mod->gpl_future_syms,
282 mod->gpl_future_syms + mod->num_gpl_future_syms,
283 mod->gpl_future_crcs, warn_if_not_gpl },
284 { mod->unused_syms,
285 mod->unused_syms + mod->num_unused_syms,
286 mod->unused_crcs, printk_unused_warning },
287 { mod->unused_gpl_syms,
288 mod->unused_gpl_syms + mod->num_unused_gpl_syms,
289 mod->unused_gpl_crcs, gpl_only_unused_warning },
290 };
291
292 ks = search_symarrays(arr, ARRAY_SIZE(arr),
293 name, gplok, warn, crc);
243 if (ks) { 294 if (ks) {
244 *crc = symversion(mod->crcs, (ks - mod->syms)); 295 if (owner)
245 return ks->value; 296 *owner = mod;
246 }
247
248 if (gplok) {
249 ks = lookup_symbol(name, mod->gpl_syms,
250 mod->gpl_syms + mod->num_gpl_syms);
251 if (ks) {
252 *crc = symversion(mod->gpl_crcs,
253 (ks - mod->gpl_syms));
254 return ks->value;
255 }
256 }
257 ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms);
258 if (ks) {
259 printk_unused_warning(name);
260 *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms));
261 return ks->value;
262 }
263
264 if (gplok) {
265 ks = lookup_symbol(name, mod->unused_gpl_syms,
266 mod->unused_gpl_syms + mod->num_unused_gpl_syms);
267 if (ks) {
268 printk_unused_warning(name);
269 *crc = symversion(mod->unused_gpl_crcs,
270 (ks - mod->unused_gpl_syms));
271 return ks->value;
272 }
273 }
274 ks = lookup_symbol(name, mod->gpl_future_syms,
275 (mod->gpl_future_syms +
276 mod->num_gpl_future_syms));
277 if (ks) {
278 if (!gplok) {
279 printk(KERN_WARNING "Symbol %s is being used "
280 "by a non-GPL module, which will not "
281 "be allowed in the future\n", name);
282 printk(KERN_WARNING "Please see the file "
283 "Documentation/feature-removal-schedule.txt "
284 "in the kernel source tree for more "
285 "details.\n");
286 }
287 *crc = symversion(mod->gpl_future_crcs,
288 (ks - mod->gpl_future_syms));
289 return ks->value; 297 return ks->value;
290 } 298 }
291 } 299 }
300
292 DEBUGP("Failed to find symbol %s\n", name); 301 DEBUGP("Failed to find symbol %s\n", name);
293 return -ENOENT; 302 return -ENOENT;
294} 303}
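The hunk above collapses five near-identical lookup blocks into one table-driven loop over symsearch entries. A stripped-down, compilable model of that pattern -- everything here is illustrative except the struct/callback shape, which mirrors the diff:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>
	#include <string.h>

	struct sym { const char *name; unsigned long value; };

	struct symsearch {
		const struct sym *start, *stop;
		bool (*check)(bool gplok, bool warn, const char *name);
	};

	static bool always_ok(bool gplok, bool warn, const char *name)
	{
		return true;
	}

	static bool gpl_only(bool gplok, bool warn, const char *name)
	{
		return gplok;
	}

	/* Walk each table in order; first name match whose check passes wins. */
	static const struct sym *search_symarrays(const struct symsearch *arr,
						  size_t num, const char *name,
						  bool gplok, bool warn)
	{
		size_t i;
		const struct sym *s;

		for (i = 0; i < num; i++)
			for (s = arr[i].start; s < arr[i].stop; s++)
				if (strcmp(s->name, name) == 0 &&
				    arr[i].check(gplok, warn, name))
					return s;
		return NULL;
	}

	int main(void)
	{
		static const struct sym plain[] = { { "printk", 1 } };
		static const struct sym gpl[] = { { "cpu_clock", 2 } };
		const struct symsearch arr[] = {
			{ plain, plain + 1, always_ok },
			{ gpl, gpl + 1, gpl_only },
		};

		/* A non-GPL requester never sees the GPL-only table. */
		const struct sym *s = search_symarrays(arr, 2, "cpu_clock",
						       false, true);
		printf("%s\n", s ? "found" : "not found");	/* not found */
		return 0;
	}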
@@ -736,12 +745,13 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
 	if (!forced && module_refcount(mod) != 0)
 		wait_for_zero_refcount(mod);
 
+	mutex_unlock(&module_mutex);
 	/* Final destruction now noone is using it. */
-	if (mod->exit != NULL) {
-		mutex_unlock(&module_mutex);
+	if (mod->exit != NULL)
 		mod->exit();
-		mutex_lock(&module_mutex);
-	}
+	blocking_notifier_call_chain(&module_notify_list,
+				     MODULE_STATE_GOING, mod);
+	mutex_lock(&module_mutex);
 	/* Store the name of the last unloaded module for diagnostic purposes */
 	strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
 	free_module(mod);
@@ -777,10 +787,9 @@ static void print_unload_info(struct seq_file *m, struct module *mod)
 void __symbol_put(const char *symbol)
 {
 	struct module *owner;
-	const unsigned long *crc;
 
 	preempt_disable();
-	if (IS_ERR_VALUE(__find_symbol(symbol, &owner, &crc, 1)))
+	if (IS_ERR_VALUE(find_symbol(symbol, &owner, NULL, true, false)))
 		BUG();
 	module_put(owner);
 	preempt_enable();
@@ -881,6 +890,19 @@ static struct module_attribute *modinfo_attrs[] = {
 
 static const char vermagic[] = VERMAGIC_STRING;
 
+static int try_to_force_load(struct module *mod, const char *symname)
+{
+#ifdef CONFIG_MODULE_FORCE_LOAD
+	if (!(tainted & TAINT_FORCED_MODULE))
+		printk("%s: no version for \"%s\" found: kernel tainted.\n",
+		       mod->name, symname);
+	add_taint_module(mod, TAINT_FORCED_MODULE);
+	return 0;
+#else
+	return -ENOEXEC;
+#endif
+}
+
 #ifdef CONFIG_MODVERSIONS
 static int check_version(Elf_Shdr *sechdrs,
 			 unsigned int versindex,
905
884#ifdef CONFIG_MODVERSIONS 906#ifdef CONFIG_MODVERSIONS
885static int check_version(Elf_Shdr *sechdrs, 907static int check_version(Elf_Shdr *sechdrs,
886 unsigned int versindex, 908 unsigned int versindex,
@@ -895,6 +917,10 @@ static int check_version(Elf_Shdr *sechdrs,
 	if (!crc)
 		return 1;
 
+	/* No versions at all? modprobe --force does this. */
+	if (versindex == 0)
+		return try_to_force_load(mod, symname) == 0;
+
 	versions = (void *) sechdrs[versindex].sh_addr;
 	num_versions = sechdrs[versindex].sh_size
 		/ sizeof(struct modversion_info);
@@ -905,18 +931,19 @@ static int check_version(Elf_Shdr *sechdrs,
 
 		if (versions[i].crc == *crc)
 			return 1;
-		printk("%s: disagrees about version of symbol %s\n",
-		       mod->name, symname);
 		DEBUGP("Found checksum %lX vs module %lX\n",
 		       *crc, versions[i].crc);
-		return 0;
+		goto bad_version;
 	}
-	/* Not in module's version table. OK, but that taints the kernel. */
-	if (!(tainted & TAINT_FORCED_MODULE))
-		printk("%s: no version for \"%s\" found: kernel tainted.\n",
-		       mod->name, symname);
-	add_taint_module(mod, TAINT_FORCED_MODULE);
-	return 1;
+
+	printk(KERN_WARNING "%s: no symbol version for %s\n",
+	       mod->name, symname);
+	return 0;
+
+bad_version:
+	printk("%s: disagrees about version of symbol %s\n",
+	       mod->name, symname);
+	return 0;
 }
 
 static inline int check_modstruct_version(Elf_Shdr *sechdrs,
@@ -924,20 +951,20 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 					    struct module *mod)
 {
 	const unsigned long *crc;
-	struct module *owner;
 
-	if (IS_ERR_VALUE(__find_symbol("struct_module",
-					 &owner, &crc, 1)))
+	if (IS_ERR_VALUE(find_symbol("struct_module", NULL, &crc, true, false)))
 		BUG();
-	return check_version(sechdrs, versindex, "struct_module", mod,
-			     crc);
+	return check_version(sechdrs, versindex, "struct_module", mod, crc);
 }
 
-/* First part is kernel version, which we ignore. */
-static inline int same_magic(const char *amagic, const char *bmagic)
+/* First part is kernel version, which we ignore if module has crcs. */
+static inline int same_magic(const char *amagic, const char *bmagic,
+			     bool has_crcs)
 {
-	amagic += strcspn(amagic, " ");
-	bmagic += strcspn(bmagic, " ");
+	if (has_crcs) {
+		amagic += strcspn(amagic, " ");
+		bmagic += strcspn(bmagic, " ");
+	}
 	return strcmp(amagic, bmagic) == 0;
 }
 #else
@@ -957,7 +984,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 	return 1;
 }
 
-static inline int same_magic(const char *amagic, const char *bmagic)
+static inline int same_magic(const char *amagic, const char *bmagic,
+			     bool has_crcs)
 {
 	return strcmp(amagic, bmagic) == 0;
 }
@@ -974,8 +1002,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
 	unsigned long ret;
 	const unsigned long *crc;
 
-	ret = __find_symbol(name, &owner, &crc,
-			!(mod->taints & TAINT_PROPRIETARY_MODULE));
+	ret = find_symbol(name, &owner, &crc,
+			  !(mod->taints & TAINT_PROPRIETARY_MODULE), true);
 	if (!IS_ERR_VALUE(ret)) {
 		/* use_module can fail due to OOM,
 		   or module initialization or unloading */
@@ -991,6 +1019,20 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
  * J. Corbet <corbet@lwn.net>
  */
 #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
+struct module_sect_attr
+{
+	struct module_attribute mattr;
+	char *name;
+	unsigned long address;
+};
+
+struct module_sect_attrs
+{
+	struct attribute_group grp;
+	unsigned int nsections;
+	struct module_sect_attr attrs[0];
+};
+
 static ssize_t module_sect_show(struct module_attribute *mattr,
 				struct module *mod, char *buf)
 {
@@ -1001,7 +1043,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr,
 
 static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
 {
-	int section;
+	unsigned int section;
 
 	for (section = 0; section < sect_attrs->nsections; section++)
 		kfree(sect_attrs->attrs[section].name);
@@ -1362,10 +1404,9 @@ void *__symbol_get(const char *symbol)
 {
 	struct module *owner;
 	unsigned long value;
-	const unsigned long *crc;
 
 	preempt_disable();
-	value = __find_symbol(symbol, &owner, &crc, 1);
+	value = find_symbol(symbol, &owner, NULL, true, true);
 	if (IS_ERR_VALUE(value))
 		value = 0;
 	else if (strong_try_module_get(owner))
@@ -1382,33 +1423,33 @@ EXPORT_SYMBOL_GPL(__symbol_get);
  */
 static int verify_export_symbols(struct module *mod)
 {
-	const char *name = NULL;
-	unsigned long i, ret = 0;
+	unsigned int i;
 	struct module *owner;
-	const unsigned long *crc;
-
-	for (i = 0; i < mod->num_syms; i++)
-		if (!IS_ERR_VALUE(__find_symbol(mod->syms[i].name,
-						&owner, &crc, 1))) {
-			name = mod->syms[i].name;
-			ret = -ENOEXEC;
-			goto dup;
-		}
-
-	for (i = 0; i < mod->num_gpl_syms; i++)
-		if (!IS_ERR_VALUE(__find_symbol(mod->gpl_syms[i].name,
-						&owner, &crc, 1))) {
-			name = mod->gpl_syms[i].name;
-			ret = -ENOEXEC;
-			goto dup;
+	const struct kernel_symbol *s;
+	struct {
+		const struct kernel_symbol *sym;
+		unsigned int num;
+	} arr[] = {
+		{ mod->syms, mod->num_syms },
+		{ mod->gpl_syms, mod->num_gpl_syms },
+		{ mod->gpl_future_syms, mod->num_gpl_future_syms },
+		{ mod->unused_syms, mod->num_unused_syms },
+		{ mod->unused_gpl_syms, mod->num_unused_gpl_syms },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(arr); i++) {
+		for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
+			if (!IS_ERR_VALUE(find_symbol(s->name, &owner,
+						      NULL, true, false))) {
+				printk(KERN_ERR
+				       "%s: exports duplicate symbol %s"
+				       " (owned by %s)\n",
+				       mod->name, s->name, module_name(owner));
+				return -ENOEXEC;
+			}
 		}
-
-dup:
-	if (ret)
-		printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n",
-		       mod->name, name, module_name(owner));
-
-	return ret;
+	}
+	return 0;
 }
 
 /* Change all symbols so that st_value encodes the pointer directly. */
@@ -1814,8 +1855,9 @@ static struct module *load_module(void __user *umod,
 	unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
 #endif
 
-	/* Don't keep modinfo section */
+	/* Don't keep modinfo and version sections. */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
+	sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
 #ifdef CONFIG_KALLSYMS
 	/* Keep symbol and string tables for decoding later. */
 	sechdrs[symindex].sh_flags |= SHF_ALLOC;
@@ -1833,10 +1875,10 @@ static struct module *load_module(void __user *umod,
 	modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
 	/* This is allowed: modprobe --force will invalidate it. */
 	if (!modmagic) {
-		add_taint_module(mod, TAINT_FORCED_MODULE);
-		printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
-		       mod->name);
+		err = try_to_force_load(mod, "magic");
+		if (err)
+			goto free_hdr;
+	} else if (!same_magic(modmagic, vermagic, versindex)) {
-	} else if (!same_magic(modmagic, vermagic)) {
 		printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
 		       mod->name, modmagic, vermagic);
 		err = -ENOEXEC;
@@ -1977,7 +2019,8 @@ static struct module *load_module(void __user *umod,
 	mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
 	mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
 	if (unusedgplcrcindex)
-		mod->unused_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr;
+		mod->unused_gpl_crcs
+			= (void *)sechdrs[unusedgplcrcindex].sh_addr;
 
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !crcindex) ||
1983 if ((mod->num_syms && !crcindex) || 2026 if ((mod->num_syms && !crcindex) ||
@@ -1985,9 +2028,10 @@ static struct module *load_module(void __user *umod,
1985 (mod->num_gpl_future_syms && !gplfuturecrcindex) || 2028 (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
1986 (mod->num_unused_syms && !unusedcrcindex) || 2029 (mod->num_unused_syms && !unusedcrcindex) ||
1987 (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { 2030 (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
1988 printk(KERN_WARNING "%s: No versions for exported symbols." 2031 printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
1989 " Tainting kernel.\n", mod->name); 2032 err = try_to_force_load(mod, "nocrc");
1990 add_taint_module(mod, TAINT_FORCED_MODULE); 2033 if (err)
2034 goto cleanup;
1991 } 2035 }
1992#endif 2036#endif
1993 markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); 2037 markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
@@ -2171,6 +2215,8 @@ sys_init_module(void __user *umod,
 	mod->state = MODULE_STATE_GOING;
 	synchronize_sched();
 	module_put(mod);
+	blocking_notifier_call_chain(&module_notify_list,
+				     MODULE_STATE_GOING, mod);
 	mutex_lock(&module_mutex);
 	free_module(mod);
 	mutex_unlock(&module_mutex);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index ae5c6c147c4..f1525ad06cb 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -4,8 +4,9 @@
 
 #include <linux/sched.h>
 #include <linux/posix-timers.h>
-#include <asm/uaccess.h>
 #include <linux/errno.h>
+#include <linux/math64.h>
+#include <asm/uaccess.h>
 
 static int check_clock(const clockid_t which_clock)
 {
@@ -47,12 +48,10 @@ static void sample_to_timespec(const clockid_t which_clock,
 			       union cpu_time_count cpu,
 			       struct timespec *tp)
 {
-	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-		tp->tv_sec = div_long_long_rem(cpu.sched,
-					       NSEC_PER_SEC, &tp->tv_nsec);
-	} else {
+	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
+		*tp = ns_to_timespec(cpu.sched);
+	else
 		cputime_to_timespec(cpu.cpu, tp);
-	}
 }
 
 static inline int cpu_time_before(const clockid_t which_clock,
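The posix-cpu-timers.c hunk above swaps an open-coded div_long_long_rem() for ns_to_timespec() from linux/math64.h. For a non-negative count the conversion is just a divide and remainder by NSEC_PER_SEC; a userspace sketch of that arithmetic (assumption: positive input; the kernel helper also handles negative values):

	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	#define NSEC_PER_SEC 1000000000LL

	/* What ns_to_timespec() boils down to for a non-negative count. */
	static struct timespec ns_to_timespec_sketch(int64_t nsec)
	{
		struct timespec ts;

		ts.tv_sec = nsec / NSEC_PER_SEC;	/* whole seconds */
		ts.tv_nsec = nsec % NSEC_PER_SEC;	/* leftover nanoseconds */
		return ts;
	}

	int main(void)
	{
		struct timespec ts = ns_to_timespec_sketch(3500000000LL);

		/* 3500000000 ns -> 3.500000000 s */
		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}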
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index dcc199c43a1..6c19e94fd0a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -534,7 +534,6 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
 #define arch_ptrace_attach(child)	do { } while (0)
 #endif
 
-#ifndef __ARCH_SYS_PTRACE
 asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
@@ -582,7 +581,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 	unlock_kernel();
 	return ret;
 }
-#endif /* __ARCH_SYS_PTRACE */
 
 int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data)
 {
diff --git a/kernel/relay.c b/kernel/relay.c
index 7de644cdec4..bc24dcdc570 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in,
 	ret = 0;
 	spliced = 0;
 
-	while (len && !spliced) {
+	while (len) {
 		ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
 		if (ret < 0)
 			break;
diff --git a/kernel/sched.c b/kernel/sched.c
index e2f7f5acc80..8841a915545 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,16 +75,6 @@
 #include <asm/irq_regs.h>
 
 /*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
-	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-}
-
-/*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
  * and back.
@@ -242,6 +232,12 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
 }
 #endif
 
+/*
+ * sched_domains_mutex serializes calls to arch_init_sched_domains,
+ * detach_destroy_domains and partition_sched_domains.
+ */
+static DEFINE_MUTEX(sched_domains_mutex);
+
 #ifdef CONFIG_GROUP_SCHED
 
 #include <linux/cgroup.h>
@@ -308,9 +304,6 @@ static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
  */
 static DEFINE_SPINLOCK(task_group_lock);
 
-/* doms_cur_mutex serializes access to doms_cur[] array */
-static DEFINE_MUTEX(doms_cur_mutex);
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_USER_SCHED
 # define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
@@ -318,7 +311,13 @@ static DEFINE_MUTEX(doms_cur_mutex);
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 #endif
 
+/*
+ * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems.
+ * (The default weight is 1024 - so there's no practical
+ *  limitation from this.)
+ */
 #define MIN_SHARES	2
+#define MAX_SHARES	(ULONG_MAX - 1)
 
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 #endif
@@ -358,21 +357,9 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 #endif
 }
 
-static inline void lock_doms_cur(void)
-{
-	mutex_lock(&doms_cur_mutex);
-}
-
-static inline void unlock_doms_cur(void)
-{
-	mutex_unlock(&doms_cur_mutex);
-}
-
 #else
 
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline void lock_doms_cur(void) { }
-static inline void unlock_doms_cur(void) { }
 
 #endif	/* CONFIG_GROUP_SCHED */
 
@@ -560,13 +547,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
-	u64 clock, prev_clock_raw;
-	s64 clock_max_delta;
-
-	unsigned int clock_warps, clock_overflows, clock_underflows;
-	u64 idle_clock;
-	unsigned int clock_deep_idle_events;
-	u64 tick_timestamp;
+	u64 clock;
 
 	atomic_t nr_iowait;
 
@@ -631,82 +612,6 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
-#ifdef CONFIG_NO_HZ
-static inline bool nohz_on(int cpu)
-{
-	return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
-}
-
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-	rq->last_tick_seen = jiffies;
-}
-#else
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-}
-#endif
-
-/*
- * Update the per-runqueue clock, as finegrained as the platform can give
- * us, but without assuming monotonicity, etc.:
- */
-static void __update_rq_clock(struct rq *rq)
-{
-	u64 prev_raw = rq->prev_clock_raw;
-	u64 now = sched_clock();
-	s64 delta = now - prev_raw;
-	u64 clock = rq->clock;
-
-#ifdef CONFIG_SCHED_DEBUG
-	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-#endif
-	/*
-	 * Protect against sched_clock() occasionally going backwards:
-	 */
-	if (unlikely(delta < 0)) {
-		clock++;
-		rq->clock_warps++;
-	} else {
-		/*
-		 * Catch too large forward jumps too:
-		 */
-		u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
-		u64 max_time = rq->tick_timestamp + max_jump;
-
-		if (unlikely(clock + delta > max_time)) {
-			if (clock < max_time)
-				clock = max_time;
-			else
-				clock++;
-			rq->clock_overflows++;
-		} else {
-			if (unlikely(delta > rq->clock_max_delta))
-				rq->clock_max_delta = delta;
-			clock += delta;
-		}
-	}
-
-	rq->prev_clock_raw = now;
-	rq->clock = clock;
-}
-
-static void update_rq_clock(struct rq *rq)
-{
-	if (likely(smp_processor_id() == cpu_of(rq)))
-		__update_rq_clock(rq);
-}
-
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -722,6 +627,11 @@ static void update_rq_clock(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
+static inline void update_rq_clock(struct rq *rq)
+{
+	rq->clock = sched_clock_cpu(cpu_of(rq));
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
727 */ 637 */
@@ -757,14 +667,14 @@ const_debug unsigned int sysctl_sched_features =
757#define SCHED_FEAT(name, enabled) \ 667#define SCHED_FEAT(name, enabled) \
758 #name , 668 #name ,
759 669
760__read_mostly char *sched_feat_names[] = { 670static __read_mostly char *sched_feat_names[] = {
761#include "sched_features.h" 671#include "sched_features.h"
762 NULL 672 NULL
763}; 673};
764 674
765#undef SCHED_FEAT 675#undef SCHED_FEAT
766 676
767int sched_feat_open(struct inode *inode, struct file *filp) 677static int sched_feat_open(struct inode *inode, struct file *filp)
768{ 678{
769 filp->private_data = inode->i_private; 679 filp->private_data = inode->i_private;
770 return 0; 680 return 0;
@@ -899,7 +809,7 @@ static inline u64 global_rt_runtime(void)
 	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
-static const unsigned long long time_sync_thresh = 100000;
+unsigned long long time_sync_thresh = 100000;
 
 static DEFINE_PER_CPU(unsigned long long, time_offset);
 static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
@@ -913,11 +823,14 @@ static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
 static DEFINE_SPINLOCK(time_sync_lock);
 static unsigned long long prev_global_time;
 
-static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
+static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&time_sync_lock, flags);
+	/*
+	 * We want this inlined, to not get tracer function calls
+	 * in this critical section:
+	 */
+	spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);
+	__raw_spin_lock(&time_sync_lock.raw_lock);
 
 	if (time < prev_global_time) {
 		per_cpu(time_offset, cpu) += prev_global_time - time;
@@ -926,7 +839,8 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
926 prev_global_time = time; 839 prev_global_time = time;
927 } 840 }
928 841
929 spin_unlock_irqrestore(&time_sync_lock, flags); 842 __raw_spin_unlock(&time_sync_lock.raw_lock);
843 spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);
930 844
931 return time; 845 return time;
932} 846}
@@ -934,8 +848,6 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
934static unsigned long long __cpu_clock(int cpu) 848static unsigned long long __cpu_clock(int cpu)
935{ 849{
936 unsigned long long now; 850 unsigned long long now;
937 unsigned long flags;
938 struct rq *rq;
939 851
940 /* 852 /*
941 * Only call sched_clock() if the scheduler has already been 853 * Only call sched_clock() if the scheduler has already been
@@ -944,11 +856,7 @@ static unsigned long long __cpu_clock(int cpu)
944 if (unlikely(!scheduler_running)) 856 if (unlikely(!scheduler_running))
945 return 0; 857 return 0;
946 858
947 local_irq_save(flags); 859 now = sched_clock_cpu(cpu);
948 rq = cpu_rq(cpu);
949 update_rq_clock(rq);
950 now = rq->clock;
951 local_irq_restore(flags);
952 860
953 return now; 861 return now;
954} 862}
@@ -960,13 +868,18 @@ static unsigned long long __cpu_clock(int cpu)
960unsigned long long cpu_clock(int cpu) 868unsigned long long cpu_clock(int cpu)
961{ 869{
962 unsigned long long prev_cpu_time, time, delta_time; 870 unsigned long long prev_cpu_time, time, delta_time;
871 unsigned long flags;
963 872
873 local_irq_save(flags);
964 prev_cpu_time = per_cpu(prev_cpu_time, cpu); 874 prev_cpu_time = per_cpu(prev_cpu_time, cpu);
965 time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); 875 time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
966 delta_time = time-prev_cpu_time; 876 delta_time = time-prev_cpu_time;
967 877
968 if (unlikely(delta_time > time_sync_thresh)) 878 if (unlikely(delta_time > time_sync_thresh)) {
969 time = __sync_cpu_clock(time, cpu); 879 time = __sync_cpu_clock(time, cpu);
880 per_cpu(prev_cpu_time, cpu) = time;
881 }
882 local_irq_restore(flags);
970 883
971 return time; 884 return time;
972} 885}
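
cpu_clock() above layers a per-cpu offset on the raw clock so readings across cpus stay near a shared high-water mark, taking the sync lock only when a cpu has drifted more than time_sync_thresh since its previous reading. A rough single-threaded model of that scheme, with raw_now standing in for the per-cpu clock source and the lock, irq disabling and per-cpu accessors left out:

#include <stdint.h>

#define NR_CPUS         4
#define SYNC_THRESH_NS  100000ULL       /* mirrors time_sync_thresh */

static uint64_t time_offset[NR_CPUS];
static uint64_t prev_cpu_time[NR_CPUS];
static uint64_t prev_global_time;

/* __sync_cpu_clock() shape: lift a lagging cpu up to the global
 * high-water mark via its offset, or push the mark forward. */
static uint64_t sync_cpu_clock(uint64_t time, int cpu)
{
        if (time < prev_global_time) {
                time_offset[cpu] += prev_global_time - time;
                time = prev_global_time;
        } else {
                prev_global_time = time;
        }
        return time;
}

/* cpu_clock() shape: resync only on drift beyond the threshold. */
static uint64_t cpu_clock_sketch(int cpu, uint64_t raw_now)
{
        uint64_t time = raw_now + time_offset[cpu];

        if (time - prev_cpu_time[cpu] > SYNC_THRESH_NS) {
                time = sync_cpu_clock(time, cpu);
                prev_cpu_time[cpu] = time;
        }
        return time;
}
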
@@ -1117,43 +1030,6 @@ static struct rq *this_rq_lock(void)
1117 return rq; 1030 return rq;
1118} 1031}
1119 1032
1120/*
1121 * We are going deep-idle (irqs are disabled):
1122 */
1123void sched_clock_idle_sleep_event(void)
1124{
1125 struct rq *rq = cpu_rq(smp_processor_id());
1126
1127 spin_lock(&rq->lock);
1128 __update_rq_clock(rq);
1129 spin_unlock(&rq->lock);
1130 rq->clock_deep_idle_events++;
1131}
1132EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
1133
1134/*
1135 * We just idled delta nanoseconds (called with irqs disabled):
1136 */
1137void sched_clock_idle_wakeup_event(u64 delta_ns)
1138{
1139 struct rq *rq = cpu_rq(smp_processor_id());
1140 u64 now = sched_clock();
1141
1142 rq->idle_clock += delta_ns;
1143 /*
1144 * Override the previous timestamp and ignore all
1145 * sched_clock() deltas that occurred while we idled,
1146 * and use the PM-provided delta_ns to advance the
1147 * rq clock:
1148 */
1149 spin_lock(&rq->lock);
1150 rq->prev_clock_raw = now;
1151 rq->clock += delta_ns;
1152 spin_unlock(&rq->lock);
1153 touch_softlockup_watchdog();
1154}
1155EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
1156
1157static void __resched_task(struct task_struct *p, int tif_bit); 1033static void __resched_task(struct task_struct *p, int tif_bit);
1158 1034
1159static inline void resched_task(struct task_struct *p) 1035static inline void resched_task(struct task_struct *p)
@@ -1189,6 +1065,7 @@ static inline void resched_rq(struct rq *rq)
1189enum { 1065enum {
1190 HRTICK_SET, /* re-program hrtick_timer */ 1066
1191 HRTICK_RESET, /* not a new slice */ 1067 HRTICK_RESET, /* not a new slice */
1068 HRTICK_BLOCK, /* stop hrtick operations */
1192}; 1069};
1193 1070
1194/* 1071/*
@@ -1200,6 +1077,8 @@ static inline int hrtick_enabled(struct rq *rq)
1200{ 1077{
1201 if (!sched_feat(HRTICK)) 1078 if (!sched_feat(HRTICK))
1202 return 0; 1079 return 0;
1080 if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
1081 return 0;
1203 return hrtimer_is_hres_active(&rq->hrtick_timer); 1082 return hrtimer_is_hres_active(&rq->hrtick_timer);
1204} 1083}
1205 1084
@@ -1275,14 +1154,70 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1275 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); 1154 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1276 1155
1277 spin_lock(&rq->lock); 1156 spin_lock(&rq->lock);
1278 __update_rq_clock(rq); 1157 update_rq_clock(rq);
1279 rq->curr->sched_class->task_tick(rq, rq->curr, 1); 1158 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1280 spin_unlock(&rq->lock); 1159 spin_unlock(&rq->lock);
1281 1160
1282 return HRTIMER_NORESTART; 1161 return HRTIMER_NORESTART;
1283} 1162}
1284 1163
1285static inline void init_rq_hrtick(struct rq *rq) 1164static void hotplug_hrtick_disable(int cpu)
1165{
1166 struct rq *rq = cpu_rq(cpu);
1167 unsigned long flags;
1168
1169 spin_lock_irqsave(&rq->lock, flags);
1170 rq->hrtick_flags = 0;
1171 __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
1172 spin_unlock_irqrestore(&rq->lock, flags);
1173
1174 hrtick_clear(rq);
1175}
1176
1177static void hotplug_hrtick_enable(int cpu)
1178{
1179 struct rq *rq = cpu_rq(cpu);
1180 unsigned long flags;
1181
1182 spin_lock_irqsave(&rq->lock, flags);
1183 __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
1184 spin_unlock_irqrestore(&rq->lock, flags);
1185}
1186
1187static int
1188hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
1189{
1190 int cpu = (int)(long)hcpu;
1191
1192 switch (action) {
1193 case CPU_UP_CANCELED:
1194 case CPU_UP_CANCELED_FROZEN:
1195 case CPU_DOWN_PREPARE:
1196 case CPU_DOWN_PREPARE_FROZEN:
1197 case CPU_DEAD:
1198 case CPU_DEAD_FROZEN:
1199 hotplug_hrtick_disable(cpu);
1200 return NOTIFY_OK;
1201
1202 case CPU_UP_PREPARE:
1203 case CPU_UP_PREPARE_FROZEN:
1204 case CPU_DOWN_FAILED:
1205 case CPU_DOWN_FAILED_FROZEN:
1206 case CPU_ONLINE:
1207 case CPU_ONLINE_FROZEN:
1208 hotplug_hrtick_enable(cpu);
1209 return NOTIFY_OK;
1210 }
1211
1212 return NOTIFY_DONE;
1213}
1214
1215static void init_hrtick(void)
1216{
1217 hotcpu_notifier(hotplug_hrtick, 0);
1218}
1219
1220static void init_rq_hrtick(struct rq *rq)
1286{ 1221{
1287 rq->hrtick_flags = 0; 1222 rq->hrtick_flags = 0;
1288 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1223 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -1319,6 +1254,10 @@ static inline void init_rq_hrtick(struct rq *rq)
1319void hrtick_resched(void) 1254void hrtick_resched(void)
1320{ 1255{
1321} 1256}
1257
1258static inline void init_hrtick(void)
1259{
1260}
1322#endif 1261#endif
1323 1262
1324/* 1263/*
@@ -1438,8 +1377,8 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1438{ 1377{
1439 u64 tmp; 1378 u64 tmp;
1440 1379
1441 if (unlikely(!lw->inv_weight)) 1380 if (!lw->inv_weight)
1442 lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1); 1381 lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1);
1443 1382
1444 tmp = (u64)delta_exec * weight; 1383 tmp = (u64)delta_exec * weight;
1445 /* 1384 /*
@@ -1748,6 +1687,8 @@ __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd,
1748 1687
1749 if (shares < MIN_SHARES) 1688 if (shares < MIN_SHARES)
1750 shares = MIN_SHARES; 1689 shares = MIN_SHARES;
1690 else if (shares > MAX_SHARES)
1691 shares = MAX_SHARES;
1751 1692
1752 __set_se_shares(tg->se[tcpu], shares); 1693 __set_se_shares(tg->se[tcpu], shares);
1753} 1694}
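
The inv_weight change above caches an approximation of 2^32 / weight so that calc_delta_mine() can compute delta_exec * weight / lw->weight with a multiply and a shift instead of a divide; the leading "1 +" guarantees the cached reciprocal is never zero. A hedged userspace illustration of the arithmetic (the real function also pre-shifts large products to dodge 64-bit overflow, which this sketch skips):

#include <stdint.h>
#include <stdio.h>

#define WMULT_CONST (1ULL << 32)
#define WMULT_SHIFT 32

/* delta_exec * weight / lw_weight via a fixed-point reciprocal. */
static uint64_t calc_delta(uint64_t delta_exec, uint64_t weight,
                           uint64_t lw_weight)
{
        uint64_t inv_weight = 1 + (WMULT_CONST - lw_weight / 2)
                                / (lw_weight + 1);

        return (delta_exec * weight * inv_weight) >> WMULT_SHIFT;
}

int main(void)
{
        /* 10 ms of runtime at nice-0 weight (1024) on a queue
         * weighing 2048: expect roughly half, ~5 ms. */
        printf("%llu\n", (unsigned long long)
               calc_delta(10000000ULL, 1024, 2048));
        return 0;
}
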
@@ -4339,8 +4280,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4339 struct rq *rq = this_rq(); 4280 struct rq *rq = this_rq();
4340 cputime64_t tmp; 4281 cputime64_t tmp;
4341 4282
4342 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) 4283 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
4343 return account_guest_time(p, cputime); 4284 account_guest_time(p, cputime);
4285 return;
4286 }
4344 4287
4345 p->stime = cputime_add(p->stime, cputime); 4288 p->stime = cputime_add(p->stime, cputime);
4346 4289
@@ -4404,19 +4347,11 @@ void scheduler_tick(void)
4404 int cpu = smp_processor_id(); 4347 int cpu = smp_processor_id();
4405 struct rq *rq = cpu_rq(cpu); 4348 struct rq *rq = cpu_rq(cpu);
4406 struct task_struct *curr = rq->curr; 4349 struct task_struct *curr = rq->curr;
4407 u64 next_tick = rq->tick_timestamp + TICK_NSEC; 4350
4351 sched_clock_tick();
4408 4352
4409 spin_lock(&rq->lock); 4353 spin_lock(&rq->lock);
4410 __update_rq_clock(rq); 4354 update_rq_clock(rq);
4411 /*
4412 * Let rq->clock advance by at least TICK_NSEC:
4413 */
4414 if (unlikely(rq->clock < next_tick)) {
4415 rq->clock = next_tick;
4416 rq->clock_underflows++;
4417 }
4418 rq->tick_timestamp = rq->clock;
4419 update_last_tick_seen(rq);
4420 update_cpu_load(rq); 4355 update_cpu_load(rq);
4421 curr->sched_class->task_tick(rq, curr, 0); 4356 curr->sched_class->task_tick(rq, curr, 0);
4422 spin_unlock(&rq->lock); 4357 spin_unlock(&rq->lock);
@@ -4570,7 +4505,7 @@ need_resched_nonpreemptible:
4570 * Do the rq-clock update outside the rq lock: 4505 * Do the rq-clock update outside the rq lock:
4571 */ 4506 */
4572 local_irq_disable(); 4507 local_irq_disable();
4573 __update_rq_clock(rq); 4508 update_rq_clock(rq);
4574 spin_lock(&rq->lock); 4509 spin_lock(&rq->lock);
4575 clear_tsk_need_resched(prev); 4510 clear_tsk_need_resched(prev);
4576 4511
@@ -4595,9 +4530,9 @@ need_resched_nonpreemptible:
4595 prev->sched_class->put_prev_task(rq, prev); 4530 prev->sched_class->put_prev_task(rq, prev);
4596 next = pick_next_task(rq, prev); 4531 next = pick_next_task(rq, prev);
4597 4532
4598 sched_info_switch(prev, next);
4599
4600 if (likely(prev != next)) { 4533 if (likely(prev != next)) {
4534 sched_info_switch(prev, next);
4535
4601 rq->nr_switches++; 4536 rq->nr_switches++;
4602 rq->curr = next; 4537 rq->curr = next;
4603 ++*switch_count; 4538 ++*switch_count;
@@ -4632,8 +4567,6 @@ EXPORT_SYMBOL(schedule);
4632asmlinkage void __sched preempt_schedule(void) 4567asmlinkage void __sched preempt_schedule(void)
4633{ 4568{
4634 struct thread_info *ti = current_thread_info(); 4569 struct thread_info *ti = current_thread_info();
4635 struct task_struct *task = current;
4636 int saved_lock_depth;
4637 4570
4638 /* 4571 /*
4639 * If there is a non-zero preempt_count or interrupts are disabled, 4572 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -4644,16 +4577,7 @@ asmlinkage void __sched preempt_schedule(void)
4644 4577
4645 do { 4578 do {
4646 add_preempt_count(PREEMPT_ACTIVE); 4579 add_preempt_count(PREEMPT_ACTIVE);
4647
4648 /*
4649 * We keep the big kernel semaphore locked, but we
4650 * clear ->lock_depth so that schedule() doesn't
4651 * auto-release the semaphore:
4652 */
4653 saved_lock_depth = task->lock_depth;
4654 task->lock_depth = -1;
4655 schedule(); 4580 schedule();
4656 task->lock_depth = saved_lock_depth;
4657 sub_preempt_count(PREEMPT_ACTIVE); 4581 sub_preempt_count(PREEMPT_ACTIVE);
4658 4582
4659 /* 4583 /*
@@ -4674,26 +4598,15 @@ EXPORT_SYMBOL(preempt_schedule);
4674asmlinkage void __sched preempt_schedule_irq(void) 4598asmlinkage void __sched preempt_schedule_irq(void)
4675{ 4599{
4676 struct thread_info *ti = current_thread_info(); 4600 struct thread_info *ti = current_thread_info();
4677 struct task_struct *task = current;
4678 int saved_lock_depth;
4679 4601
4680 /* Catch callers which need to be fixed */ 4602 /* Catch callers which need to be fixed */
4681 BUG_ON(ti->preempt_count || !irqs_disabled()); 4603 BUG_ON(ti->preempt_count || !irqs_disabled());
4682 4604
4683 do { 4605 do {
4684 add_preempt_count(PREEMPT_ACTIVE); 4606 add_preempt_count(PREEMPT_ACTIVE);
4685
4686 /*
4687 * We keep the big kernel semaphore locked, but we
4688 * clear ->lock_depth so that schedule() doesn't
4689 * auto-release the semaphore:
4690 */
4691 saved_lock_depth = task->lock_depth;
4692 task->lock_depth = -1;
4693 local_irq_enable(); 4607 local_irq_enable();
4694 schedule(); 4608 schedule();
4695 local_irq_disable(); 4609 local_irq_disable();
4696 task->lock_depth = saved_lock_depth;
4697 sub_preempt_count(PREEMPT_ACTIVE); 4610 sub_preempt_count(PREEMPT_ACTIVE);
4698 4611
4699 /* 4612 /*
@@ -5612,7 +5525,6 @@ static void __cond_resched(void)
5612 } while (need_resched()); 5525 } while (need_resched());
5613} 5526}
5614 5527
5615#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
5616int __sched _cond_resched(void) 5528int __sched _cond_resched(void)
5617{ 5529{
5618 if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && 5530 if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
@@ -5623,7 +5535,6 @@ int __sched _cond_resched(void)
5623 return 0; 5535 return 0;
5624} 5536}
5625EXPORT_SYMBOL(_cond_resched); 5537EXPORT_SYMBOL(_cond_resched);
5626#endif
5627 5538
5628/* 5539/*
5629 * cond_resched_lock() - if a reschedule is pending, drop the given lock, 5540 * cond_resched_lock() - if a reschedule is pending, drop the given lock,
@@ -5918,8 +5829,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5918 spin_unlock_irqrestore(&rq->lock, flags); 5829 spin_unlock_irqrestore(&rq->lock, flags);
5919 5830
5920 /* Set the preempt count _outside_ the spinlocks! */ 5831 /* Set the preempt count _outside_ the spinlocks! */
5832#if defined(CONFIG_PREEMPT)
5833 task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
5834#else
5921 task_thread_info(idle)->preempt_count = 0; 5835 task_thread_info(idle)->preempt_count = 0;
5922 5836#endif
5923 /* 5837 /*
5924 * The idle tasks have their own, simple scheduling class: 5838 * The idle tasks have their own, simple scheduling class:
5925 */ 5839 */
@@ -7755,7 +7669,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7755{ 7669{
7756 int i, j; 7670 int i, j;
7757 7671
7758 lock_doms_cur(); 7672 mutex_lock(&sched_domains_mutex);
7759 7673
7760 /* always unregister in case we don't destroy any domains */ 7674 /* always unregister in case we don't destroy any domains */
7761 unregister_sched_domain_sysctl(); 7675 unregister_sched_domain_sysctl();
@@ -7804,7 +7718,7 @@ match2:
7804 7718
7805 register_sched_domain_sysctl(); 7719 register_sched_domain_sysctl();
7806 7720
7807 unlock_doms_cur(); 7721 mutex_unlock(&sched_domains_mutex);
7808} 7722}
7809 7723
7810#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 7724#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -7813,8 +7727,10 @@ int arch_reinit_sched_domains(void)
7813 int err; 7727 int err;
7814 7728
7815 get_online_cpus(); 7729 get_online_cpus();
7730 mutex_lock(&sched_domains_mutex);
7816 detach_destroy_domains(&cpu_online_map); 7731 detach_destroy_domains(&cpu_online_map);
7817 err = arch_init_sched_domains(&cpu_online_map); 7732 err = arch_init_sched_domains(&cpu_online_map);
7733 mutex_unlock(&sched_domains_mutex);
7818 put_online_cpus(); 7734 put_online_cpus();
7819 7735
7820 return err; 7736 return err;
@@ -7932,13 +7848,16 @@ void __init sched_init_smp(void)
7932 BUG_ON(sched_group_nodes_bycpu == NULL); 7848 BUG_ON(sched_group_nodes_bycpu == NULL);
7933#endif 7849#endif
7934 get_online_cpus(); 7850 get_online_cpus();
7851 mutex_lock(&sched_domains_mutex);
7935 arch_init_sched_domains(&cpu_online_map); 7852 arch_init_sched_domains(&cpu_online_map);
7936 cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); 7853 cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
7937 if (cpus_empty(non_isolated_cpus)) 7854 if (cpus_empty(non_isolated_cpus))
7938 cpu_set(smp_processor_id(), non_isolated_cpus); 7855 cpu_set(smp_processor_id(), non_isolated_cpus);
7856 mutex_unlock(&sched_domains_mutex);
7939 put_online_cpus(); 7857 put_online_cpus();
7940 /* XXX: Theoretical race here - CPU may be hotplugged now */ 7858 /* XXX: Theoretical race here - CPU may be hotplugged now */
7941 hotcpu_notifier(update_sched_domains, 0); 7859 hotcpu_notifier(update_sched_domains, 0);
7860 init_hrtick();
7942 7861
7943 /* Move init over to a non-isolated CPU */ 7862 /* Move init over to a non-isolated CPU */
7944 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) 7863 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
@@ -8025,7 +7944,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
8025 7944
8026 se->my_q = cfs_rq; 7945 se->my_q = cfs_rq;
8027 se->load.weight = tg->shares; 7946 se->load.weight = tg->shares;
8028 se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); 7947 se->load.inv_weight = 0;
8029 se->parent = parent; 7948 se->parent = parent;
8030} 7949}
8031#endif 7950#endif
@@ -8149,8 +8068,6 @@ void __init sched_init(void)
8149 spin_lock_init(&rq->lock); 8068 spin_lock_init(&rq->lock);
8150 lockdep_set_class(&rq->lock, &rq->rq_lock_key); 8069 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
8151 rq->nr_running = 0; 8070 rq->nr_running = 0;
8152 rq->clock = 1;
8153 update_last_tick_seen(rq);
8154 init_cfs_rq(&rq->cfs, rq); 8071 init_cfs_rq(&rq->cfs, rq);
8155 init_rt_rq(&rq->rt, rq); 8072 init_rt_rq(&rq->rt, rq);
8156#ifdef CONFIG_FAIR_GROUP_SCHED 8073#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8294,6 +8211,7 @@ EXPORT_SYMBOL(__might_sleep);
8294static void normalize_task(struct rq *rq, struct task_struct *p) 8211static void normalize_task(struct rq *rq, struct task_struct *p)
8295{ 8212{
8296 int on_rq; 8213 int on_rq;
8214
8297 update_rq_clock(rq); 8215 update_rq_clock(rq);
8298 on_rq = p->se.on_rq; 8216 on_rq = p->se.on_rq;
8299 if (on_rq) 8217 if (on_rq)
@@ -8325,7 +8243,6 @@ void normalize_rt_tasks(void)
8325 p->se.sleep_start = 0; 8243 p->se.sleep_start = 0;
8326 p->se.block_start = 0; 8244 p->se.block_start = 0;
8327#endif 8245#endif
8328 task_rq(p)->clock = 0;
8329 8246
8330 if (!rt_task(p)) { 8247 if (!rt_task(p)) {
8331 /* 8248 /*
@@ -8692,7 +8609,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
8692 dequeue_entity(cfs_rq, se, 0); 8609 dequeue_entity(cfs_rq, se, 0);
8693 8610
8694 se->load.weight = shares; 8611 se->load.weight = shares;
8695 se->load.inv_weight = div64_64((1ULL<<32), shares); 8612 se->load.inv_weight = 0;
8696 8613
8697 if (on_rq) 8614 if (on_rq)
8698 enqueue_entity(cfs_rq, se, 0); 8615 enqueue_entity(cfs_rq, se, 0);
@@ -8722,13 +8639,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8722 if (!tg->se[0]) 8639 if (!tg->se[0])
8723 return -EINVAL; 8640 return -EINVAL;
8724 8641
8725 /*
8726 * A weight of 0 or 1 can cause arithmetic problems.
8727 * (The default weight is 1024 - so there's no practical
8728 * limitation from this.)
8729 */
8730 if (shares < MIN_SHARES) 8642 if (shares < MIN_SHARES)
8731 shares = MIN_SHARES; 8643 shares = MIN_SHARES;
8644 else if (shares > MAX_SHARES)
8645 shares = MAX_SHARES;
8732 8646
8733 mutex_lock(&shares_mutex); 8647 mutex_lock(&shares_mutex);
8734 if (tg->shares == shares) 8648 if (tg->shares == shares)
@@ -8753,7 +8667,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8753 * force a rebalance 8667 * force a rebalance
8754 */ 8668 */
8755 cfs_rq_set_shares(tg->cfs_rq[i], 0); 8669 cfs_rq_set_shares(tg->cfs_rq[i], 0);
8756 set_se_shares(tg->se[i], shares/nr_cpu_ids); 8670 set_se_shares(tg->se[i], shares);
8757 } 8671 }
8758 8672
8759 /* 8673 /*
@@ -8787,7 +8701,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
8787 if (runtime == RUNTIME_INF) 8701 if (runtime == RUNTIME_INF)
8788 return 1ULL << 16; 8702 return 1ULL << 16;
8789 8703
8790 return div64_64(runtime << 16, period); 8704 return div64_u64(runtime << 16, period);
8791} 8705}
8792 8706
8793#ifdef CONFIG_CGROUP_SCHED 8707#ifdef CONFIG_CGROUP_SCHED
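
to_ratio() above now divides with div64_u64(), returning runtime/period as a 16.16 fixed-point fraction in which 1 << 16 stands for a ratio of 1.0 (the RUNTIME_INF case). A small userspace illustration of that encoding, with the period and runtime values chosen only for the example:

#include <stdint.h>
#include <stdio.h>

/* runtime/period as 16.16 fixed point; 1 << 16 == ratio 1.0 */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
        return (runtime << 16) / period;
}

int main(void)
{
        /* 950 ms of RT runtime per 1 s period: 0.95 * 65536 = 62259 */
        printf("%llu\n", (unsigned long long)
               to_ratio(1000000000ULL, 950000000ULL));
        return 0;
}
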
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
new file mode 100644
index 00000000000..9c597e37f7d
--- /dev/null
+++ b/kernel/sched_clock.c
@@ -0,0 +1,236 @@
1/*
2 * sched_clock for unstable cpu clocks
3 *
4 * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 * Based on code by:
7 * Ingo Molnar <mingo@redhat.com>
8 * Guillaume Chazarain <guichaz@gmail.com>
9 *
10 * Create a semi-stable clock from a mixture of other events, including:
11 * - gtod
12 * - jiffies
13 * - sched_clock()
14 * - explicit idle events
15 *
16 * We use gtod as base and the unstable clock deltas. The deltas are filtered,
17 * making it monotonic and keeping it within an expected window. This window
18 * is set up using jiffies.
19 *
20 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
21 * that is otherwise invisible (TSC gets stopped).
22 *
23 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
24 * consistent between cpus (never more than one jiffy of difference).
25 */
26#include <linux/sched.h>
27#include <linux/percpu.h>
28#include <linux/spinlock.h>
29#include <linux/ktime.h>
30#include <linux/module.h>
31
32
33#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
34
35struct sched_clock_data {
36 /*
37 * Raw spinlock - this is a special case: this might be called
38 * from within instrumentation code so we don't want to do any
39 * instrumentation ourselves.
40 */
41 raw_spinlock_t lock;
42
43 unsigned long prev_jiffies;
44 u64 prev_raw;
45 u64 tick_raw;
46 u64 tick_gtod;
47 u64 clock;
48};
49
50static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
51
52static inline struct sched_clock_data *this_scd(void)
53{
54 return &__get_cpu_var(sched_clock_data);
55}
56
57static inline struct sched_clock_data *cpu_sdc(int cpu)
58{
59 return &per_cpu(sched_clock_data, cpu);
60}
61
62void sched_clock_init(void)
63{
64 u64 ktime_now = ktime_to_ns(ktime_get());
65 u64 now = 0;
66 int cpu;
67
68 for_each_possible_cpu(cpu) {
69 struct sched_clock_data *scd = cpu_sdc(cpu);
70
71 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
72 scd->prev_jiffies = jiffies;
73 scd->prev_raw = now;
74 scd->tick_raw = now;
75 scd->tick_gtod = ktime_now;
76 scd->clock = ktime_now;
77 }
78}
79
80/*
81 * update the percpu scd from the raw @now value
82 *
83 * - filter out backward motion
84 * - use jiffies to generate a min,max window to clip the raw values
85 */
86static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
87{
88 unsigned long now_jiffies = jiffies;
89 long delta_jiffies = now_jiffies - scd->prev_jiffies;
90 u64 clock = scd->clock;
91 u64 min_clock, max_clock;
92 s64 delta = now - scd->prev_raw;
93
94 WARN_ON_ONCE(!irqs_disabled());
95 min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
96
97 if (unlikely(delta < 0)) {
98 clock++;
99 goto out;
100 }
101
102 max_clock = min_clock + TICK_NSEC;
103
104 if (unlikely(clock + delta > max_clock)) {
105 if (clock < max_clock)
106 clock = max_clock;
107 else
108 clock++;
109 } else {
110 clock += delta;
111 }
112
113 out:
114 if (unlikely(clock < min_clock))
115 clock = min_clock;
116
117 scd->prev_raw = now;
118 scd->prev_jiffies = now_jiffies;
119 scd->clock = clock;
120}
121
122static void lock_double_clock(struct sched_clock_data *data1,
123 struct sched_clock_data *data2)
124{
125 if (data1 < data2) {
126 __raw_spin_lock(&data1->lock);
127 __raw_spin_lock(&data2->lock);
128 } else {
129 __raw_spin_lock(&data2->lock);
130 __raw_spin_lock(&data1->lock);
131 }
132}
133
134u64 sched_clock_cpu(int cpu)
135{
136 struct sched_clock_data *scd = cpu_sdc(cpu);
137 u64 now, clock;
138
139 WARN_ON_ONCE(!irqs_disabled());
140 now = sched_clock();
141
142 if (cpu != raw_smp_processor_id()) {
143 /*
144 * in order to update a remote cpu's clock based on our
145 * unstable raw time rebase it against:
146 * tick_raw (offset between raw counters)
147 * tick_gtod (tick offset between cpus)
148 */
149 struct sched_clock_data *my_scd = this_scd();
150
151 lock_double_clock(scd, my_scd);
152
153 now -= my_scd->tick_raw;
154 now += scd->tick_raw;
155
156 now -= my_scd->tick_gtod;
157 now += scd->tick_gtod;
158
159 __raw_spin_unlock(&my_scd->lock);
160 } else {
161 __raw_spin_lock(&scd->lock);
162 }
163
164 __update_sched_clock(scd, now);
165 clock = scd->clock;
166
167 __raw_spin_unlock(&scd->lock);
168
169 return clock;
170}
171
172void sched_clock_tick(void)
173{
174 struct sched_clock_data *scd = this_scd();
175 u64 now, now_gtod;
176
177 WARN_ON_ONCE(!irqs_disabled());
178
179 now = sched_clock();
180 now_gtod = ktime_to_ns(ktime_get());
181
182 __raw_spin_lock(&scd->lock);
183 __update_sched_clock(scd, now);
184 /*
185 * update tick_gtod after __update_sched_clock() because that will
186 * already observe 1 new jiffy; adding a new tick_gtod to that would
187 * increase the clock by 2 jiffies.
188 */
189 scd->tick_raw = now;
190 scd->tick_gtod = now_gtod;
191 __raw_spin_unlock(&scd->lock);
192}
193
194/*
195 * We are going deep-idle (irqs are disabled):
196 */
197void sched_clock_idle_sleep_event(void)
198{
199 sched_clock_cpu(smp_processor_id());
200}
201EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
202
203/*
204 * We just idled delta nanoseconds (called with irqs disabled):
205 */
206void sched_clock_idle_wakeup_event(u64 delta_ns)
207{
208 struct sched_clock_data *scd = this_scd();
209 u64 now = sched_clock();
210
211 /*
212 * Override the previous timestamp and ignore all
213 * sched_clock() deltas that occurred while we idled,
214 * and use the PM-provided delta_ns to advance the
215 * rq clock:
216 */
217 __raw_spin_lock(&scd->lock);
218 scd->prev_raw = now;
219 scd->clock += delta_ns;
220 __raw_spin_unlock(&scd->lock);
221
222 touch_softlockup_watchdog();
223}
224EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
225
226#endif
227
228/*
229 * Scheduler clock - returns current time in nanosec units.
230 * This is the default implementation.
231 * Architectures and sub-architectures can override this.
232 */
233unsigned long long __attribute__((weak)) sched_clock(void)
234{
235 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
236}
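
The heart of the new file is __update_sched_clock()'s window filter: a raw sched_clock() delta is accepted only while the resulting clock stays within one tick of a gtod-anchored minimum, which keeps each per-cpu clock monotonic and within about a jiffy of its peers. A standalone sketch of that clipping logic, with TICK_NSEC fixed for the example and the locking and scd bookkeeping left out:

#include <stdint.h>

#define TICK_NSEC 1000000ULL    /* assumes HZ=1000 for the sketch */

static uint64_t clock_window_filter(uint64_t clock, uint64_t tick_gtod,
                                    long delta_jiffies, int64_t delta)
{
        uint64_t min_clock = tick_gtod + delta_jiffies * TICK_NSEC;
        uint64_t max_clock = min_clock + TICK_NSEC;

        if (delta < 0)                  /* raw clock went backwards */
                clock++;
        else if (clock + delta > max_clock)
                clock = clock < max_clock ? max_clock : clock + 1;
        else
                clock += delta;

        if (clock < min_clock)          /* never trail the gtod window */
                clock = min_clock;
        return clock;
}
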
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 8a9498e7c83..5f06118fbc3 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m, int cpu)
204 PN(next_balance); 204 PN(next_balance);
205 P(curr->pid); 205 P(curr->pid);
206 PN(clock); 206 PN(clock);
207 PN(idle_clock);
208 PN(prev_clock_raw);
209 P(clock_warps);
210 P(clock_overflows);
211 P(clock_underflows);
212 P(clock_deep_idle_events);
213 PN(clock_max_delta);
214 P(cpu_load[0]); 207 P(cpu_load[0]);
215 P(cpu_load[1]); 208 P(cpu_load[1]);
216 P(cpu_load[2]); 209 P(cpu_load[2]);
@@ -357,8 +350,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
357 350
358 avg_per_cpu = p->se.sum_exec_runtime; 351 avg_per_cpu = p->se.sum_exec_runtime;
359 if (p->se.nr_migrations) { 352 if (p->se.nr_migrations) {
360 avg_per_cpu = div64_64(avg_per_cpu, 353 avg_per_cpu = div64_u64(avg_per_cpu,
361 p->se.nr_migrations); 354 p->se.nr_migrations);
362 } else { 355 } else {
363 avg_per_cpu = -1LL; 356 avg_per_cpu = -1LL;
364 } 357 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 89fa32b4edf..e24ecd39c4b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -662,10 +662,15 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
662 if (!initial) { 662 if (!initial) {
663 /* sleeps up to a single latency don't count. */ 663
664 if (sched_feat(NEW_FAIR_SLEEPERS)) { 664 if (sched_feat(NEW_FAIR_SLEEPERS)) {
665 unsigned long thresh = sysctl_sched_latency;
666
667 /*
668 * convert the sleeper threshold into virtual time
669 */
665 if (sched_feat(NORMALIZED_SLEEPER)) 670 if (sched_feat(NORMALIZED_SLEEPER))
666 vruntime -= calc_delta_weight(sysctl_sched_latency, se); 671 thresh = calc_delta_fair(thresh, se);
667 else 672
668 vruntime -= sysctl_sched_latency; 673 vruntime -= thresh;
669 } 674 }
670 675
671 /* ensure we never gain time by being placed backwards. */ 676 /* ensure we never gain time by being placed backwards. */
@@ -682,6 +687,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
682 * Update run-time statistics of the 'current'. 687 * Update run-time statistics of the 'current'.
683 */ 688 */
684 update_curr(cfs_rq); 689 update_curr(cfs_rq);
690 account_entity_enqueue(cfs_rq, se);
685 691
686 if (wakeup) { 692 if (wakeup) {
687 place_entity(cfs_rq, se, 0); 693 place_entity(cfs_rq, se, 0);
@@ -692,7 +698,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
692 check_spread(cfs_rq, se); 698 check_spread(cfs_rq, se);
693 if (se != cfs_rq->curr) 699 if (se != cfs_rq->curr)
694 __enqueue_entity(cfs_rq, se); 700 __enqueue_entity(cfs_rq, se);
695 account_entity_enqueue(cfs_rq, se);
696} 701}
697 702
698static void update_avg(u64 *avg, u64 sample) 703static void update_avg(u64 *avg, u64 sample)
@@ -841,8 +846,10 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
841 * queued ticks are scheduled to match the slice, so don't bother 846 * queued ticks are scheduled to match the slice, so don't bother
842 * validating it and just reschedule. 847 * validating it and just reschedule.
843 */ 848 */
844 if (queued) 849 if (queued) {
845 return resched_task(rq_of(cfs_rq)->curr); 850 resched_task(rq_of(cfs_rq)->curr);
851 return;
852 }
846 /* 853 /*
847 * don't let the period tick interfere with the hrtick preemption 854 * don't let the period tick interfere with the hrtick preemption
848 */ 855 */
@@ -957,7 +964,7 @@ static void yield_task_fair(struct rq *rq)
957 return; 964 return;
958 965
959 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { 966 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
960 __update_rq_clock(rq); 967 update_rq_clock(rq);
961 /* 968 /*
962 * Update run-time statistics of the 'current'. 969 * Update run-time statistics of the 'current'.
963 */ 970 */
@@ -1007,7 +1014,7 @@ static int wake_idle(int cpu, struct task_struct *p)
1007 * sibling runqueue info. This will avoid the checks and cache miss 1014 * sibling runqueue info. This will avoid the checks and cache miss
1008 * penalties associated with that. 1015
1009 */ 1016 */
1010 if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1) 1017 if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
1011 return cpu; 1018 return cpu;
1012 1019
1013 for_each_domain(cpu, sd) { 1020 for_each_domain(cpu, sd) {
@@ -1611,30 +1618,6 @@ static const struct sched_class fair_sched_class = {
1611}; 1618};
1612 1619
1613#ifdef CONFIG_SCHED_DEBUG 1620#ifdef CONFIG_SCHED_DEBUG
1614static void
1615print_cfs_rq_tasks(struct seq_file *m, struct cfs_rq *cfs_rq, int depth)
1616{
1617 struct sched_entity *se;
1618
1619 if (!cfs_rq)
1620 return;
1621
1622 list_for_each_entry_rcu(se, &cfs_rq->tasks, group_node) {
1623 int i;
1624
1625 for (i = depth; i; i--)
1626 seq_puts(m, " ");
1627
1628 seq_printf(m, "%lu %s %lu\n",
1629 se->load.weight,
1630 entity_is_task(se) ? "T" : "G",
1631 calc_delta_weight(SCHED_LOAD_SCALE, se)
1632 );
1633 if (!entity_is_task(se))
1634 print_cfs_rq_tasks(m, group_cfs_rq(se), depth + 1);
1635 }
1636}
1637
1638static void print_cfs_stats(struct seq_file *m, int cpu) 1621static void print_cfs_stats(struct seq_file *m, int cpu)
1639{ 1622{
1640 struct cfs_rq *cfs_rq; 1623 struct cfs_rq *cfs_rq;
@@ -1642,9 +1625,6 @@ static void print_cfs_stats(struct seq_file *m, int cpu)
1642 rcu_read_lock(); 1625 rcu_read_lock();
1643 for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq) 1626 for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
1644 print_cfs_rq(m, cpu, cfs_rq); 1627 print_cfs_rq(m, cpu, cfs_rq);
1645
1646 seq_printf(m, "\nWeight tree:\n");
1647 print_cfs_rq_tasks(m, &cpu_rq(cpu)->cfs, 1);
1648 rcu_read_unlock(); 1628 rcu_read_unlock();
1649} 1629}
1650#endif 1630#endif
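
The place_entity() change above converts the sleeper credit into the entity's virtual time before subtracting it, so a heavier task receives a proportionally smaller vruntime bonus. A rough model of that scaling; calc_delta_fair()'s exact fixed-point rounding is omitted, and NICE_0_LOAD plus the 20 ms latency are illustrative:

#include <stdint.h>
#include <stdio.h>

#define NICE_0_LOAD 1024ULL

/* One scheduling latency expressed in the task's virtual time. */
static uint64_t sleeper_thresh(uint64_t latency_ns, uint64_t weight)
{
        return latency_ns * NICE_0_LOAD / weight;
}

int main(void)
{
        /* nice-0 keeps the full 20 ms; a weight-2048 task gets 10 ms */
        printf("%llu %llu\n",
               (unsigned long long)sleeper_thresh(20000000ULL, 1024),
               (unsigned long long)sleeper_thresh(20000000ULL, 2048));
        return 0;
}
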
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 2bcafa37563..3a4f92dbbe6 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -99,7 +99,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
99/* 99/*
100 * Simple, special scheduling class for the per-CPU idle tasks: 100 * Simple, special scheduling class for the per-CPU idle tasks:
101 */ 101 */
102const struct sched_class idle_sched_class = { 102static const struct sched_class idle_sched_class = {
103 /* .next is NULL */ 103 /* .next is NULL */
104 /* no enqueue/yield_task for idle tasks */ 104 /* no enqueue/yield_task for idle tasks */
105 105
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c2730a5a4f0..060e87b0cb1 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1098,11 +1098,14 @@ static void post_schedule_rt(struct rq *rq)
1098 } 1098 }
1099} 1099}
1100 1100
1101 1101/*
1102 * If we are not running and we are not going to reschedule soon, we should
1103 * try to push tasks away now
1104 */
1102static void task_wake_up_rt(struct rq *rq, struct task_struct *p) 1105static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
1103{ 1106{
1104 if (!task_running(rq, p) && 1107 if (!task_running(rq, p) &&
1105 (p->prio >= rq->rt.highest_prio) && 1108 !test_tsk_need_resched(rq->curr) &&
1106 rq->rt.overloaded) 1109 rq->rt.overloaded)
1107 push_rt_tasks(rq); 1110 push_rt_tasks(rq);
1108} 1111}
@@ -1309,7 +1312,7 @@ static void set_curr_task_rt(struct rq *rq)
1309 p->se.exec_start = rq->clock; 1312 p->se.exec_start = rq->clock;
1310} 1313}
1311 1314
1312const struct sched_class rt_sched_class = { 1315static const struct sched_class rt_sched_class = {
1313 .next = &fair_sched_class, 1316 .next = &fair_sched_class,
1314 .enqueue_task = enqueue_task_rt, 1317 .enqueue_task = enqueue_task_rt,
1315 .dequeue_task = dequeue_task_rt, 1318 .dequeue_task = dequeue_task_rt,
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3c44956ee7e..36e06174004 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -589,16 +589,20 @@ static void takeover_tasklets(unsigned int cpu)
589 local_irq_disable(); 589 local_irq_disable();
590 590
591 /* Find end, append list for that CPU. */ 591 /* Find end, append list for that CPU. */
592 *__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).head; 592 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
593 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; 593 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
594 per_cpu(tasklet_vec, cpu).head = NULL; 594 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
595 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; 595 per_cpu(tasklet_vec, cpu).head = NULL;
596 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
597 }
596 raise_softirq_irqoff(TASKLET_SOFTIRQ); 598 raise_softirq_irqoff(TASKLET_SOFTIRQ);
597 599
598 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; 600 if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
599 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; 601 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
600 per_cpu(tasklet_hi_vec, cpu).head = NULL; 602 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
601 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; 603 per_cpu(tasklet_hi_vec, cpu).head = NULL;
604 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
605 }
602 raise_softirq_irqoff(HI_SOFTIRQ); 606 raise_softirq_irqoff(HI_SOFTIRQ);
603 607
604 local_irq_enable(); 608 local_irq_enable();
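
The takeover_tasklets() fix guards each splice with an emptiness check: an empty per-cpu list has tail pointing back at its own head, so splicing it would leave the destination's tail aimed into the dead cpu's per-cpu area, corrupting every later append. A minimal model of the tail-pointer splice, with plain structs standing in for the per-cpu vectors:

#include <stddef.h>

struct tasklet { struct tasklet *next; };

struct tasklet_list {
        struct tasklet  *head;
        struct tasklet **tail;          /* == &head when empty */
};

static void splice_tasklets(struct tasklet_list *dst,
                            struct tasklet_list *src)
{
        if (src->tail == &src->head)    /* empty source: splicing would
                                         * point dst->tail at src */
                return;

        *dst->tail = src->head;
        dst->tail  = src->tail;
        src->head  = NULL;
        src->tail  = &src->head;
}
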
diff --git a/kernel/time.c b/kernel/time.c
index 86729042e4c..6a08660b4fa 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -36,6 +36,7 @@
36#include <linux/security.h> 36#include <linux/security.h>
37#include <linux/fs.h> 37#include <linux/fs.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/math64.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/unistd.h> 42#include <asm/unistd.h>
@@ -245,7 +246,7 @@ unsigned int inline jiffies_to_msecs(const unsigned long j)
245 return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); 246 return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
246#else 247#else
247# if BITS_PER_LONG == 32 248# if BITS_PER_LONG == 32
248 return ((u64)HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32; 249 return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
249# else 250# else
250 return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN; 251 return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
251# endif 252# endif
@@ -261,7 +262,7 @@ unsigned int inline jiffies_to_usecs(const unsigned long j)
261 return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); 262 return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
262#else 263#else
263# if BITS_PER_LONG == 32 264# if BITS_PER_LONG == 32
264 return ((u64)HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; 265 return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32;
265# else 266# else
266 return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN; 267 return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN;
267# endif 268# endif
@@ -391,13 +392,17 @@ EXPORT_SYMBOL(set_normalized_timespec);
391struct timespec ns_to_timespec(const s64 nsec) 392struct timespec ns_to_timespec(const s64 nsec)
392{ 393{
393 struct timespec ts; 394 struct timespec ts;
395 s32 rem;
394 396
395 if (!nsec) 397 if (!nsec)
396 return (struct timespec) {0, 0}; 398 return (struct timespec) {0, 0};
397 399
398 ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec); 400 ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
399 if (unlikely(nsec < 0)) 401 if (unlikely(rem < 0)) {
400 set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec); 402 ts.tv_sec--;
403 rem += NSEC_PER_SEC;
404 }
405 ts.tv_nsec = rem;
401 406
402 return ts; 407 return ts;
403} 408}
@@ -471,7 +476,7 @@ unsigned long msecs_to_jiffies(const unsigned int m)
471 if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) 476 if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
472 return MAX_JIFFY_OFFSET; 477 return MAX_JIFFY_OFFSET;
473 478
474 return ((u64)MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) 479 return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
475 >> MSEC_TO_HZ_SHR32; 480 >> MSEC_TO_HZ_SHR32;
476#endif 481#endif
477} 482}
@@ -486,7 +491,7 @@ unsigned long usecs_to_jiffies(const unsigned int u)
486#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) 491#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
487 return u * (HZ / USEC_PER_SEC); 492 return u * (HZ / USEC_PER_SEC);
488#else 493#else
489 return ((u64)USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) 494 return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
490 >> USEC_TO_HZ_SHR32; 495 >> USEC_TO_HZ_SHR32;
491#endif 496#endif
492} 497}
@@ -527,8 +532,10 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
527 * Convert jiffies to nanoseconds and separate with 532 * Convert jiffies to nanoseconds and separate with
528 * one divide. 533 * one divide.
529 */ 534 */
530 u64 nsec = (u64)jiffies * TICK_NSEC; 535 u32 rem;
531 value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); 536 value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
537 NSEC_PER_SEC, &rem);
538 value->tv_nsec = rem;
532} 539}
533EXPORT_SYMBOL(jiffies_to_timespec); 540EXPORT_SYMBOL(jiffies_to_timespec);
534 541
@@ -566,12 +573,11 @@ void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
566 * Convert jiffies to nanoseconds and separate with 573 * Convert jiffies to nanoseconds and separate with
567 * one divide. 574 * one divide.
568 */ 575 */
569 u64 nsec = (u64)jiffies * TICK_NSEC; 576 u32 rem;
570 long tv_usec;
571 577
572 value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec); 578 value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
573 tv_usec /= NSEC_PER_USEC; 579 NSEC_PER_SEC, &rem);
574 value->tv_usec = tv_usec; 580 value->tv_usec = rem / NSEC_PER_USEC;
575} 581}
576EXPORT_SYMBOL(jiffies_to_timeval); 582EXPORT_SYMBOL(jiffies_to_timeval);
577 583
@@ -587,9 +593,7 @@ clock_t jiffies_to_clock_t(long x)
587 return x / (HZ / USER_HZ); 593 return x / (HZ / USER_HZ);
588# endif 594# endif
589#else 595#else
590 u64 tmp = (u64)x * TICK_NSEC; 596 return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ);
591 do_div(tmp, (NSEC_PER_SEC / USER_HZ));
592 return (long)tmp;
593#endif 597#endif
594} 598}
595EXPORT_SYMBOL(jiffies_to_clock_t); 599EXPORT_SYMBOL(jiffies_to_clock_t);
@@ -601,16 +605,12 @@ unsigned long clock_t_to_jiffies(unsigned long x)
601 return ~0UL; 605 return ~0UL;
602 return x * (HZ / USER_HZ); 606 return x * (HZ / USER_HZ);
603#else 607#else
604 u64 jif;
605
606 /* Don't worry about loss of precision here .. */ 608 /* Don't worry about loss of precision here .. */
607 if (x >= ~0UL / HZ * USER_HZ) 609 if (x >= ~0UL / HZ * USER_HZ)
608 return ~0UL; 610 return ~0UL;
609 611
610 /* .. but do try to contain it here */ 612 /* .. but do try to contain it here */
611 jif = x * (u64) HZ; 613 return div_u64((u64)x * HZ, USER_HZ);
612 do_div(jif, USER_HZ);
613 return jif;
614#endif 614#endif
615} 615}
616EXPORT_SYMBOL(clock_t_to_jiffies); 616EXPORT_SYMBOL(clock_t_to_jiffies);
@@ -619,10 +619,9 @@ u64 jiffies_64_to_clock_t(u64 x)
619{ 619{
620#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 620#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
621# if HZ < USER_HZ 621# if HZ < USER_HZ
622 x *= USER_HZ; 622 x = div_u64(x * USER_HZ, HZ);
623 do_div(x, HZ);
624# elif HZ > USER_HZ 623# elif HZ > USER_HZ
625 do_div(x, HZ / USER_HZ); 624 x = div_u64(x, HZ / USER_HZ);
626# else 625# else
627 /* Nothing to do */ 626 /* Nothing to do */
628# endif 627# endif
@@ -632,8 +631,7 @@ u64 jiffies_64_to_clock_t(u64 x)
632 * but even this doesn't overflow in hundreds of years 631 * but even this doesn't overflow in hundreds of years
633 * in 64 bits, so.. 632 * in 64 bits, so..
634 */ 633 */
635 x *= TICK_NSEC; 634 x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ));
636 do_div(x, (NSEC_PER_SEC / USER_HZ));
637#endif 635#endif
638 return x; 636 return x;
639} 637}
@@ -642,21 +640,17 @@ EXPORT_SYMBOL(jiffies_64_to_clock_t);
642u64 nsec_to_clock_t(u64 x) 640u64 nsec_to_clock_t(u64 x)
643{ 641{
644#if (NSEC_PER_SEC % USER_HZ) == 0 642#if (NSEC_PER_SEC % USER_HZ) == 0
645 do_div(x, (NSEC_PER_SEC / USER_HZ)); 643 return div_u64(x, NSEC_PER_SEC / USER_HZ);
646#elif (USER_HZ % 512) == 0 644#elif (USER_HZ % 512) == 0
647 x *= USER_HZ/512; 645 return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512);
648 do_div(x, (NSEC_PER_SEC / 512));
649#else 646#else
650 /* 647 /*
651 * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, 648 * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
652 * overflow after 64.99 years. 649 * overflow after 64.99 years.
653 * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... 650 * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
654 */ 651 */
655 x *= 9; 652 return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ);
656 do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) /
657 USER_HZ));
658#endif 653#endif
659 return x;
660} 654}
661 655
662#if (BITS_PER_LONG < 64) 656#if (BITS_PER_LONG < 64)
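
The conversions above all funnel through the new math64 helpers; jiffies_to_timespec(), for instance, now obtains the seconds and the nanosecond remainder from a single div_u64_rem() call. A userspace sketch of that one-divide split, assuming HZ=100 for the constants:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define TICK_NSEC    10000000ULL        /* HZ=100 for the sketch */

static void jiffies_to_ts(unsigned long jiffies,
                          uint64_t *sec, uint32_t *nsec)
{
        uint64_t n = (uint64_t)jiffies * TICK_NSEC;

        *sec  = n / NSEC_PER_SEC;       /* div_u64_rem() in-kernel */
        *nsec = (uint32_t)(n % NSEC_PER_SEC);
}

int main(void)
{
        uint64_t s;
        uint32_t ns;

        jiffies_to_ts(250, &s, &ns);    /* 2.5 s at HZ=100 */
        printf("%llu.%09u\n", (unsigned long long)s, ns);
        return 0;
}
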
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 73961f35fdc..dadde5361f3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -471,10 +471,10 @@ sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
471/* 471/*
472 * Sysfs setup bits: 472 * Sysfs setup bits:
473 */ 473 */
474static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, 474static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
475 sysfs_override_clocksource); 475 sysfs_override_clocksource);
476 476
477static SYSDEV_ATTR(available_clocksource, 0600, 477static SYSDEV_ATTR(available_clocksource, 0444,
478 sysfs_show_available_clocksources, NULL); 478 sysfs_show_available_clocksources, NULL);
479 479
480static struct sysdev_class clocksource_sysclass = { 480static struct sysdev_class clocksource_sysclass = {
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5fd9b946977..5125ddd8196 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -15,7 +15,8 @@
15#include <linux/jiffies.h> 15#include <linux/jiffies.h>
16#include <linux/hrtimer.h> 16#include <linux/hrtimer.h>
17#include <linux/capability.h> 17#include <linux/capability.h>
18#include <asm/div64.h> 18#include <linux/math64.h>
19#include <linux/clocksource.h>
19#include <asm/timex.h> 20#include <asm/timex.h>
20 21
21/* 22/*
@@ -23,11 +24,14 @@
23 */ 24 */
24unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ 25unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
25unsigned long tick_nsec; /* ACTHZ period (nsec) */ 26unsigned long tick_nsec; /* ACTHZ period (nsec) */
26static u64 tick_length, tick_length_base; 27u64 tick_length;
28static u64 tick_length_base;
29
30static struct hrtimer leap_timer;
27 31
28#define MAX_TICKADJ 500 /* microsecs */ 32#define MAX_TICKADJ 500 /* microsecs */
29#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ 33#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
30 TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) 34 NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
31 35
32/* 36/*
33 * phase-lock loop variables 37 * phase-lock loop variables
@@ -35,11 +39,12 @@ static u64 tick_length, tick_length_base;
35/* TIME_ERROR prevents overwriting the CMOS clock */ 39/* TIME_ERROR prevents overwriting the CMOS clock */
36static int time_state = TIME_OK; /* clock synchronization status */ 40static int time_state = TIME_OK; /* clock synchronization status */
37int time_status = STA_UNSYNC; /* clock status bits */ 41int time_status = STA_UNSYNC; /* clock status bits */
38static s64 time_offset; /* time adjustment (ns) */ 42static long time_tai; /* TAI offset (s) */
43static s64 time_offset; /* time adjustment (ns) */
39static long time_constant = 2; /* pll time constant */ 44static long time_constant = 2; /* pll time constant */
40long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ 45long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
41long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ 46long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
42long time_freq; /* frequency offset (scaled ppm)*/ 47static s64 time_freq; /* frequency offset (scaled ns/s)*/
43static long time_reftime; /* time at last adjustment (s) */ 48static long time_reftime; /* time at last adjustment (s) */
44long time_adjust; 49long time_adjust;
45static long ntp_tick_adj; 50static long ntp_tick_adj;
@@ -47,16 +52,56 @@ static long ntp_tick_adj;
47static void ntp_update_frequency(void) 52static void ntp_update_frequency(void)
48{ 53{
49 u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) 54 u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
50 << TICK_LENGTH_SHIFT; 55 << NTP_SCALE_SHIFT;
51 second_length += (s64)ntp_tick_adj << TICK_LENGTH_SHIFT; 56 second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT;
52 second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); 57 second_length += time_freq;
53 58
54 tick_length_base = second_length; 59 tick_length_base = second_length;
55 60
56 do_div(second_length, HZ); 61 tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
57 tick_nsec = second_length >> TICK_LENGTH_SHIFT; 62 tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ);
63}
64
65static void ntp_update_offset(long offset)
66{
67 long mtemp;
68 s64 freq_adj;
69
70 if (!(time_status & STA_PLL))
71 return;
58 72
59 do_div(tick_length_base, NTP_INTERVAL_FREQ); 73 if (!(time_status & STA_NANO))
74 offset *= NSEC_PER_USEC;
75
76 /*
77 * Scale the phase adjustment and
78 * clamp to the operating range.
79 */
80 offset = min(offset, MAXPHASE);
81 offset = max(offset, -MAXPHASE);
82
83 /*
84 * Select how the frequency is to be controlled
85 * and in which mode (PLL or FLL).
86 */
87 if (time_status & STA_FREQHOLD || time_reftime == 0)
88 time_reftime = xtime.tv_sec;
89 mtemp = xtime.tv_sec - time_reftime;
90 time_reftime = xtime.tv_sec;
91
92 freq_adj = (s64)offset * mtemp;
93 freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant);
94 time_status &= ~STA_MODE;
95 if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) {
96 freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL),
97 mtemp);
98 time_status |= STA_MODE;
99 }
100 freq_adj += time_freq;
101 freq_adj = min(freq_adj, MAXFREQ_SCALED);
102 time_freq = max(freq_adj, -MAXFREQ_SCALED);
103
104 time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
60} 105}
61 106
62/** 107/**
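
ntp_update_offset() above folds the old inline PLL/FLL logic into one helper, with time_freq now held as scaled ns/s carrying NTP_SCALE_SHIFT fractional bits. A simplified sketch of just the PLL term; the clamping, the FLL branch and the STA_NANO handling are dropped, and the shift constants mirror the kernel's:

#include <stdint.h>

#define NTP_SCALE_SHIFT 32
#define SHIFT_PLL       4

/* Phase offset (ns) weighted by seconds since the last update,
 * widened to NTP_SCALE_SHIFT fractional bits. Valid for
 * 0 <= time_constant <= MAXTC (10), keeping the shift count
 * non-negative; the kernel shifts signed values the same way. */
static int64_t pll_freq_adj(long offset_ns, long mtemp_sec,
                            long time_constant)
{
        int64_t freq_adj = (int64_t)offset_ns * mtemp_sec;

        return freq_adj << (NTP_SCALE_SHIFT
                            - 2 * (SHIFT_PLL + 2 + time_constant));
}
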
@@ -78,62 +123,70 @@ void ntp_clear(void)
78} 123}
79 124
80/* 125/*
81 * this routine handles the overflow of the microsecond field 126 * Leap second processing. If in leap-insert state at the end of the
82 * 127 * day, the system clock is set back one second; if in leap-delete
83 * The tricky bits of code to handle the accurate clock support 128 * state, the system clock is set ahead one second.
84 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
85 * They were originally developed for SUN and DEC kernels.
86 * All the kudos should go to Dave for this stuff.
87 */ 129 */
88void second_overflow(void) 130static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
89{ 131{
90 long time_adj; 132 enum hrtimer_restart res = HRTIMER_NORESTART;
91 133
92 /* Bump the maxerror field */ 134 write_seqlock_irq(&xtime_lock);
93 time_maxerror += MAXFREQ >> SHIFT_USEC;
94 if (time_maxerror > NTP_PHASE_LIMIT) {
95 time_maxerror = NTP_PHASE_LIMIT;
96 time_status |= STA_UNSYNC;
97 }
98 135
99 /*
100 * Leap second processing. If in leap-insert state at the end of the
101 * day, the system clock is set back one second; if in leap-delete
102 * state, the system clock is set ahead one second. The microtime()
103 * routine or external clock driver will insure that reported time is
104 * always monotonic. The ugly divides should be replaced.
105 */
106 switch (time_state) { 136 switch (time_state) {
107 case TIME_OK: 137 case TIME_OK:
108 if (time_status & STA_INS)
109 time_state = TIME_INS;
110 else if (time_status & STA_DEL)
111 time_state = TIME_DEL;
112 break; 138 break;
113 case TIME_INS: 139 case TIME_INS:
114 if (xtime.tv_sec % 86400 == 0) { 140 xtime.tv_sec--;
115 xtime.tv_sec--; 141 wall_to_monotonic.tv_sec++;
116 wall_to_monotonic.tv_sec++; 142 time_state = TIME_OOP;
117 time_state = TIME_OOP; 143 printk(KERN_NOTICE "Clock: "
118 printk(KERN_NOTICE "Clock: inserting leap second " 144 "inserting leap second 23:59:60 UTC\n");
119 "23:59:60 UTC\n"); 145 leap_timer.expires = ktime_add_ns(leap_timer.expires,
120 } 146 NSEC_PER_SEC);
147 res = HRTIMER_RESTART;
121 break; 148 break;
122 case TIME_DEL: 149 case TIME_DEL:
123 if ((xtime.tv_sec + 1) % 86400 == 0) { 150 xtime.tv_sec++;
124 xtime.tv_sec++; 151 time_tai--;
125 wall_to_monotonic.tv_sec--; 152 wall_to_monotonic.tv_sec--;
126 time_state = TIME_WAIT; 153 time_state = TIME_WAIT;
127 printk(KERN_NOTICE "Clock: deleting leap second " 154 printk(KERN_NOTICE "Clock: "
128 "23:59:59 UTC\n"); 155 "deleting leap second 23:59:59 UTC\n");
129 }
130 break; 156 break;
131 case TIME_OOP: 157 case TIME_OOP:
158 time_tai++;
132 time_state = TIME_WAIT; 159 time_state = TIME_WAIT;
133 break; 160 /* fall through */
134 case TIME_WAIT: 161 case TIME_WAIT:
135 if (!(time_status & (STA_INS | STA_DEL))) 162 if (!(time_status & (STA_INS | STA_DEL)))
136 time_state = TIME_OK; 163 time_state = TIME_OK;
164 break;
165 }
166 update_vsyscall(&xtime, clock);
167
168 write_sequnlock_irq(&xtime_lock);
169
170 return res;
171}
172
173/*
174 * this routine handles the overflow of the microsecond field
175 *
176 * The tricky bits of code to handle the accurate clock support
177 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
178 * They were originally developed for SUN and DEC kernels.
179 * All the kudos should go to Dave for this stuff.
180 */
181void second_overflow(void)
182{
183 s64 time_adj;
184
185 /* Bump the maxerror field */
186 time_maxerror += MAXFREQ / NSEC_PER_USEC;
187 if (time_maxerror > NTP_PHASE_LIMIT) {
188 time_maxerror = NTP_PHASE_LIMIT;
189 time_status |= STA_UNSYNC;
137 } 190 }
138 191
139 /* 192 /*
@@ -143,7 +196,7 @@ void second_overflow(void)
143 tick_length = tick_length_base; 196 tick_length = tick_length_base;
144 time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); 197 time_adj = shift_right(time_offset, SHIFT_PLL + time_constant);
145 time_offset -= time_adj; 198 time_offset -= time_adj;
146 tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE); 199 tick_length += time_adj;
147 200
148 if (unlikely(time_adjust)) { 201 if (unlikely(time_adjust)) {
149 if (time_adjust > MAX_TICKADJ) { 202 if (time_adjust > MAX_TICKADJ) {
@@ -154,25 +207,12 @@ void second_overflow(void)
154 tick_length -= MAX_TICKADJ_SCALED; 207 tick_length -= MAX_TICKADJ_SCALED;
155 } else { 208 } else {
156 tick_length += (s64)(time_adjust * NSEC_PER_USEC / 209 tick_length += (s64)(time_adjust * NSEC_PER_USEC /
157 NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; 210 NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT;
158 time_adjust = 0; 211 time_adjust = 0;
159 } 212 }
160 } 213 }
161} 214}
162 215
163/*
164 * Return how long ticks are at the moment, that is, how much time
165 * update_wall_time_one_tick will add to xtime next time we call it
166 * (assuming no calls to do_adjtimex in the meantime).
167 * The return value is in fixed-point nanoseconds shifted by the
168 * specified number of bits to the right of the binary point.
169 * This function has no side-effects.
170 */
171u64 current_tick_length(void)
172{
173 return tick_length;
174}
175
176#ifdef CONFIG_GENERIC_CMOS_UPDATE 216#ifdef CONFIG_GENERIC_CMOS_UPDATE
177 217
178/* Disable the cmos update - used by virtualization and embedded */ 218/* Disable the cmos update - used by virtualization and embedded */
@@ -236,8 +276,8 @@ static inline void notify_cmos_timer(void) { }
236 */ 276 */
237int do_adjtimex(struct timex *txc) 277int do_adjtimex(struct timex *txc)
238{ 278{
239 long mtemp, save_adjust, rem; 279 struct timespec ts;
240 s64 freq_adj, temp64; 280 long save_adjust, sec;
241 int result; 281 int result;
242 282
243 /* In order to modify anything, you gotta be super-user! */ 283 /* In order to modify anything, you gotta be super-user! */
@@ -247,147 +287,132 @@ int do_adjtimex(struct timex *txc)
247 /* Now we validate the data before disabling interrupts */ 287 /* Now we validate the data before disabling interrupts */
248 288
249 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) { 289 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) {
250 /* singleshot must not be used with any other mode bits */ 290 /* singleshot must not be used with any other mode bits */
251 if (txc->modes != ADJ_OFFSET_SINGLESHOT && 291 if (txc->modes & ~ADJ_OFFSET_SS_READ)
252 txc->modes != ADJ_OFFSET_SS_READ)
253 return -EINVAL; 292 return -EINVAL;
254 } 293 }
255 294
256 if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
257 /* adjustment Offset limited to +- .512 seconds */
258 if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
259 return -EINVAL;
260
261 /* if the quartz is off by more than 10% something is VERY wrong ! */ 295 /* if the quartz is off by more than 10% something is VERY wrong ! */
262 if (txc->modes & ADJ_TICK) 296 if (txc->modes & ADJ_TICK)
263 if (txc->tick < 900000/USER_HZ || 297 if (txc->tick < 900000/USER_HZ ||
264 txc->tick > 1100000/USER_HZ) 298 txc->tick > 1100000/USER_HZ)
265 return -EINVAL; 299 return -EINVAL;
266 300
301 if (time_state != TIME_OK && txc->modes & ADJ_STATUS)
302 hrtimer_cancel(&leap_timer);
303 getnstimeofday(&ts);
304
267 write_seqlock_irq(&xtime_lock); 305 write_seqlock_irq(&xtime_lock);
268 result = time_state; /* mostly `TIME_OK' */
269 306
270 /* Save for later - semantics of adjtime is to return old value */ 307 /* Save for later - semantics of adjtime is to return old value */
271 save_adjust = time_adjust; 308 save_adjust = time_adjust;
272 309
273#if 0 /* STA_CLOCKERR is never set yet */
274 time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
275#endif
276 /* If there are input parameters, then process them */ 310 /* If there are input parameters, then process them */
277 if (txc->modes) 311 if (txc->modes) {
278 { 312 if (txc->modes & ADJ_STATUS) {
279 if (txc->modes & ADJ_STATUS) /* only set allowed bits */ 313 if ((time_status & STA_PLL) &&
280 time_status = (txc->status & ~STA_RONLY) | 314 !(txc->status & STA_PLL)) {
281 (time_status & STA_RONLY); 315 time_state = TIME_OK;
282 316 time_status = STA_UNSYNC;
283 if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ 317 }
284 if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { 318 /* only set allowed bits */
285 result = -EINVAL; 319 time_status &= STA_RONLY;
286 goto leave; 320 time_status |= txc->status & ~STA_RONLY;
287 } 321
288 time_freq = ((s64)txc->freq * NSEC_PER_USEC) 322 switch (time_state) {
289 >> (SHIFT_USEC - SHIFT_NSEC); 323 case TIME_OK:
290 } 324 start_timer:
291 325 sec = ts.tv_sec;
292 if (txc->modes & ADJ_MAXERROR) { 326 if (time_status & STA_INS) {
293 if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { 327 time_state = TIME_INS;
294 result = -EINVAL; 328 sec += 86400 - sec % 86400;
295 goto leave; 329 hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS);
330 } else if (time_status & STA_DEL) {
331 time_state = TIME_DEL;
332 sec += 86400 - (sec + 1) % 86400;
333 hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS);
334 }
335 break;
336 case TIME_INS:
337 case TIME_DEL:
338 time_state = TIME_OK;
339 goto start_timer;
340 break;
341 case TIME_WAIT:
342 if (!(time_status & (STA_INS | STA_DEL)))
343 time_state = TIME_OK;
344 break;
345 case TIME_OOP:
346 hrtimer_restart(&leap_timer);
347 break;
348 }
296 } 349 }
297 time_maxerror = txc->maxerror;
298 }
299 350
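
The switch above replaces the old tick-driven leap-second bookkeeping with an hrtimer armed at the relevant UTC day boundary: STA_INS expires at the next midnight (where a second is inserted), STA_DEL at the last second of the current day (which gets skipped). A sketch of the two expiry calculations, assuming ts.tv_sec counts UTC seconds since the epoch:

    #define SECS_PER_DAY    86400

    /* next 00:00:00 UTC, where TIME_INS inserts the leap second */
    static time_t leap_ins_expiry(time_t sec)
    {
            return sec + (SECS_PER_DAY - sec % SECS_PER_DAY);
    }

    /* next 23:59:59 UTC, the second TIME_DEL deletes */
    static time_t leap_del_expiry(time_t sec)
    {
            return sec + (SECS_PER_DAY - (sec + 1) % SECS_PER_DAY);
    }

TIME_INS and TIME_DEL drop back to TIME_OK first, so a changed STA_INS/STA_DEL request simply re-arms the timer via the start_timer label.
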
300 if (txc->modes & ADJ_ESTERROR) { 351 if (txc->modes & ADJ_NANO)
301 if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { 352 time_status |= STA_NANO;
302 result = -EINVAL; 353 if (txc->modes & ADJ_MICRO)
303 goto leave; 354 time_status &= ~STA_NANO;
355
356 if (txc->modes & ADJ_FREQUENCY) {
357 time_freq = (s64)txc->freq * PPM_SCALE;
358 time_freq = min(time_freq, MAXFREQ_SCALED);
359 time_freq = max(time_freq, -MAXFREQ_SCALED);
304 } 360 }
305 time_esterror = txc->esterror;
306 }
307 361
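
txc->freq arrives in "scaled ppm" (parts per million with SHIFT_USEC = 16 fractional bits); PPM_SCALE lifts that straight into the new internal format, nanoseconds per second with NTP_SCALE_SHIFT fractional bits, replacing the old SHIFT_USEC/SHIFT_NSEC shuffle. A sketch of the conversion, assuming NTP_SCALE_SHIFT = 32 as in this era's <linux/timex.h>:

    #define SHIFT_USEC       16     /* assumed values, for illustration */
    #define NTP_SCALE_SHIFT  32
    #define NSEC_PER_USEC    1000L
    #define PPM_SCALE        ((s64)NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))

    /* (usec/sec) << 16  ->  (nsec/sec) << 32 */
    static s64 scaled_ppm_to_internal(long freq)
    {
            return (s64)freq * PPM_SCALE;
    }

A request of +1 ppm (freq = 1 << 16) thus becomes 1000 << 32, i.e. 1000 ns/s in the internal fixed point, and the min/max pair clamps the result to +-MAXFREQ_SCALED.
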
308 if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ 362 if (txc->modes & ADJ_MAXERROR)
309 if (txc->constant < 0) { /* NTP v4 uses values > 6 */ 363 time_maxerror = txc->maxerror;
310 result = -EINVAL; 364 if (txc->modes & ADJ_ESTERROR)
311 goto leave; 365 time_esterror = txc->esterror;
366
367 if (txc->modes & ADJ_TIMECONST) {
368 time_constant = txc->constant;
369 if (!(time_status & STA_NANO))
370 time_constant += 4;
371 time_constant = min(time_constant, (long)MAXTC);
372 time_constant = max(time_constant, 0L);
312 } 373 }
313 time_constant = min(txc->constant + 4, (long)MAXTC);
314 }
315 374
316 if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ 375 if (txc->modes & ADJ_TAI && txc->constant > 0)
317 if (txc->modes == ADJ_OFFSET_SINGLESHOT) { 376 time_tai = txc->constant;
318 /* adjtime() is independent from ntp_adjtime() */ 377
319 time_adjust = txc->offset; 378 if (txc->modes & ADJ_OFFSET) {
379 if (txc->modes == ADJ_OFFSET_SINGLESHOT)
380 /* adjtime() is independent from ntp_adjtime() */
381 time_adjust = txc->offset;
382 else
383 ntp_update_offset(txc->offset);
320 } 384 }
321 else if (time_status & STA_PLL) { 385 if (txc->modes & ADJ_TICK)
322 time_offset = txc->offset * NSEC_PER_USEC; 386 tick_usec = txc->tick;
323 387
324 /* 388 if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
325 * Scale the phase adjustment and 389 ntp_update_frequency();
326 * clamp to the operating range. 390 }
327 */ 391
328 time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC); 392 result = time_state; /* mostly `TIME_OK' */
329 time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC); 393 if (time_status & (STA_UNSYNC|STA_CLOCKERR))
330
331 /*
332 * Select whether the frequency is to be controlled
333 * and in which mode (PLL or FLL). Clamp to the operating
334 * range. Ugly multiply/divide should be replaced someday.
335 */
336
337 if (time_status & STA_FREQHOLD || time_reftime == 0)
338 time_reftime = xtime.tv_sec;
339 mtemp = xtime.tv_sec - time_reftime;
340 time_reftime = xtime.tv_sec;
341
342 freq_adj = time_offset * mtemp;
343 freq_adj = shift_right(freq_adj, time_constant * 2 +
344 (SHIFT_PLL + 2) * 2 - SHIFT_NSEC);
345 if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) {
346 u64 utemp64;
347 temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL);
348 if (time_offset < 0) {
349 utemp64 = -temp64;
350 do_div(utemp64, mtemp);
351 freq_adj -= utemp64;
352 } else {
353 utemp64 = temp64;
354 do_div(utemp64, mtemp);
355 freq_adj += utemp64;
356 }
357 }
358 freq_adj += time_freq;
359 freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
360 time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
361 time_offset = div_long_long_rem_signed(time_offset,
362 NTP_INTERVAL_FREQ,
363 &rem);
364 time_offset <<= SHIFT_UPDATE;
365 } /* STA_PLL */
366 } /* txc->modes & ADJ_OFFSET */
367 if (txc->modes & ADJ_TICK)
368 tick_usec = txc->tick;
369
370 if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
371 ntp_update_frequency();
372 } /* txc->modes */
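
Everything deleted above, the PLL damping, the FLL path for long update intervals, and the frequency clamp, moves behind ntp_update_offset(). The shape of that calculation, sketched from the removed code (SHIFT_PLL, SHIFT_FLL, MINSEC, MAXSEC and MAXFREQ_NSEC as in the pre-patch source; this outlines the old logic, not the literal body of the new helper):

    static s64 freq_adj_sketch(s64 offset, long mtemp /* secs since last update */)
    {
            /* PLL term: phase error times elapsed time, damped by the
             * time constant */
            s64 freq_adj = shift_right(offset * mtemp,
                                       time_constant * 2 +
                                       (SHIFT_PLL + 2) * 2 - SHIFT_NSEC);

            /* FLL term: direct frequency estimate when updates are rare
             * or STA_FLL asks for it */
            if (mtemp >= MINSEC && ((time_status & STA_FLL) || mtemp > MAXSEC))
                    freq_adj += (offset << (SHIFT_NSEC - SHIFT_FLL)) / mtemp;

            freq_adj += time_freq;
            freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
            return max(freq_adj, (s64)-MAXFREQ_NSEC);
    }

The plain signed division stands in for the do_div() dance in the removed lines, which had to split out the negative case by hand.
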
373leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
374 result = TIME_ERROR; 394 result = TIME_ERROR;
375 395
376 if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || 396 if ((txc->modes == ADJ_OFFSET_SINGLESHOT) ||
377 (txc->modes == ADJ_OFFSET_SS_READ)) 397 (txc->modes == ADJ_OFFSET_SS_READ))
378 txc->offset = save_adjust; 398 txc->offset = save_adjust;
379 else 399 else {
380 txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) * 400 txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
381 NTP_INTERVAL_FREQ / 1000; 401 NTP_SCALE_SHIFT);
382 txc->freq = (time_freq / NSEC_PER_USEC) << 402 if (!(time_status & STA_NANO))
383 (SHIFT_USEC - SHIFT_NSEC); 403 txc->offset /= NSEC_PER_USEC;
404 }
405 txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
406 (s64)PPM_SCALE_INV,
407 NTP_SCALE_SHIFT);
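
On the way out, the fixed point is folded back into user units: time_offset carries NTP_SCALE_SHIFT fractional bits per NTP interval, so multiplying by NTP_INTERVAL_FREQ and shifting yields nanoseconds (divided down to microseconds unless STA_NANO), while PPM_SCALE_INV, applied after the PPM_SCALE_INV_SHIFT pre-shift that keeps the multiply in range, undoes PPM_SCALE for txc->freq. A condensed sketch of the offset read-back:

    /* internal offset -> value reported in txc->offset */
    static long ntp_offset_out(s64 time_offset, int status)
    {
            long off = shift_right(time_offset * NTP_INTERVAL_FREQ,
                                   NTP_SCALE_SHIFT);
            if (!(status & STA_NANO))
                    off /= NSEC_PER_USEC;   /* microseconds unless STA_NANO */
            return off;
    }
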
384 txc->maxerror = time_maxerror; 408 txc->maxerror = time_maxerror;
385 txc->esterror = time_esterror; 409 txc->esterror = time_esterror;
386 txc->status = time_status; 410 txc->status = time_status;
387 txc->constant = time_constant; 411 txc->constant = time_constant;
388 txc->precision = 1; 412 txc->precision = 1;
389 txc->tolerance = MAXFREQ; 413 txc->tolerance = MAXFREQ_SCALED / PPM_SCALE;
390 txc->tick = tick_usec; 414 txc->tick = tick_usec;
415 txc->tai = time_tai;
391 416
392 /* PPS is not implemented, so these are zero */ 417 /* PPS is not implemented, so these are zero */
393 txc->ppsfreq = 0; 418 txc->ppsfreq = 0;
@@ -399,9 +424,15 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
399 txc->errcnt = 0; 424 txc->errcnt = 0;
400 txc->stbcnt = 0; 425 txc->stbcnt = 0;
401 write_sequnlock_irq(&xtime_lock); 426 write_sequnlock_irq(&xtime_lock);
402 do_gettimeofday(&txc->time); 427
428 txc->time.tv_sec = ts.tv_sec;
429 txc->time.tv_usec = ts.tv_nsec;
430 if (!(time_status & STA_NANO))
431 txc->time.tv_usec /= NSEC_PER_USEC;
432
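
With nanosecond mode the timeval inside struct timex is overloaded: tv_usec holds nanoseconds whenever STA_NANO is set. A caller-side sketch of decoding it (plain C, nothing kernel-specific):

    /* total nanoseconds represented by txc->time after the call */
    static long long txc_time_ns(const struct timex *txc)
    {
            long long ns = (long long)txc->time.tv_sec * 1000000000LL;

            if (txc->status & STA_NANO)
                    return ns + txc->time.tv_usec;    /* already ns */
            return ns + txc->time.tv_usec * 1000LL;   /* usec -> ns */
    }
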
403 notify_cmos_timer(); 433 notify_cmos_timer();
404 return(result); 434
435 return result;
405} 436}
406 437
407static int __init ntp_tick_adj_setup(char *str) 438static int __init ntp_tick_adj_setup(char *str)
@@ -411,3 +442,10 @@ static int __init ntp_tick_adj_setup(char *str)
411} 442}
412 443
413__setup("ntp_tick_adj=", ntp_tick_adj_setup); 444__setup("ntp_tick_adj=", ntp_tick_adj_setup);
445
446void __init ntp_init(void)
447{
448 ntp_clear();
449 hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
450 leap_timer.function = ntp_leap_second;
451}
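
ntp_init() is the new boot hook: timekeeping_init() calls it (see the timekeeping.c hunk below) so the leap timer exists before the first ADJ_STATUS request can arm it. The arming side uses the stock absolute CLOCK_REALTIME hrtimer pattern; a generic sketch (ntp_leap_second itself is defined earlier in ntp.c, outside this hunk):

    static struct hrtimer my_timer;     /* illustrative only */

    static void arm_abs_realtime_timer(time_t expiry_sec,
                    enum hrtimer_restart (*fn)(struct hrtimer *))
    {
            hrtimer_init(&my_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
            my_timer.function = fn;
            hrtimer_start(&my_timer, ktime_set(expiry_sec, 0),
                          HRTIMER_MODE_ABS);
    }
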
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2d6087c7cf9..e91c29f961c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -53,7 +53,7 @@ void update_xtime_cache(u64 nsec)
53 timespec_add_ns(&xtime_cache, nsec); 53 timespec_add_ns(&xtime_cache, nsec);
54} 54}
55 55
56static struct clocksource *clock; /* pointer to current clocksource */ 56struct clocksource *clock;
57 57
58 58
59#ifdef CONFIG_GENERIC_TIME 59#ifdef CONFIG_GENERIC_TIME
@@ -246,7 +246,7 @@ void __init timekeeping_init(void)
246 246
247 write_seqlock_irqsave(&xtime_lock, flags); 247 write_seqlock_irqsave(&xtime_lock, flags);
248 248
249 ntp_clear(); 249 ntp_init();
250 250
251 clock = clocksource_get_next(); 251 clock = clocksource_get_next();
252 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 252 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -371,7 +371,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
371 * here. This is tuned so that an error of about 1 msec is adjusted 371 * here. This is tuned so that an error of about 1 msec is adjusted
372 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). 372 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
373 */ 373 */
374 error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); 374 error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
375 error2 = abs(error2); 375 error2 = abs(error2);
376 for (look_ahead = 0; error2 > 0; look_ahead++) 376 for (look_ahead = 0; error2 > 0; look_ahead++)
377 error2 >>= 2; 377 error2 >>= 2;
@@ -380,8 +380,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
380 * Now calculate the error in (1 << look_ahead) ticks, but first 380 * Now calculate the error in (1 << look_ahead) ticks, but first
381 * remove the single look ahead already included in the error. 381 * remove the single look ahead already included in the error.
382 */ 382 */
383 tick_error = current_tick_length() >> 383 tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1);
384 (TICK_LENGTH_SHIFT - clock->shift + 1);
385 tick_error -= clock->xtime_interval >> 1; 384 tick_error -= clock->xtime_interval >> 1;
386 error = ((error - tick_error) >> look_ahead) + tick_error; 385 error = ((error - tick_error) >> look_ahead) + tick_error;
387 386
@@ -412,7 +411,7 @@ static void clocksource_adjust(s64 offset)
412 s64 error, interval = clock->cycle_interval; 411 s64 error, interval = clock->cycle_interval;
413 int adj; 412 int adj;
414 413
415 error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); 414 error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1);
416 if (error > interval) { 415 if (error > interval) {
417 error >>= 2; 416 error >>= 2;
418 if (likely(error <= interval)) 417 if (likely(error <= interval))
@@ -434,7 +433,7 @@ static void clocksource_adjust(s64 offset)
434 clock->xtime_interval += interval; 433 clock->xtime_interval += interval;
435 clock->xtime_nsec -= offset; 434 clock->xtime_nsec -= offset;
436 clock->error -= (interval - offset) << 435 clock->error -= (interval - offset) <<
437 (TICK_LENGTH_SHIFT - clock->shift); 436 (NTP_SCALE_SHIFT - clock->shift);
438} 437}
439 438
440/** 439/**
@@ -473,8 +472,8 @@ void update_wall_time(void)
473 } 472 }
474 473
475 /* accumulate error between NTP and clock interval */ 474 /* accumulate error between NTP and clock interval */
476 clock->error += current_tick_length(); 475 clock->error += tick_length;
477 clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); 476 clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
478 } 477 }
479 478
480 /* correct the clock when NTP error is too big */ 479 /* correct the clock when NTP error is too big */
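
The renames above keep update_wall_time()'s error bookkeeping in one fixed-point domain: per accumulated interval the clock owes tick_length (what NTP wants) minus the clocksource's own interval scaled up from clock->shift to NTP_SCALE_SHIFT. In outline:

    /* per-interval error accounting, NTP_SCALE_SHIFT fixed-point ns */
    static void accumulate_error(struct clocksource *clock)
    {
            clock->error += tick_length;
            clock->error -= (s64)clock->xtime_interval
                            << (NTP_SCALE_SHIFT - clock->shift);
    }

clocksource_adjust() then shifts clock->error back down by (NTP_SCALE_SHIFT - clock->shift - 1) to decide whether to speed the clock up or slow it down.
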
diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl
index 41468035473..eb51d76e058 100644
--- a/kernel/timeconst.pl
+++ b/kernel/timeconst.pl
@@ -1,7 +1,7 @@
1#!/usr/bin/perl 1#!/usr/bin/perl
2# ----------------------------------------------------------------------- 2# -----------------------------------------------------------------------
3# 3#
4# Copyright 2007 rPath, Inc. - All Rights Reserved 4# Copyright 2007-2008 rPath, Inc. - All Rights Reserved
5# 5#
6# This file is part of the Linux kernel, and is made available under 6# This file is part of the Linux kernel, and is made available under
7# the terms of the GNU General Public License version 2 or (at your 7# the terms of the GNU General Public License version 2 or (at your
@@ -20,198 +20,138 @@
20%canned_values = ( 20%canned_values = (
21 24 => [ 21 24 => [
22 '0xa6aaaaab','0x2aaaaaa',26, 22 '0xa6aaaaab','0x2aaaaaa',26,
23 '0xa6aaaaaaaaaaaaab','0x2aaaaaaaaaaaaaa',58,
24 125,3, 23 125,3,
25 '0xc49ba5e4','0x1fbe76c8b4',37, 24 '0xc49ba5e4','0x1fbe76c8b4',37,
26 '0xc49ba5e353f7ceda','0x1fbe76c8b439581062',69,
27 3,125, 25 3,125,
28 '0xa2c2aaab','0xaaaa',16, 26 '0xa2c2aaab','0xaaaa',16,
29 '0xa2c2aaaaaaaaaaab','0xaaaaaaaaaaaa',48,
30 125000,3, 27 125000,3,
31 '0xc9539b89','0x7fffbce4217d',47, 28 '0xc9539b89','0x7fffbce4217d',47,
32 '0xc9539b8887229e91','0x7fffbce4217d2849cb25',79,
33 3,125000, 29 3,125000,
34 ], 32 => [ 30 ], 32 => [
35 '0xfa000000','0x6000000',27, 31 '0xfa000000','0x6000000',27,
36 '0xfa00000000000000','0x600000000000000',59,
37 125,4, 32 125,4,
38 '0x83126e98','0xfdf3b645a',36, 33 '0x83126e98','0xfdf3b645a',36,
39 '0x83126e978d4fdf3c','0xfdf3b645a1cac0831',68,
40 4,125, 34 4,125,
41 '0xf4240000','0x0',17, 35 '0xf4240000','0x0',17,
42 '0xf424000000000000','0x0',49,
43 31250,1, 36 31250,1,
44 '0x8637bd06','0x3fff79c842fa',46, 37 '0x8637bd06','0x3fff79c842fa',46,
45 '0x8637bd05af6c69b6','0x3fff79c842fa5093964a',78,
46 1,31250, 38 1,31250,
47 ], 48 => [ 39 ], 48 => [
48 '0xa6aaaaab','0x6aaaaaa',27, 40 '0xa6aaaaab','0x6aaaaaa',27,
49 '0xa6aaaaaaaaaaaaab','0x6aaaaaaaaaaaaaa',59,
50 125,6, 41 125,6,
51 '0xc49ba5e4','0xfdf3b645a',36, 42 '0xc49ba5e4','0xfdf3b645a',36,
52 '0xc49ba5e353f7ceda','0xfdf3b645a1cac0831',68,
53 6,125, 43 6,125,
54 '0xa2c2aaab','0x15555',17, 44 '0xa2c2aaab','0x15555',17,
55 '0xa2c2aaaaaaaaaaab','0x1555555555555',49,
56 62500,3, 45 62500,3,
57 '0xc9539b89','0x3fffbce4217d',46, 46 '0xc9539b89','0x3fffbce4217d',46,
58 '0xc9539b8887229e91','0x3fffbce4217d2849cb25',78,
59 3,62500, 47 3,62500,
60 ], 64 => [ 48 ], 64 => [
61 '0xfa000000','0xe000000',28, 49 '0xfa000000','0xe000000',28,
62 '0xfa00000000000000','0xe00000000000000',60,
63 125,8, 50 125,8,
64 '0x83126e98','0x7ef9db22d',35, 51 '0x83126e98','0x7ef9db22d',35,
65 '0x83126e978d4fdf3c','0x7ef9db22d0e560418',67,
66 8,125, 52 8,125,
67 '0xf4240000','0x0',18, 53 '0xf4240000','0x0',18,
68 '0xf424000000000000','0x0',50,
69 15625,1, 54 15625,1,
70 '0x8637bd06','0x1fff79c842fa',45, 55 '0x8637bd06','0x1fff79c842fa',45,
71 '0x8637bd05af6c69b6','0x1fff79c842fa5093964a',77,
72 1,15625, 56 1,15625,
73 ], 100 => [ 57 ], 100 => [
74 '0xa0000000','0x0',28, 58 '0xa0000000','0x0',28,
75 '0xa000000000000000','0x0',60,
76 10,1, 59 10,1,
77 '0xcccccccd','0x733333333',35, 60 '0xcccccccd','0x733333333',35,
78 '0xcccccccccccccccd','0x73333333333333333',67,
79 1,10, 61 1,10,
80 '0x9c400000','0x0',18, 62 '0x9c400000','0x0',18,
81 '0x9c40000000000000','0x0',50,
82 10000,1, 63 10000,1,
83 '0xd1b71759','0x1fff2e48e8a7',45, 64 '0xd1b71759','0x1fff2e48e8a7',45,
84 '0xd1b71758e219652c','0x1fff2e48e8a71de69ad4',77,
85 1,10000, 65 1,10000,
86 ], 122 => [ 66 ], 122 => [
87 '0x8325c53f','0xfbcda3a',28, 67 '0x8325c53f','0xfbcda3a',28,
88 '0x8325c53ef368eb05','0xfbcda3ac10c9714',60,
89 500,61, 68 500,61,
90 '0xf9db22d1','0x7fbe76c8b',35, 69 '0xf9db22d1','0x7fbe76c8b',35,
91 '0xf9db22d0e560418a','0x7fbe76c8b43958106',67,
92 61,500, 70 61,500,
93 '0x8012e2a0','0x3ef36',18, 71 '0x8012e2a0','0x3ef36',18,
94 '0x8012e29f79b47583','0x3ef368eb04325',50,
95 500000,61, 72 500000,61,
96 '0xffda4053','0x1ffffbce4217',45, 73 '0xffda4053','0x1ffffbce4217',45,
97 '0xffda4052d666a983','0x1ffffbce4217d2849cb2',77,
98 61,500000, 74 61,500000,
99 ], 128 => [ 75 ], 128 => [
100 '0xfa000000','0x1e000000',29, 76 '0xfa000000','0x1e000000',29,
101 '0xfa00000000000000','0x1e00000000000000',61,
102 125,16, 77 125,16,
103 '0x83126e98','0x3f7ced916',34, 78 '0x83126e98','0x3f7ced916',34,
104 '0x83126e978d4fdf3c','0x3f7ced916872b020c',66,
105 16,125, 79 16,125,
106 '0xf4240000','0x40000',19, 80 '0xf4240000','0x40000',19,
107 '0xf424000000000000','0x4000000000000',51,
108 15625,2, 81 15625,2,
109 '0x8637bd06','0xfffbce4217d',44, 82 '0x8637bd06','0xfffbce4217d',44,
110 '0x8637bd05af6c69b6','0xfffbce4217d2849cb25',76,
111 2,15625, 83 2,15625,
112 ], 200 => [ 84 ], 200 => [
113 '0xa0000000','0x0',29, 85 '0xa0000000','0x0',29,
114 '0xa000000000000000','0x0',61,
115 5,1, 86 5,1,
116 '0xcccccccd','0x333333333',34, 87 '0xcccccccd','0x333333333',34,
117 '0xcccccccccccccccd','0x33333333333333333',66,
118 1,5, 88 1,5,
119 '0x9c400000','0x0',19, 89 '0x9c400000','0x0',19,
120 '0x9c40000000000000','0x0',51,
121 5000,1, 90 5000,1,
122 '0xd1b71759','0xfff2e48e8a7',44, 91 '0xd1b71759','0xfff2e48e8a7',44,
123 '0xd1b71758e219652c','0xfff2e48e8a71de69ad4',76,
124 1,5000, 92 1,5000,
125 ], 250 => [ 93 ], 250 => [
126 '0x80000000','0x0',29, 94 '0x80000000','0x0',29,
127 '0x8000000000000000','0x0',61,
128 4,1, 95 4,1,
129 '0x80000000','0x180000000',33, 96 '0x80000000','0x180000000',33,
130 '0x8000000000000000','0x18000000000000000',65,
131 1,4, 97 1,4,
132 '0xfa000000','0x0',20, 98 '0xfa000000','0x0',20,
133 '0xfa00000000000000','0x0',52,
134 4000,1, 99 4000,1,
135 '0x83126e98','0x7ff7ced9168',43, 100 '0x83126e98','0x7ff7ced9168',43,
136 '0x83126e978d4fdf3c','0x7ff7ced916872b020c4',75,
137 1,4000, 101 1,4000,
138 ], 256 => [ 102 ], 256 => [
139 '0xfa000000','0x3e000000',30, 103 '0xfa000000','0x3e000000',30,
140 '0xfa00000000000000','0x3e00000000000000',62,
141 125,32, 104 125,32,
142 '0x83126e98','0x1fbe76c8b',33, 105 '0x83126e98','0x1fbe76c8b',33,
143 '0x83126e978d4fdf3c','0x1fbe76c8b43958106',65,
144 32,125, 106 32,125,
145 '0xf4240000','0xc0000',20, 107 '0xf4240000','0xc0000',20,
146 '0xf424000000000000','0xc000000000000',52,
147 15625,4, 108 15625,4,
148 '0x8637bd06','0x7ffde7210be',43, 109 '0x8637bd06','0x7ffde7210be',43,
149 '0x8637bd05af6c69b6','0x7ffde7210be9424e592',75,
150 4,15625, 110 4,15625,
151 ], 300 => [ 111 ], 300 => [
152 '0xd5555556','0x2aaaaaaa',30, 112 '0xd5555556','0x2aaaaaaa',30,
153 '0xd555555555555556','0x2aaaaaaaaaaaaaaa',62,
154 10,3, 113 10,3,
155 '0x9999999a','0x1cccccccc',33, 114 '0x9999999a','0x1cccccccc',33,
156 '0x999999999999999a','0x1cccccccccccccccc',65,
157 3,10, 115 3,10,
158 '0xd0555556','0xaaaaa',20, 116 '0xd0555556','0xaaaaa',20,
159 '0xd055555555555556','0xaaaaaaaaaaaaa',52,
160 10000,3, 117 10000,3,
161 '0x9d495183','0x7ffcb923a29',43, 118 '0x9d495183','0x7ffcb923a29',43,
162 '0x9d495182a9930be1','0x7ffcb923a29c779a6b5',75,
163 3,10000, 119 3,10000,
164 ], 512 => [ 120 ], 512 => [
165 '0xfa000000','0x7e000000',31, 121 '0xfa000000','0x7e000000',31,
166 '0xfa00000000000000','0x7e00000000000000',63,
167 125,64, 122 125,64,
168 '0x83126e98','0xfdf3b645',32, 123 '0x83126e98','0xfdf3b645',32,
169 '0x83126e978d4fdf3c','0xfdf3b645a1cac083',64,
170 64,125, 124 64,125,
171 '0xf4240000','0x1c0000',21, 125 '0xf4240000','0x1c0000',21,
172 '0xf424000000000000','0x1c000000000000',53,
173 15625,8, 126 15625,8,
174 '0x8637bd06','0x3ffef39085f',42, 127 '0x8637bd06','0x3ffef39085f',42,
175 '0x8637bd05af6c69b6','0x3ffef39085f4a1272c9',74,
176 8,15625, 128 8,15625,
177 ], 1000 => [ 129 ], 1000 => [
178 '0x80000000','0x0',31, 130 '0x80000000','0x0',31,
179 '0x8000000000000000','0x0',63,
180 1,1, 131 1,1,
181 '0x80000000','0x0',31, 132 '0x80000000','0x0',31,
182 '0x8000000000000000','0x0',63,
183 1,1, 133 1,1,
184 '0xfa000000','0x0',22, 134 '0xfa000000','0x0',22,
185 '0xfa00000000000000','0x0',54,
186 1000,1, 135 1000,1,
187 '0x83126e98','0x1ff7ced9168',41, 136 '0x83126e98','0x1ff7ced9168',41,
188 '0x83126e978d4fdf3c','0x1ff7ced916872b020c4',73,
189 1,1000, 137 1,1000,
190 ], 1024 => [ 138 ], 1024 => [
191 '0xfa000000','0xfe000000',32, 139 '0xfa000000','0xfe000000',32,
192 '0xfa00000000000000','0xfe00000000000000',64,
193 125,128, 140 125,128,
194 '0x83126e98','0x7ef9db22',31, 141 '0x83126e98','0x7ef9db22',31,
195 '0x83126e978d4fdf3c','0x7ef9db22d0e56041',63,
196 128,125, 142 128,125,
197 '0xf4240000','0x3c0000',22, 143 '0xf4240000','0x3c0000',22,
198 '0xf424000000000000','0x3c000000000000',54,
199 15625,16, 144 15625,16,
200 '0x8637bd06','0x1fff79c842f',41, 145 '0x8637bd06','0x1fff79c842f',41,
201 '0x8637bd05af6c69b6','0x1fff79c842fa5093964',73,
202 16,15625, 146 16,15625,
203 ], 1200 => [ 147 ], 1200 => [
204 '0xd5555556','0xd5555555',32, 148 '0xd5555556','0xd5555555',32,
205 '0xd555555555555556','0xd555555555555555',64,
206 5,6, 149 5,6,
207 '0x9999999a','0x66666666',31, 150 '0x9999999a','0x66666666',31,
208 '0x999999999999999a','0x6666666666666666',63,
209 6,5, 151 6,5,
210 '0xd0555556','0x2aaaaa',22, 152 '0xd0555556','0x2aaaaa',22,
211 '0xd055555555555556','0x2aaaaaaaaaaaaa',54,
212 2500,3, 153 2500,3,
213 '0x9d495183','0x1ffcb923a29',41, 154 '0x9d495183','0x1ffcb923a29',41,
214 '0x9d495182a9930be1','0x1ffcb923a29c779a6b5',73,
215 3,2500, 155 3,2500,
216 ] 156 ]
217); 157);
@@ -264,6 +204,15 @@ sub fmuls($$$) {
264 return 0; 204 return 0;
265} 205}
266 206
207# Generate a hex value if the result fits in 64 bits;
208# otherwise skip.
209sub bignum_hex($) {
210 my($x) = @_;
211 my $s = $x->as_hex();
212
213 return (length($s) > 18) ? undef : $s;
214}
215
267# Provides mul, adj, and shr factors for a specific 216# Provides mul, adj, and shr factors for a specific
268# (bit, time, hz) combination 217# (bit, time, hz) combination
269sub muladj($$$) { 218sub muladj($$$) {
@@ -271,7 +220,7 @@ sub muladj($$$) {
271 my $s = fmuls($b, $t, $hz); 220 my $s = fmuls($b, $t, $hz);
272 my $m = fmul($s, $t, $hz); 221 my $m = fmul($s, $t, $hz);
273 my $a = fadj($s, $t, $hz); 222 my $a = fadj($s, $t, $hz);
274 return ($m->as_hex(), $a->as_hex(), $s); 223 return (bignum_hex($m), bignum_hex($a), $s);
275} 224}
276 225
277# Provides numerator, denominator values 226# Provides numerator, denominator values
@@ -288,12 +237,10 @@ sub conversions($$) {
288 237
289 # HZ_TO_xx 238 # HZ_TO_xx
290 push(@val, muladj(32, $t, $hz)); 239 push(@val, muladj(32, $t, $hz));
291 push(@val, muladj(64, $t, $hz));
292 push(@val, numden($t, $hz)); 240 push(@val, numden($t, $hz));
293 241
294 # xx_TO_HZ 242 # xx_TO_HZ
295 push(@val, muladj(32, $hz, $t)); 243 push(@val, muladj(32, $hz, $t));
296 push(@val, muladj(64, $hz, $t));
297 push(@val, numden($hz, $t)); 244 push(@val, numden($hz, $t));
298 245
299 return @val; 246 return @val;
@@ -318,6 +265,19 @@ sub compute_values($) {
318 return @val; 265 return @val;
319} 266}
320 267
268sub outputval($$)
269{
270 my($name, $val) = @_;
271 my $csuf;
272
273 if (defined($val)) {
274 if ($name !~ /SHR/) {
275 $val = "U64_C($val)";
276 }
277 printf "#define %-23s %s\n", $name.$csuf, $val.$csuf;
278 }
279}
280
321sub output($@) 281sub output($@)
322{ 282{
323 my($hz, @val) = @_; 283 my($hz, @val) = @_;
@@ -331,6 +291,7 @@ sub output($@)
331 print "\n"; 291 print "\n";
332 292
333 print "#include <linux/param.h>\n"; 293 print "#include <linux/param.h>\n";
294 print "#include <linux/types.h>\n";
334 295
335 print "\n"; 296 print "\n";
336 print "#if HZ != $hz\n"; 297 print "#if HZ != $hz\n";
@@ -340,15 +301,13 @@ sub output($@)
340 301
341 foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', 302 foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ',
342 'HZ_TO_USEC','USEC_TO_HZ') { 303 'HZ_TO_USEC','USEC_TO_HZ') {
343 foreach $bit (32, 64) { 304 foreach $bit (32) {
344 foreach $suf ('MUL', 'ADJ', 'SHR') { 305 foreach $suf ('MUL', 'ADJ', 'SHR') {
345 printf "#define %-23s %s\n", 306 outputval("${pfx}_$suf$bit", shift(@val));
346 "${pfx}_$suf$bit", shift(@val);
347 } 307 }
348 } 308 }
349 foreach $suf ('NUM', 'DEN') { 309 foreach $suf ('NUM', 'DEN') {
350 printf "#define %-23s %s\n", 310 outputval("${pfx}_$suf", shift(@val));
351 "${pfx}_$suf", shift(@val);
352 } 311 }
353 } 312 }
354 313
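
Dropping the 64-bit rows works because the 32-bit MUL/ADJ constants are now emitted wrapped in U64_C(), so the arithmetic is still done in 64 bits, and values whose hex form would not fit in 64 bits come back from bignum_hex() as undef and are simply not defined. The consumer side in kernel/time.c uses the triples roughly like this (a sketch; the HZ=1000 row above gives MUL32=0x80000000, ADJ32=0x0, SHR32=31, an exact multiply-shift):

    /* jiffies -> msec via the generated multiply/shift constants */
    static unsigned int jiffies_to_msecs_sketch(unsigned long j)
    {
            return ((u64)HZ_TO_MSEC_MUL32 * j + HZ_TO_MSEC_ADJ32)
                    >> HZ_TO_MSEC_SHR32;
    }

For HZ=1000 that evaluates to (j * 2^31) >> 31 = j, as expected.
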
@@ -356,6 +315,23 @@ sub output($@)
356 print "#endif /* KERNEL_TIMECONST_H */\n"; 315 print "#endif /* KERNEL_TIMECONST_H */\n";
357} 316}
358 317
318# Pretty-print Perl values
319sub perlvals(@) {
320 my $v;
321 my @l = ();
322
323 foreach $v (@_) {
324 if (!defined($v)) {
325 push(@l, 'undef');
326 } elsif ($v =~ /^0x/) {
327 push(@l, "\'".$v."\'");
328 } else {
329 push(@l, $v.'');
330 }
331 }
332 return join(',', @l);
333}
334
359($hz) = @ARGV; 335($hz) = @ARGV;
360 336
361# Use this to generate the %canned_values structure 337# Use this to generate the %canned_values structure
@@ -373,15 +349,15 @@ if ($hz eq '--can') {
373 print "$pf$hz => [\n"; 349 print "$pf$hz => [\n";
374 while (scalar(@values)) { 350 while (scalar(@values)) {
375 my $bit; 351 my $bit;
376 foreach $bit (32, 64) { 352 foreach $bit (32) {
377 my $m = shift(@values); 353 my $m = shift(@values);
378 my $a = shift(@values); 354 my $a = shift(@values);
379 my $s = shift(@values); 355 my $s = shift(@values);
380 print "\t\t\'",$m,"\',\'",$a,"\',",$s,",\n"; 356 print "\t\t", perlvals($m,$a,$s), ",\n";
381 } 357 }
382 my $n = shift(@values); 358 my $n = shift(@values);
383 my $d = shift(@values); 359 my $d = shift(@values);
384 print "\t\t",$n,',',$d,",\n"; 360 print "\t\t", perlvals($n,$d), ",\n";
385 } 361 }
386 print "\t]"; 362 print "\t]";
387 $pf = ', '; 363 $pf = ', ';
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 721093a2256..29fc39f1029 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -195,7 +195,6 @@ static void delayed_work_timer_fn(unsigned long __data)
195int queue_delayed_work(struct workqueue_struct *wq, 195int queue_delayed_work(struct workqueue_struct *wq,
196 struct delayed_work *dwork, unsigned long delay) 196 struct delayed_work *dwork, unsigned long delay)
197{ 197{
198 timer_stats_timer_set_start_info(&dwork->timer);
199 if (delay == 0) 198 if (delay == 0)
200 return queue_work(wq, &dwork->work); 199 return queue_work(wq, &dwork->work);
201 200
@@ -219,11 +218,12 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
219 struct timer_list *timer = &dwork->timer; 218 struct timer_list *timer = &dwork->timer;
220 struct work_struct *work = &dwork->work; 219 struct work_struct *work = &dwork->work;
221 220
222 timer_stats_timer_set_start_info(&dwork->timer);
223 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { 221 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
224 BUG_ON(timer_pending(timer)); 222 BUG_ON(timer_pending(timer));
225 BUG_ON(!list_empty(&work->entry)); 223 BUG_ON(!list_empty(&work->entry));
226 224
225 timer_stats_timer_set_start_info(&dwork->timer);
226
227 /* This stores cwq for the moment, for the timer_fn */ 227 /* This stores cwq for the moment, for the timer_fn */
228 set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id())); 228 set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
229 timer->expires = jiffies + delay; 229 timer->expires = jiffies + delay;
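
The timer-stats call is now made exactly once, inside queue_delayed_work_on() and only after the PENDING test confirms a timer really will be armed; the wrappers below lose their redundant copies. The resulting pattern, sketched:

    /* record debug bookkeeping only on the path that arms the timer */
    static void queue_dwork_sketch(struct delayed_work *dwork, int cpu,
                                   unsigned long delay)
    {
            struct timer_list *timer = &dwork->timer;

            if (!test_and_set_bit(WORK_STRUCT_PENDING,
                                  work_data_bits(&dwork->work))) {
                    timer_stats_timer_set_start_info(timer);
                    timer->expires = jiffies + delay;
                    add_timer_on(timer, cpu);
            }
    }
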
@@ -564,7 +564,6 @@ EXPORT_SYMBOL(schedule_work);
564int schedule_delayed_work(struct delayed_work *dwork, 564int schedule_delayed_work(struct delayed_work *dwork,
565 unsigned long delay) 565 unsigned long delay)
566{ 566{
567 timer_stats_timer_set_start_info(&dwork->timer);
568 return queue_delayed_work(keventd_wq, dwork, delay); 567 return queue_delayed_work(keventd_wq, dwork, delay);
569} 568}
570EXPORT_SYMBOL(schedule_delayed_work); 569EXPORT_SYMBOL(schedule_delayed_work);
@@ -581,7 +580,6 @@ EXPORT_SYMBOL(schedule_delayed_work);
581int schedule_delayed_work_on(int cpu, 580int schedule_delayed_work_on(int cpu,
582 struct delayed_work *dwork, unsigned long delay) 581 struct delayed_work *dwork, unsigned long delay)
583{ 582{
584 timer_stats_timer_set_start_info(&dwork->timer);
585 return queue_delayed_work_on(cpu, keventd_wq, dwork, delay); 583 return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
586} 584}
587EXPORT_SYMBOL(schedule_delayed_work_on); 585EXPORT_SYMBOL(schedule_delayed_work_on);