Diffstat (limited to 'kernel')
47 files changed, 2529 insertions, 1113 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 4d9bf5f8531f..e2c9d52cfe9e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
| @@ -87,6 +87,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o | |||
| 87 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o | 87 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o |
| 88 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o | 88 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o |
| 89 | obj-$(CONFIG_TINY_RCU) += rcutiny.o | 89 | obj-$(CONFIG_TINY_RCU) += rcutiny.o |
| 90 | obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o | ||
| 90 | obj-$(CONFIG_RELAY) += relay.o | 91 | obj-$(CONFIG_RELAY) += relay.o |
| 91 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 92 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
| 92 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 93 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c9483d8f6140..291ba3d04bea 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
| @@ -138,7 +138,7 @@ struct css_id { | |||
| 138 | * is called after synchronize_rcu(). But for safe use, css_is_removed() | 138 | * is called after synchronize_rcu(). But for safe use, css_is_removed() |
| 139 | * css_tryget() should be used for avoiding race. | 139 | * css_tryget() should be used for avoiding race. |
| 140 | */ | 140 | */ |
| 141 | struct cgroup_subsys_state *css; | 141 | struct cgroup_subsys_state __rcu *css; |
| 142 | /* | 142 | /* |
| 143 | * ID of this css. | 143 | * ID of this css. |
| 144 | */ | 144 | */ |
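Note: the cgroup.c hunk above only adds the sparse __rcu annotation to the css back-pointer in struct css_id; it does not change behaviour. As a hedged illustration of the accessor discipline the annotation implies (the struct and function names below are invented, not part of this patch):

#include <linux/rcupdate.h>

struct item {
        int value;
};

/* __rcu marks the pointer for sparse-based RCU checking: it should only be
 * read through rcu_dereference() and written through rcu_assign_pointer(). */
static struct item __rcu *current_item;

static int read_current_value(void)
{
        struct item *p;
        int val = -1;

        rcu_read_lock();
        p = rcu_dereference(current_item);      /* sanctioned load of an __rcu pointer */
        if (p)
                val = p->value;
        rcu_read_unlock();
        return val;
}

static void publish_item(struct item *new)
{
        rcu_assign_pointer(current_item, new);  /* pairs with rcu_dereference() above */
}

With RCU pointer checking enabled, sparse flags plain loads or stores of such a pointer, which is what the annotation on css_id->css buys.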
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b23c0979bbe7..51b143e2a07a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
| @@ -1397,7 +1397,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
| 1397 | if (tsk->flags & PF_THREAD_BOUND) | 1397 | if (tsk->flags & PF_THREAD_BOUND) |
| 1398 | return -EINVAL; | 1398 | return -EINVAL; |
| 1399 | 1399 | ||
| 1400 | ret = security_task_setscheduler(tsk, 0, NULL); | 1400 | ret = security_task_setscheduler(tsk); |
| 1401 | if (ret) | 1401 | if (ret) |
| 1402 | return ret; | 1402 | return ret; |
| 1403 | if (threadgroup) { | 1403 | if (threadgroup) { |
| @@ -1405,7 +1405,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
| 1405 | 1405 | ||
| 1406 | rcu_read_lock(); | 1406 | rcu_read_lock(); |
| 1407 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | 1407 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { |
| 1408 | ret = security_task_setscheduler(c, 0, NULL); | 1408 | ret = security_task_setscheduler(c); |
| 1409 | if (ret) { | 1409 | if (ret) { |
| 1410 | rcu_read_unlock(); | 1410 | rcu_read_unlock(); |
| 1411 | return ret; | 1411 | return ret; |
diff --git a/kernel/futex.c b/kernel/futex.c
index 6a3a5fa1526d..a118bf160e0b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
| @@ -91,6 +91,7 @@ struct futex_pi_state { | |||
| 91 | 91 | ||
| 92 | /** | 92 | /** |
| 93 | * struct futex_q - The hashed futex queue entry, one per waiting task | 93 | * struct futex_q - The hashed futex queue entry, one per waiting task |
| 94 | * @list: priority-sorted list of tasks waiting on this futex | ||
| 94 | * @task: the task waiting on the futex | 95 | * @task: the task waiting on the futex |
| 95 | * @lock_ptr: the hash bucket lock | 96 | * @lock_ptr: the hash bucket lock |
| 96 | * @key: the key the futex is hashed on | 97 | * @key: the key the futex is hashed on |
| @@ -104,7 +105,7 @@ struct futex_pi_state { | |||
| 104 | * | 105 | * |
| 105 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 106 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
| 106 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. | 107 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
| 107 | * The order of wakup is always to make the first condition true, then | 108 | * The order of wakeup is always to make the first condition true, then |
| 108 | * the second. | 109 | * the second. |
| 109 | * | 110 | * |
| 110 | * PI futexes are typically woken before they are removed from the hash list via | 111 | * PI futexes are typically woken before they are removed from the hash list via |
| @@ -295,7 +296,7 @@ void put_futex_key(int fshared, union futex_key *key) | |||
| 295 | * Slow path to fixup the fault we just took in the atomic write | 296 | * Slow path to fixup the fault we just took in the atomic write |
| 296 | * access to @uaddr. | 297 | * access to @uaddr. |
| 297 | * | 298 | * |
| 298 | * We have no generic implementation of a non destructive write to the | 299 | * We have no generic implementation of a non-destructive write to the |
| 299 | * user address. We know that we faulted in the atomic pagefault | 300 | * user address. We know that we faulted in the atomic pagefault |
| 300 | * disabled section so we can as well avoid the #PF overhead by | 301 | * disabled section so we can as well avoid the #PF overhead by |
| 301 | * calling get_user_pages() right away. | 302 | * calling get_user_pages() right away. |
| @@ -515,7 +516,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 515 | */ | 516 | */ |
| 516 | pi_state = this->pi_state; | 517 | pi_state = this->pi_state; |
| 517 | /* | 518 | /* |
| 518 | * Userspace might have messed up non PI and PI futexes | 519 | * Userspace might have messed up non-PI and PI futexes |
| 519 | */ | 520 | */ |
| 520 | if (unlikely(!pi_state)) | 521 | if (unlikely(!pi_state)) |
| 521 | return -EINVAL; | 522 | return -EINVAL; |
| @@ -736,8 +737,8 @@ static void wake_futex(struct futex_q *q) | |||
| 736 | 737 | ||
| 737 | /* | 738 | /* |
| 738 | * We set q->lock_ptr = NULL _before_ we wake up the task. If | 739 | * We set q->lock_ptr = NULL _before_ we wake up the task. If |
| 739 | * a non futex wake up happens on another CPU then the task | 740 | * a non-futex wake up happens on another CPU then the task |
| 740 | * might exit and p would dereference a non existing task | 741 | * might exit and p would dereference a non-existing task |
| 741 | * struct. Prevent this by holding a reference on p across the | 742 | * struct. Prevent this by holding a reference on p across the |
| 742 | * wake up. | 743 | * wake up. |
| 743 | */ | 744 | */ |
| @@ -1131,11 +1132,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
| 1131 | 1132 | ||
| 1132 | /** | 1133 | /** |
| 1133 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 | 1134 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 |
| 1134 | * uaddr1: source futex user address | 1135 | * @uaddr1: source futex user address |
| 1135 | * uaddr2: target futex user address | 1136 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
| 1136 | * nr_wake: number of waiters to wake (must be 1 for requeue_pi) | 1137 | * @uaddr2: target futex user address |
| 1137 | * nr_requeue: number of waiters to requeue (0-INT_MAX) | 1138 | * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) |
| 1138 | * requeue_pi: if we are attempting to requeue from a non-pi futex to a | 1139 | * @nr_requeue: number of waiters to requeue (0-INT_MAX) |
| 1140 | * @cmpval: @uaddr1 expected value (or %NULL) | ||
| 1141 | * @requeue_pi: if we are attempting to requeue from a non-pi futex to a | ||
| 1139 | * pi futex (pi to pi requeue is not supported) | 1142 | * pi futex (pi to pi requeue is not supported) |
| 1140 | * | 1143 | * |
| 1141 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire | 1144 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire |
| @@ -1360,10 +1363,10 @@ out: | |||
| 1360 | 1363 | ||
| 1361 | /* The key must be already stored in q->key. */ | 1364 | /* The key must be already stored in q->key. */ |
| 1362 | static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | 1365 | static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) |
| 1366 | __acquires(&hb->lock) | ||
| 1363 | { | 1367 | { |
| 1364 | struct futex_hash_bucket *hb; | 1368 | struct futex_hash_bucket *hb; |
| 1365 | 1369 | ||
| 1366 | get_futex_key_refs(&q->key); | ||
| 1367 | hb = hash_futex(&q->key); | 1370 | hb = hash_futex(&q->key); |
| 1368 | q->lock_ptr = &hb->lock; | 1371 | q->lock_ptr = &hb->lock; |
| 1369 | 1372 | ||
| @@ -1373,9 +1376,9 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
| 1373 | 1376 | ||
| 1374 | static inline void | 1377 | static inline void |
| 1375 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | 1378 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) |
| 1379 | __releases(&hb->lock) | ||
| 1376 | { | 1380 | { |
| 1377 | spin_unlock(&hb->lock); | 1381 | spin_unlock(&hb->lock); |
| 1378 | drop_futex_key_refs(&q->key); | ||
| 1379 | } | 1382 | } |
| 1380 | 1383 | ||
| 1381 | /** | 1384 | /** |
| @@ -1391,6 +1394,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | |||
| 1391 | * an example). | 1394 | * an example). |
| 1392 | */ | 1395 | */ |
| 1393 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 1396 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
| 1397 | __releases(&hb->lock) | ||
| 1394 | { | 1398 | { |
| 1395 | int prio; | 1399 | int prio; |
| 1396 | 1400 | ||
| @@ -1471,6 +1475,7 @@ retry: | |||
| 1471 | * and dropped here. | 1475 | * and dropped here. |
| 1472 | */ | 1476 | */ |
| 1473 | static void unqueue_me_pi(struct futex_q *q) | 1477 | static void unqueue_me_pi(struct futex_q *q) |
| 1478 | __releases(q->lock_ptr) | ||
| 1474 | { | 1479 | { |
| 1475 | WARN_ON(plist_node_empty(&q->list)); | 1480 | WARN_ON(plist_node_empty(&q->list)); |
| 1476 | plist_del(&q->list, &q->list.plist); | 1481 | plist_del(&q->list, &q->list.plist); |
| @@ -1480,8 +1485,6 @@ static void unqueue_me_pi(struct futex_q *q) | |||
| 1480 | q->pi_state = NULL; | 1485 | q->pi_state = NULL; |
| 1481 | 1486 | ||
| 1482 | spin_unlock(q->lock_ptr); | 1487 | spin_unlock(q->lock_ptr); |
| 1483 | |||
| 1484 | drop_futex_key_refs(&q->key); | ||
| 1485 | } | 1488 | } |
| 1486 | 1489 | ||
| 1487 | /* | 1490 | /* |
| @@ -1812,7 +1815,10 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
| 1812 | } | 1815 | } |
| 1813 | 1816 | ||
| 1814 | retry: | 1817 | retry: |
| 1815 | /* Prepare to wait on uaddr. */ | 1818 | /* |
| 1819 | * Prepare to wait on uaddr. On success, holds hb lock and increments | ||
| 1820 | * q.key refs. | ||
| 1821 | */ | ||
| 1816 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 1822 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 1817 | if (ret) | 1823 | if (ret) |
| 1818 | goto out; | 1824 | goto out; |
| @@ -1822,28 +1828,27 @@ retry: | |||
| 1822 | 1828 | ||
| 1823 | /* If we were woken (and unqueued), we succeeded, whatever. */ | 1829 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
| 1824 | ret = 0; | 1830 | ret = 0; |
| 1831 | /* unqueue_me() drops q.key ref */ | ||
| 1825 | if (!unqueue_me(&q)) | 1832 | if (!unqueue_me(&q)) |
| 1826 | goto out_put_key; | 1833 | goto out; |
| 1827 | ret = -ETIMEDOUT; | 1834 | ret = -ETIMEDOUT; |
| 1828 | if (to && !to->task) | 1835 | if (to && !to->task) |
| 1829 | goto out_put_key; | 1836 | goto out; |
| 1830 | 1837 | ||
| 1831 | /* | 1838 | /* |
| 1832 | * We expect signal_pending(current), but we might be the | 1839 | * We expect signal_pending(current), but we might be the |
| 1833 | * victim of a spurious wakeup as well. | 1840 | * victim of a spurious wakeup as well. |
| 1834 | */ | 1841 | */ |
| 1835 | if (!signal_pending(current)) { | 1842 | if (!signal_pending(current)) |
| 1836 | put_futex_key(fshared, &q.key); | ||
| 1837 | goto retry; | 1843 | goto retry; |
| 1838 | } | ||
| 1839 | 1844 | ||
| 1840 | ret = -ERESTARTSYS; | 1845 | ret = -ERESTARTSYS; |
| 1841 | if (!abs_time) | 1846 | if (!abs_time) |
| 1842 | goto out_put_key; | 1847 | goto out; |
| 1843 | 1848 | ||
| 1844 | restart = ¤t_thread_info()->restart_block; | 1849 | restart = ¤t_thread_info()->restart_block; |
| 1845 | restart->fn = futex_wait_restart; | 1850 | restart->fn = futex_wait_restart; |
| 1846 | restart->futex.uaddr = (u32 *)uaddr; | 1851 | restart->futex.uaddr = uaddr; |
| 1847 | restart->futex.val = val; | 1852 | restart->futex.val = val; |
| 1848 | restart->futex.time = abs_time->tv64; | 1853 | restart->futex.time = abs_time->tv64; |
| 1849 | restart->futex.bitset = bitset; | 1854 | restart->futex.bitset = bitset; |
| @@ -1856,8 +1861,6 @@ retry: | |||
| 1856 | 1861 | ||
| 1857 | ret = -ERESTART_RESTARTBLOCK; | 1862 | ret = -ERESTART_RESTARTBLOCK; |
| 1858 | 1863 | ||
| 1859 | out_put_key: | ||
| 1860 | put_futex_key(fshared, &q.key); | ||
| 1861 | out: | 1864 | out: |
| 1862 | if (to) { | 1865 | if (to) { |
| 1863 | hrtimer_cancel(&to->timer); | 1866 | hrtimer_cancel(&to->timer); |
| @@ -1869,7 +1872,7 @@ out: | |||
| 1869 | 1872 | ||
| 1870 | static long futex_wait_restart(struct restart_block *restart) | 1873 | static long futex_wait_restart(struct restart_block *restart) |
| 1871 | { | 1874 | { |
| 1872 | u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; | 1875 | u32 __user *uaddr = restart->futex.uaddr; |
| 1873 | int fshared = 0; | 1876 | int fshared = 0; |
| 1874 | ktime_t t, *tp = NULL; | 1877 | ktime_t t, *tp = NULL; |
| 1875 | 1878 | ||
| @@ -2236,7 +2239,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
| 2236 | q.rt_waiter = &rt_waiter; | 2239 | q.rt_waiter = &rt_waiter; |
| 2237 | q.requeue_pi_key = &key2; | 2240 | q.requeue_pi_key = &key2; |
| 2238 | 2241 | ||
| 2239 | /* Prepare to wait on uaddr. */ | 2242 | /* |
| 2243 | * Prepare to wait on uaddr. On success, increments q.key (key1) ref | ||
| 2244 | * count. | ||
| 2245 | */ | ||
| 2240 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 2246 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 2241 | if (ret) | 2247 | if (ret) |
| 2242 | goto out_key2; | 2248 | goto out_key2; |
| @@ -2254,7 +2260,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
| 2254 | * In order for us to be here, we know our q.key == key2, and since | 2260 | * In order for us to be here, we know our q.key == key2, and since |
| 2255 | * we took the hb->lock above, we also know that futex_requeue() has | 2261 | * we took the hb->lock above, we also know that futex_requeue() has |
| 2256 | * completed and we no longer have to concern ourselves with a wakeup | 2262 | * completed and we no longer have to concern ourselves with a wakeup |
| 2257 | * race with the atomic proxy lock acquition by the requeue code. | 2263 | * race with the atomic proxy lock acquisition by the requeue code. The |
| 2264 | * futex_requeue dropped our key1 reference and incremented our key2 | ||
| 2265 | * reference count. | ||
| 2258 | */ | 2266 | */ |
| 2259 | 2267 | ||
| 2260 | /* Check if the requeue code acquired the second futex for us. */ | 2268 | /* Check if the requeue code acquired the second futex for us. */ |
| @@ -2458,7 +2466,7 @@ retry: | |||
| 2458 | */ | 2466 | */ |
| 2459 | static inline int fetch_robust_entry(struct robust_list __user **entry, | 2467 | static inline int fetch_robust_entry(struct robust_list __user **entry, |
| 2460 | struct robust_list __user * __user *head, | 2468 | struct robust_list __user * __user *head, |
| 2461 | int *pi) | 2469 | unsigned int *pi) |
| 2462 | { | 2470 | { |
| 2463 | unsigned long uentry; | 2471 | unsigned long uentry; |
| 2464 | 2472 | ||
| @@ -2647,7 +2655,7 @@ static int __init futex_init(void) | |||
| 2647 | * of the complex code paths. Also we want to prevent | 2655 | * of the complex code paths. Also we want to prevent |
| 2648 | * registration of robust lists in that case. NULL is | 2656 | * registration of robust lists in that case. NULL is |
| 2649 | * guaranteed to fault and we get -EFAULT on functional | 2657 | * guaranteed to fault and we get -EFAULT on functional |
| 2650 | * implementation, the non functional ones will return | 2658 | * implementation, the non-functional ones will return |
| 2651 | * -ENOSYS. | 2659 | * -ENOSYS. |
| 2652 | */ | 2660 | */ |
| 2653 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); | 2661 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); |
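Note on the futex.c hunks above: besides the typo and kernel-doc fixes, the patch adds sparse lock-context annotations (__acquires/__releases) to queue_lock(), queue_unlock(), queue_me() and unqueue_me_pi(), and stops taking/dropping futex key references in queue_lock()/queue_unlock(); per the new comments, the reference is taken in futex_wait_setup() and dropped in unqueue_me(). A minimal, self-contained sketch of the annotation pattern itself, using an invented lock and helpers:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

/* Returns with demo_lock held; __acquires() lets sparse check the callers. */
static void demo_enter(void) __acquires(demo_lock)
{
        spin_lock(&demo_lock);
}

/* Must be entered with demo_lock held; __releases() documents and checks that. */
static void demo_exit(void) __releases(demo_lock)
{
        spin_unlock(&demo_lock);
}

The annotations are no-ops for gcc and only affect sparse's lock-context tracking.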
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d49afb2395e5..06da4dfc339b 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
| @@ -19,7 +19,7 @@ | |||
| 19 | */ | 19 | */ |
| 20 | static inline int | 20 | static inline int |
| 21 | fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, | 21 | fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, |
| 22 | compat_uptr_t __user *head, int *pi) | 22 | compat_uptr_t __user *head, unsigned int *pi) |
| 23 | { | 23 | { |
| 24 | if (get_user(*uentry, head)) | 24 | if (get_user(*uentry, head)) |
| 25 | return -EFAULT; | 25 | return -EFAULT; |
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 0c642d51aac2..53ead174da2f 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
| @@ -98,7 +98,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
| 98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | 98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" |
| 99 | " disables this message.\n"); | 99 | " disables this message.\n"); |
| 100 | sched_show_task(t); | 100 | sched_show_task(t); |
| 101 | __debug_show_held_locks(t); | 101 | debug_show_held_locks(t); |
| 102 | 102 | ||
| 103 | touch_nmi_watchdog(); | 103 | touch_nmi_watchdog(); |
| 104 | 104 | ||
| @@ -111,7 +111,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
| 111 | * periodically exit the critical section and enter a new one. | 111 | * periodically exit the critical section and enter a new one. |
| 112 | * | 112 | * |
| 113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order | 113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order |
| 114 | * exit the grace period. For classic RCU, a reschedule is required. | 114 | * to exit the grace period. For classic RCU, a reschedule is required. |
| 115 | */ | 115 | */ |
| 116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) | 116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) |
| 117 | { | 117 | { |
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
new file mode 100644
index 000000000000..31d766bf5d2e
--- /dev/null
+++ b/kernel/irq/Kconfig
| @@ -0,0 +1,53 @@ | |||
| 1 | config HAVE_GENERIC_HARDIRQS | ||
| 2 | def_bool n | ||
| 3 | |||
| 4 | if HAVE_GENERIC_HARDIRQS | ||
| 5 | menu "IRQ subsystem" | ||
| 6 | # | ||
| 7 | # Interrupt subsystem related configuration options | ||
| 8 | # | ||
| 9 | config GENERIC_HARDIRQS | ||
| 10 | def_bool y | ||
| 11 | |||
| 12 | config GENERIC_HARDIRQS_NO__DO_IRQ | ||
| 13 | def_bool y | ||
| 14 | |||
| 15 | # Select this to disable the deprecated stuff | ||
| 16 | config GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 17 | def_bool n | ||
| 18 | |||
| 19 | # Options selectable by the architecture code | ||
| 20 | config HAVE_SPARSE_IRQ | ||
| 21 | def_bool n | ||
| 22 | |||
| 23 | config GENERIC_IRQ_PROBE | ||
| 24 | def_bool n | ||
| 25 | |||
| 26 | config GENERIC_PENDING_IRQ | ||
| 27 | def_bool n | ||
| 28 | |||
| 29 | config AUTO_IRQ_AFFINITY | ||
| 30 | def_bool n | ||
| 31 | |||
| 32 | config IRQ_PER_CPU | ||
| 33 | def_bool n | ||
| 34 | |||
| 35 | config HARDIRQS_SW_RESEND | ||
| 36 | def_bool n | ||
| 37 | |||
| 38 | config SPARSE_IRQ | ||
| 39 | bool "Support sparse irq numbering" | ||
| 40 | depends on HAVE_SPARSE_IRQ | ||
| 41 | ---help--- | ||
| 42 | |||
| 43 | Sparse irq numbering is useful for distro kernels that want | ||
| 44 | to define a high CONFIG_NR_CPUS value but still want to have | ||
| 45 | low kernel memory footprint on smaller machines. | ||
| 46 | |||
| 47 | ( Sparse irqs can also be beneficial on NUMA boxes, as they spread | ||
| 48 | out the interrupt descriptors in a more NUMA-friendly way. ) | ||
| 49 | |||
| 50 | If you don't know what to do here, say N. | ||
| 51 | |||
| 52 | endmenu | ||
| 53 | endif | ||
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 7d047808419d..54329cd7b3ee 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
| @@ -1,7 +1,6 @@ | |||
| 1 | 1 | ||
| 2 | obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o | 2 | obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o |
| 3 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o | 3 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o |
| 4 | obj-$(CONFIG_PROC_FS) += proc.o | 4 | obj-$(CONFIG_PROC_FS) += proc.o |
| 5 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o | 5 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o |
| 6 | obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o | ||
| 7 | obj-$(CONFIG_PM_SLEEP) += pm.o | 6 | obj-$(CONFIG_PM_SLEEP) += pm.o |
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 2295a31ef110..505798f86c36 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
| @@ -57,9 +57,10 @@ unsigned long probe_irq_on(void) | |||
| 57 | * Some chips need to know about probing in | 57 | * Some chips need to know about probing in |
| 58 | * progress: | 58 | * progress: |
| 59 | */ | 59 | */ |
| 60 | if (desc->chip->set_type) | 60 | if (desc->irq_data.chip->irq_set_type) |
| 61 | desc->chip->set_type(i, IRQ_TYPE_PROBE); | 61 | desc->irq_data.chip->irq_set_type(&desc->irq_data, |
| 62 | desc->chip->startup(i); | 62 | IRQ_TYPE_PROBE); |
| 63 | desc->irq_data.chip->irq_startup(&desc->irq_data); | ||
| 63 | } | 64 | } |
| 64 | raw_spin_unlock_irq(&desc->lock); | 65 | raw_spin_unlock_irq(&desc->lock); |
| 65 | } | 66 | } |
| @@ -76,7 +77,7 @@ unsigned long probe_irq_on(void) | |||
| 76 | raw_spin_lock_irq(&desc->lock); | 77 | raw_spin_lock_irq(&desc->lock); |
| 77 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { | 78 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { |
| 78 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; | 79 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; |
| 79 | if (desc->chip->startup(i)) | 80 | if (desc->irq_data.chip->irq_startup(&desc->irq_data)) |
| 80 | desc->status |= IRQ_PENDING; | 81 | desc->status |= IRQ_PENDING; |
| 81 | } | 82 | } |
| 82 | raw_spin_unlock_irq(&desc->lock); | 83 | raw_spin_unlock_irq(&desc->lock); |
| @@ -98,7 +99,7 @@ unsigned long probe_irq_on(void) | |||
| 98 | /* It triggered already - consider it spurious. */ | 99 | /* It triggered already - consider it spurious. */ |
| 99 | if (!(status & IRQ_WAITING)) { | 100 | if (!(status & IRQ_WAITING)) { |
| 100 | desc->status = status & ~IRQ_AUTODETECT; | 101 | desc->status = status & ~IRQ_AUTODETECT; |
| 101 | desc->chip->shutdown(i); | 102 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 102 | } else | 103 | } else |
| 103 | if (i < 32) | 104 | if (i < 32) |
| 104 | mask |= 1 << i; | 105 | mask |= 1 << i; |
| @@ -137,7 +138,7 @@ unsigned int probe_irq_mask(unsigned long val) | |||
| 137 | mask |= 1 << i; | 138 | mask |= 1 << i; |
| 138 | 139 | ||
| 139 | desc->status = status & ~IRQ_AUTODETECT; | 140 | desc->status = status & ~IRQ_AUTODETECT; |
| 140 | desc->chip->shutdown(i); | 141 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 141 | } | 142 | } |
| 142 | raw_spin_unlock_irq(&desc->lock); | 143 | raw_spin_unlock_irq(&desc->lock); |
| 143 | } | 144 | } |
| @@ -181,7 +182,7 @@ int probe_irq_off(unsigned long val) | |||
| 181 | nr_of_irqs++; | 182 | nr_of_irqs++; |
| 182 | } | 183 | } |
| 183 | desc->status = status & ~IRQ_AUTODETECT; | 184 | desc->status = status & ~IRQ_AUTODETECT; |
| 184 | desc->chip->shutdown(i); | 185 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 185 | } | 186 | } |
| 186 | raw_spin_unlock_irq(&desc->lock); | 187 | raw_spin_unlock_irq(&desc->lock); |
| 187 | } | 188 | } |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b7091d5ca2f8..baa5c4acad83 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
| @@ -18,108 +18,6 @@ | |||
| 18 | 18 | ||
| 19 | #include "internals.h" | 19 | #include "internals.h" |
| 20 | 20 | ||
| 21 | static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) | ||
| 22 | { | ||
| 23 | struct irq_desc *desc; | ||
| 24 | unsigned long flags; | ||
| 25 | |||
| 26 | desc = irq_to_desc(irq); | ||
| 27 | if (!desc) { | ||
| 28 | WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); | ||
| 29 | return; | ||
| 30 | } | ||
| 31 | |||
| 32 | /* Ensure we don't have left over values from a previous use of this irq */ | ||
| 33 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 34 | desc->status = IRQ_DISABLED; | ||
| 35 | desc->chip = &no_irq_chip; | ||
| 36 | desc->handle_irq = handle_bad_irq; | ||
| 37 | desc->depth = 1; | ||
| 38 | desc->msi_desc = NULL; | ||
| 39 | desc->handler_data = NULL; | ||
| 40 | if (!keep_chip_data) | ||
| 41 | desc->chip_data = NULL; | ||
| 42 | desc->action = NULL; | ||
| 43 | desc->irq_count = 0; | ||
| 44 | desc->irqs_unhandled = 0; | ||
| 45 | #ifdef CONFIG_SMP | ||
| 46 | cpumask_setall(desc->affinity); | ||
| 47 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 48 | cpumask_clear(desc->pending_mask); | ||
| 49 | #endif | ||
| 50 | #endif | ||
| 51 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 52 | } | ||
| 53 | |||
| 54 | /** | ||
| 55 | * dynamic_irq_init - initialize a dynamically allocated irq | ||
| 56 | * @irq: irq number to initialize | ||
| 57 | */ | ||
| 58 | void dynamic_irq_init(unsigned int irq) | ||
| 59 | { | ||
| 60 | dynamic_irq_init_x(irq, false); | ||
| 61 | } | ||
| 62 | |||
| 63 | /** | ||
| 64 | * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq | ||
| 65 | * @irq: irq number to initialize | ||
| 66 | * | ||
| 67 | * does not set irq_to_desc(irq)->chip_data to NULL | ||
| 68 | */ | ||
| 69 | void dynamic_irq_init_keep_chip_data(unsigned int irq) | ||
| 70 | { | ||
| 71 | dynamic_irq_init_x(irq, true); | ||
| 72 | } | ||
| 73 | |||
| 74 | static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) | ||
| 75 | { | ||
| 76 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 77 | unsigned long flags; | ||
| 78 | |||
| 79 | if (!desc) { | ||
| 80 | WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); | ||
| 81 | return; | ||
| 82 | } | ||
| 83 | |||
| 84 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 85 | if (desc->action) { | ||
| 86 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 87 | WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", | ||
| 88 | irq); | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | desc->msi_desc = NULL; | ||
| 92 | desc->handler_data = NULL; | ||
| 93 | if (!keep_chip_data) | ||
| 94 | desc->chip_data = NULL; | ||
| 95 | desc->handle_irq = handle_bad_irq; | ||
| 96 | desc->chip = &no_irq_chip; | ||
| 97 | desc->name = NULL; | ||
| 98 | clear_kstat_irqs(desc); | ||
| 99 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 100 | } | ||
| 101 | |||
| 102 | /** | ||
| 103 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | ||
| 104 | * @irq: irq number to initialize | ||
| 105 | */ | ||
| 106 | void dynamic_irq_cleanup(unsigned int irq) | ||
| 107 | { | ||
| 108 | dynamic_irq_cleanup_x(irq, false); | ||
| 109 | } | ||
| 110 | |||
| 111 | /** | ||
| 112 | * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq | ||
| 113 | * @irq: irq number to initialize | ||
| 114 | * | ||
| 115 | * does not set irq_to_desc(irq)->chip_data to NULL | ||
| 116 | */ | ||
| 117 | void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) | ||
| 118 | { | ||
| 119 | dynamic_irq_cleanup_x(irq, true); | ||
| 120 | } | ||
| 121 | |||
| 122 | |||
| 123 | /** | 21 | /** |
| 124 | * set_irq_chip - set the irq chip for an irq | 22 | * set_irq_chip - set the irq chip for an irq |
| 125 | * @irq: irq number | 23 | * @irq: irq number |
| @@ -140,7 +38,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) | |||
| 140 | 38 | ||
| 141 | raw_spin_lock_irqsave(&desc->lock, flags); | 39 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 142 | irq_chip_set_defaults(chip); | 40 | irq_chip_set_defaults(chip); |
| 143 | desc->chip = chip; | 41 | desc->irq_data.chip = chip; |
| 144 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 42 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 145 | 43 | ||
| 146 | return 0; | 44 | return 0; |
| @@ -193,7 +91,7 @@ int set_irq_data(unsigned int irq, void *data) | |||
| 193 | } | 91 | } |
| 194 | 92 | ||
| 195 | raw_spin_lock_irqsave(&desc->lock, flags); | 93 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 196 | desc->handler_data = data; | 94 | desc->irq_data.handler_data = data; |
| 197 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 95 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 198 | return 0; | 96 | return 0; |
| 199 | } | 97 | } |
| @@ -218,7 +116,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry) | |||
| 218 | } | 116 | } |
| 219 | 117 | ||
| 220 | raw_spin_lock_irqsave(&desc->lock, flags); | 118 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 221 | desc->msi_desc = entry; | 119 | desc->irq_data.msi_desc = entry; |
| 222 | if (entry) | 120 | if (entry) |
| 223 | entry->irq = irq; | 121 | entry->irq = irq; |
| 224 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 122 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| @@ -243,19 +141,27 @@ int set_irq_chip_data(unsigned int irq, void *data) | |||
| 243 | return -EINVAL; | 141 | return -EINVAL; |
| 244 | } | 142 | } |
| 245 | 143 | ||
| 246 | if (!desc->chip) { | 144 | if (!desc->irq_data.chip) { |
| 247 | printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq); | 145 | printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq); |
| 248 | return -EINVAL; | 146 | return -EINVAL; |
| 249 | } | 147 | } |
| 250 | 148 | ||
| 251 | raw_spin_lock_irqsave(&desc->lock, flags); | 149 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 252 | desc->chip_data = data; | 150 | desc->irq_data.chip_data = data; |
| 253 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 151 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 254 | 152 | ||
| 255 | return 0; | 153 | return 0; |
| 256 | } | 154 | } |
| 257 | EXPORT_SYMBOL(set_irq_chip_data); | 155 | EXPORT_SYMBOL(set_irq_chip_data); |
| 258 | 156 | ||
| 157 | struct irq_data *irq_get_irq_data(unsigned int irq) | ||
| 158 | { | ||
| 159 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 160 | |||
| 161 | return desc ? &desc->irq_data : NULL; | ||
| 162 | } | ||
| 163 | EXPORT_SYMBOL_GPL(irq_get_irq_data); | ||
| 164 | |||
| 259 | /** | 165 | /** |
| 260 | * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq | 166 | * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq |
| 261 | * | 167 | * |
| @@ -287,93 +193,216 @@ EXPORT_SYMBOL_GPL(set_irq_nested_thread); | |||
| 287 | /* | 193 | /* |
| 288 | * default enable function | 194 | * default enable function |
| 289 | */ | 195 | */ |
| 290 | static void default_enable(unsigned int irq) | 196 | static void default_enable(struct irq_data *data) |
| 291 | { | 197 | { |
| 292 | struct irq_desc *desc = irq_to_desc(irq); | 198 | struct irq_desc *desc = irq_data_to_desc(data); |
| 293 | 199 | ||
| 294 | desc->chip->unmask(irq); | 200 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 295 | desc->status &= ~IRQ_MASKED; | 201 | desc->status &= ~IRQ_MASKED; |
| 296 | } | 202 | } |
| 297 | 203 | ||
| 298 | /* | 204 | /* |
| 299 | * default disable function | 205 | * default disable function |
| 300 | */ | 206 | */ |
| 301 | static void default_disable(unsigned int irq) | 207 | static void default_disable(struct irq_data *data) |
| 302 | { | 208 | { |
| 303 | } | 209 | } |
| 304 | 210 | ||
| 305 | /* | 211 | /* |
| 306 | * default startup function | 212 | * default startup function |
| 307 | */ | 213 | */ |
| 308 | static unsigned int default_startup(unsigned int irq) | 214 | static unsigned int default_startup(struct irq_data *data) |
| 309 | { | 215 | { |
| 310 | struct irq_desc *desc = irq_to_desc(irq); | 216 | struct irq_desc *desc = irq_data_to_desc(data); |
| 311 | 217 | ||
| 312 | desc->chip->enable(irq); | 218 | desc->irq_data.chip->irq_enable(data); |
| 313 | return 0; | 219 | return 0; |
| 314 | } | 220 | } |
| 315 | 221 | ||
| 316 | /* | 222 | /* |
| 317 | * default shutdown function | 223 | * default shutdown function |
| 318 | */ | 224 | */ |
| 319 | static void default_shutdown(unsigned int irq) | 225 | static void default_shutdown(struct irq_data *data) |
| 320 | { | 226 | { |
| 321 | struct irq_desc *desc = irq_to_desc(irq); | 227 | struct irq_desc *desc = irq_data_to_desc(data); |
| 322 | 228 | ||
| 323 | desc->chip->mask(irq); | 229 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 324 | desc->status |= IRQ_MASKED; | 230 | desc->status |= IRQ_MASKED; |
| 325 | } | 231 | } |
| 326 | 232 | ||
| 233 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 234 | /* Temporary migration helpers */ | ||
| 235 | static void compat_irq_mask(struct irq_data *data) | ||
| 236 | { | ||
| 237 | data->chip->mask(data->irq); | ||
| 238 | } | ||
| 239 | |||
| 240 | static void compat_irq_unmask(struct irq_data *data) | ||
| 241 | { | ||
| 242 | data->chip->unmask(data->irq); | ||
| 243 | } | ||
| 244 | |||
| 245 | static void compat_irq_ack(struct irq_data *data) | ||
| 246 | { | ||
| 247 | data->chip->ack(data->irq); | ||
| 248 | } | ||
| 249 | |||
| 250 | static void compat_irq_mask_ack(struct irq_data *data) | ||
| 251 | { | ||
| 252 | data->chip->mask_ack(data->irq); | ||
| 253 | } | ||
| 254 | |||
| 255 | static void compat_irq_eoi(struct irq_data *data) | ||
| 256 | { | ||
| 257 | data->chip->eoi(data->irq); | ||
| 258 | } | ||
| 259 | |||
| 260 | static void compat_irq_enable(struct irq_data *data) | ||
| 261 | { | ||
| 262 | data->chip->enable(data->irq); | ||
| 263 | } | ||
| 264 | |||
| 265 | static void compat_irq_disable(struct irq_data *data) | ||
| 266 | { | ||
| 267 | data->chip->disable(data->irq); | ||
| 268 | } | ||
| 269 | |||
| 270 | static void compat_irq_shutdown(struct irq_data *data) | ||
| 271 | { | ||
| 272 | data->chip->shutdown(data->irq); | ||
| 273 | } | ||
| 274 | |||
| 275 | static unsigned int compat_irq_startup(struct irq_data *data) | ||
| 276 | { | ||
| 277 | return data->chip->startup(data->irq); | ||
| 278 | } | ||
| 279 | |||
| 280 | static int compat_irq_set_affinity(struct irq_data *data, | ||
| 281 | const struct cpumask *dest, bool force) | ||
| 282 | { | ||
| 283 | return data->chip->set_affinity(data->irq, dest); | ||
| 284 | } | ||
| 285 | |||
| 286 | static int compat_irq_set_type(struct irq_data *data, unsigned int type) | ||
| 287 | { | ||
| 288 | return data->chip->set_type(data->irq, type); | ||
| 289 | } | ||
| 290 | |||
| 291 | static int compat_irq_set_wake(struct irq_data *data, unsigned int on) | ||
| 292 | { | ||
| 293 | return data->chip->set_wake(data->irq, on); | ||
| 294 | } | ||
| 295 | |||
| 296 | static int compat_irq_retrigger(struct irq_data *data) | ||
| 297 | { | ||
| 298 | return data->chip->retrigger(data->irq); | ||
| 299 | } | ||
| 300 | |||
| 301 | static void compat_bus_lock(struct irq_data *data) | ||
| 302 | { | ||
| 303 | data->chip->bus_lock(data->irq); | ||
| 304 | } | ||
| 305 | |||
| 306 | static void compat_bus_sync_unlock(struct irq_data *data) | ||
| 307 | { | ||
| 308 | data->chip->bus_sync_unlock(data->irq); | ||
| 309 | } | ||
| 310 | #endif | ||
| 311 | |||
| 327 | /* | 312 | /* |
| 328 | * Fixup enable/disable function pointers | 313 | * Fixup enable/disable function pointers |
| 329 | */ | 314 | */ |
| 330 | void irq_chip_set_defaults(struct irq_chip *chip) | 315 | void irq_chip_set_defaults(struct irq_chip *chip) |
| 331 | { | 316 | { |
| 332 | if (!chip->enable) | 317 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED |
| 333 | chip->enable = default_enable; | ||
| 334 | if (!chip->disable) | ||
| 335 | chip->disable = default_disable; | ||
| 336 | if (!chip->startup) | ||
| 337 | chip->startup = default_startup; | ||
| 338 | /* | 318 | /* |
| 339 | * We use chip->disable, when the user provided its own. When | 319 | * Compat fixup functions need to be before we set the |
| 340 | * we have default_disable set for chip->disable, then we need | 320 | * defaults for enable/disable/startup/shutdown |
| 321 | */ | ||
| 322 | if (chip->enable) | ||
| 323 | chip->irq_enable = compat_irq_enable; | ||
| 324 | if (chip->disable) | ||
| 325 | chip->irq_disable = compat_irq_disable; | ||
| 326 | if (chip->shutdown) | ||
| 327 | chip->irq_shutdown = compat_irq_shutdown; | ||
| 328 | if (chip->startup) | ||
| 329 | chip->irq_startup = compat_irq_startup; | ||
| 330 | #endif | ||
| 331 | /* | ||
| 332 | * The real defaults | ||
| 333 | */ | ||
| 334 | if (!chip->irq_enable) | ||
| 335 | chip->irq_enable = default_enable; | ||
| 336 | if (!chip->irq_disable) | ||
| 337 | chip->irq_disable = default_disable; | ||
| 338 | if (!chip->irq_startup) | ||
| 339 | chip->irq_startup = default_startup; | ||
| 340 | /* | ||
| 341 | * We use chip->irq_disable, when the user provided its own. When | ||
| 342 | * we have default_disable set for chip->irq_disable, then we need | ||
| 341 | * to use default_shutdown, otherwise the irq line is not | 343 | * to use default_shutdown, otherwise the irq line is not |
| 342 | * disabled on free_irq(): | 344 | * disabled on free_irq(): |
| 343 | */ | 345 | */ |
| 344 | if (!chip->shutdown) | 346 | if (!chip->irq_shutdown) |
| 345 | chip->shutdown = chip->disable != default_disable ? | 347 | chip->irq_shutdown = chip->irq_disable != default_disable ? |
| 346 | chip->disable : default_shutdown; | 348 | chip->irq_disable : default_shutdown; |
| 347 | if (!chip->name) | 349 | |
| 348 | chip->name = chip->typename; | 350 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED |
| 349 | if (!chip->end) | 351 | if (!chip->end) |
| 350 | chip->end = dummy_irq_chip.end; | 352 | chip->end = dummy_irq_chip.end; |
| 353 | |||
| 354 | /* | ||
| 355 | * Now fix up the remaining compat handlers | ||
| 356 | */ | ||
| 357 | if (chip->bus_lock) | ||
| 358 | chip->irq_bus_lock = compat_bus_lock; | ||
| 359 | if (chip->bus_sync_unlock) | ||
| 360 | chip->irq_bus_sync_unlock = compat_bus_sync_unlock; | ||
| 361 | if (chip->mask) | ||
| 362 | chip->irq_mask = compat_irq_mask; | ||
| 363 | if (chip->unmask) | ||
| 364 | chip->irq_unmask = compat_irq_unmask; | ||
| 365 | if (chip->ack) | ||
| 366 | chip->irq_ack = compat_irq_ack; | ||
| 367 | if (chip->mask_ack) | ||
| 368 | chip->irq_mask_ack = compat_irq_mask_ack; | ||
| 369 | if (chip->eoi) | ||
| 370 | chip->irq_eoi = compat_irq_eoi; | ||
| 371 | if (chip->set_affinity) | ||
| 372 | chip->irq_set_affinity = compat_irq_set_affinity; | ||
| 373 | if (chip->set_type) | ||
| 374 | chip->irq_set_type = compat_irq_set_type; | ||
| 375 | if (chip->set_wake) | ||
| 376 | chip->irq_set_wake = compat_irq_set_wake; | ||
| 377 | if (chip->retrigger) | ||
| 378 | chip->irq_retrigger = compat_irq_retrigger; | ||
| 379 | #endif | ||
| 351 | } | 380 | } |
| 352 | 381 | ||
| 353 | static inline void mask_ack_irq(struct irq_desc *desc, int irq) | 382 | static inline void mask_ack_irq(struct irq_desc *desc) |
| 354 | { | 383 | { |
| 355 | if (desc->chip->mask_ack) | 384 | if (desc->irq_data.chip->irq_mask_ack) |
| 356 | desc->chip->mask_ack(irq); | 385 | desc->irq_data.chip->irq_mask_ack(&desc->irq_data); |
| 357 | else { | 386 | else { |
| 358 | desc->chip->mask(irq); | 387 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 359 | if (desc->chip->ack) | 388 | if (desc->irq_data.chip->irq_ack) |
| 360 | desc->chip->ack(irq); | 389 | desc->irq_data.chip->irq_ack(&desc->irq_data); |
| 361 | } | 390 | } |
| 362 | desc->status |= IRQ_MASKED; | 391 | desc->status |= IRQ_MASKED; |
| 363 | } | 392 | } |
| 364 | 393 | ||
| 365 | static inline void mask_irq(struct irq_desc *desc, int irq) | 394 | static inline void mask_irq(struct irq_desc *desc) |
| 366 | { | 395 | { |
| 367 | if (desc->chip->mask) { | 396 | if (desc->irq_data.chip->irq_mask) { |
| 368 | desc->chip->mask(irq); | 397 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 369 | desc->status |= IRQ_MASKED; | 398 | desc->status |= IRQ_MASKED; |
| 370 | } | 399 | } |
| 371 | } | 400 | } |
| 372 | 401 | ||
| 373 | static inline void unmask_irq(struct irq_desc *desc, int irq) | 402 | static inline void unmask_irq(struct irq_desc *desc) |
| 374 | { | 403 | { |
| 375 | if (desc->chip->unmask) { | 404 | if (desc->irq_data.chip->irq_unmask) { |
| 376 | desc->chip->unmask(irq); | 405 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 377 | desc->status &= ~IRQ_MASKED; | 406 | desc->status &= ~IRQ_MASKED; |
| 378 | } | 407 | } |
| 379 | } | 408 | } |
| @@ -476,7 +505,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
| 476 | irqreturn_t action_ret; | 505 | irqreturn_t action_ret; |
| 477 | 506 | ||
| 478 | raw_spin_lock(&desc->lock); | 507 | raw_spin_lock(&desc->lock); |
| 479 | mask_ack_irq(desc, irq); | 508 | mask_ack_irq(desc); |
| 480 | 509 | ||
| 481 | if (unlikely(desc->status & IRQ_INPROGRESS)) | 510 | if (unlikely(desc->status & IRQ_INPROGRESS)) |
| 482 | goto out_unlock; | 511 | goto out_unlock; |
| @@ -502,7 +531,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
| 502 | desc->status &= ~IRQ_INPROGRESS; | 531 | desc->status &= ~IRQ_INPROGRESS; |
| 503 | 532 | ||
| 504 | if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT))) | 533 | if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT))) |
| 505 | unmask_irq(desc, irq); | 534 | unmask_irq(desc); |
| 506 | out_unlock: | 535 | out_unlock: |
| 507 | raw_spin_unlock(&desc->lock); | 536 | raw_spin_unlock(&desc->lock); |
| 508 | } | 537 | } |
| @@ -539,7 +568,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
| 539 | action = desc->action; | 568 | action = desc->action; |
| 540 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { | 569 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { |
| 541 | desc->status |= IRQ_PENDING; | 570 | desc->status |= IRQ_PENDING; |
| 542 | mask_irq(desc, irq); | 571 | mask_irq(desc); |
| 543 | goto out; | 572 | goto out; |
| 544 | } | 573 | } |
| 545 | 574 | ||
| @@ -554,7 +583,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
| 554 | raw_spin_lock(&desc->lock); | 583 | raw_spin_lock(&desc->lock); |
| 555 | desc->status &= ~IRQ_INPROGRESS; | 584 | desc->status &= ~IRQ_INPROGRESS; |
| 556 | out: | 585 | out: |
| 557 | desc->chip->eoi(irq); | 586 | desc->irq_data.chip->irq_eoi(&desc->irq_data); |
| 558 | 587 | ||
| 559 | raw_spin_unlock(&desc->lock); | 588 | raw_spin_unlock(&desc->lock); |
| 560 | } | 589 | } |
| @@ -590,14 +619,13 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
| 590 | if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) || | 619 | if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) || |
| 591 | !desc->action)) { | 620 | !desc->action)) { |
| 592 | desc->status |= (IRQ_PENDING | IRQ_MASKED); | 621 | desc->status |= (IRQ_PENDING | IRQ_MASKED); |
| 593 | mask_ack_irq(desc, irq); | 622 | mask_ack_irq(desc); |
| 594 | goto out_unlock; | 623 | goto out_unlock; |
| 595 | } | 624 | } |
| 596 | kstat_incr_irqs_this_cpu(irq, desc); | 625 | kstat_incr_irqs_this_cpu(irq, desc); |
| 597 | 626 | ||
| 598 | /* Start handling the irq */ | 627 | /* Start handling the irq */ |
| 599 | if (desc->chip->ack) | 628 | desc->irq_data.chip->irq_ack(&desc->irq_data); |
| 600 | desc->chip->ack(irq); | ||
| 601 | 629 | ||
| 602 | /* Mark the IRQ currently in progress.*/ | 630 | /* Mark the IRQ currently in progress.*/ |
| 603 | desc->status |= IRQ_INPROGRESS; | 631 | desc->status |= IRQ_INPROGRESS; |
| @@ -607,7 +635,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
| 607 | irqreturn_t action_ret; | 635 | irqreturn_t action_ret; |
| 608 | 636 | ||
| 609 | if (unlikely(!action)) { | 637 | if (unlikely(!action)) { |
| 610 | mask_irq(desc, irq); | 638 | mask_irq(desc); |
| 611 | goto out_unlock; | 639 | goto out_unlock; |
| 612 | } | 640 | } |
| 613 | 641 | ||
| @@ -619,7 +647,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
| 619 | if (unlikely((desc->status & | 647 | if (unlikely((desc->status & |
| 620 | (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == | 648 | (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == |
| 621 | (IRQ_PENDING | IRQ_MASKED))) { | 649 | (IRQ_PENDING | IRQ_MASKED))) { |
| 622 | unmask_irq(desc, irq); | 650 | unmask_irq(desc); |
| 623 | } | 651 | } |
| 624 | 652 | ||
| 625 | desc->status &= ~IRQ_PENDING; | 653 | desc->status &= ~IRQ_PENDING; |
| @@ -650,15 +678,15 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
| 650 | 678 | ||
| 651 | kstat_incr_irqs_this_cpu(irq, desc); | 679 | kstat_incr_irqs_this_cpu(irq, desc); |
| 652 | 680 | ||
| 653 | if (desc->chip->ack) | 681 | if (desc->irq_data.chip->irq_ack) |
| 654 | desc->chip->ack(irq); | 682 | desc->irq_data.chip->irq_ack(&desc->irq_data); |
| 655 | 683 | ||
| 656 | action_ret = handle_IRQ_event(irq, desc->action); | 684 | action_ret = handle_IRQ_event(irq, desc->action); |
| 657 | if (!noirqdebug) | 685 | if (!noirqdebug) |
| 658 | note_interrupt(irq, desc, action_ret); | 686 | note_interrupt(irq, desc, action_ret); |
| 659 | 687 | ||
| 660 | if (desc->chip->eoi) | 688 | if (desc->irq_data.chip->irq_eoi) |
| 661 | desc->chip->eoi(irq); | 689 | desc->irq_data.chip->irq_eoi(&desc->irq_data); |
| 662 | } | 690 | } |
| 663 | 691 | ||
| 664 | void | 692 | void |
| @@ -676,7 +704,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
| 676 | 704 | ||
| 677 | if (!handle) | 705 | if (!handle) |
| 678 | handle = handle_bad_irq; | 706 | handle = handle_bad_irq; |
| 679 | else if (desc->chip == &no_irq_chip) { | 707 | else if (desc->irq_data.chip == &no_irq_chip) { |
| 680 | printk(KERN_WARNING "Trying to install %sinterrupt handler " | 708 | printk(KERN_WARNING "Trying to install %sinterrupt handler " |
| 681 | "for IRQ%d\n", is_chained ? "chained " : "", irq); | 709 | "for IRQ%d\n", is_chained ? "chained " : "", irq); |
| 682 | /* | 710 | /* |
| @@ -686,16 +714,16 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
| 686 | * prevent us to setup the interrupt at all. Switch it to | 714 | * prevent us to setup the interrupt at all. Switch it to |
| 687 | * dummy_irq_chip for easy transition. | 715 | * dummy_irq_chip for easy transition. |
| 688 | */ | 716 | */ |
| 689 | desc->chip = &dummy_irq_chip; | 717 | desc->irq_data.chip = &dummy_irq_chip; |
| 690 | } | 718 | } |
| 691 | 719 | ||
| 692 | chip_bus_lock(irq, desc); | 720 | chip_bus_lock(desc); |
| 693 | raw_spin_lock_irqsave(&desc->lock, flags); | 721 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 694 | 722 | ||
| 695 | /* Uninstall? */ | 723 | /* Uninstall? */ |
| 696 | if (handle == handle_bad_irq) { | 724 | if (handle == handle_bad_irq) { |
| 697 | if (desc->chip != &no_irq_chip) | 725 | if (desc->irq_data.chip != &no_irq_chip) |
| 698 | mask_ack_irq(desc, irq); | 726 | mask_ack_irq(desc); |
| 699 | desc->status |= IRQ_DISABLED; | 727 | desc->status |= IRQ_DISABLED; |
| 700 | desc->depth = 1; | 728 | desc->depth = 1; |
| 701 | } | 729 | } |
| @@ -706,10 +734,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
| 706 | desc->status &= ~IRQ_DISABLED; | 734 | desc->status &= ~IRQ_DISABLED; |
| 707 | desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; | 735 | desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; |
| 708 | desc->depth = 0; | 736 | desc->depth = 0; |
| 709 | desc->chip->startup(irq); | 737 | desc->irq_data.chip->irq_startup(&desc->irq_data); |
| 710 | } | 738 | } |
| 711 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 739 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 712 | chip_bus_sync_unlock(irq, desc); | 740 | chip_bus_sync_unlock(desc); |
| 713 | } | 741 | } |
| 714 | EXPORT_SYMBOL_GPL(__set_irq_handler); | 742 | EXPORT_SYMBOL_GPL(__set_irq_handler); |
| 715 | 743 | ||
| @@ -729,32 +757,20 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, | |||
| 729 | __set_irq_handler(irq, handle, 0, name); | 757 | __set_irq_handler(irq, handle, 0, name); |
| 730 | } | 758 | } |
| 731 | 759 | ||
| 732 | void set_irq_noprobe(unsigned int irq) | 760 | void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) |
| 733 | { | 761 | { |
| 734 | struct irq_desc *desc = irq_to_desc(irq); | 762 | struct irq_desc *desc = irq_to_desc(irq); |
| 735 | unsigned long flags; | 763 | unsigned long flags; |
| 736 | 764 | ||
| 737 | if (!desc) { | 765 | if (!desc) |
| 738 | printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq); | ||
| 739 | return; | 766 | return; |
| 740 | } | ||
| 741 | |||
| 742 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 743 | desc->status |= IRQ_NOPROBE; | ||
| 744 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 745 | } | ||
| 746 | |||
| 747 | void set_irq_probe(unsigned int irq) | ||
| 748 | { | ||
| 749 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 750 | unsigned long flags; | ||
| 751 | 767 | ||
| 752 | if (!desc) { | 768 | /* Sanitize flags */ |
| 753 | printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq); | 769 | set &= IRQF_MODIFY_MASK; |
| 754 | return; | 770 | clr &= IRQF_MODIFY_MASK; |
| 755 | } | ||
| 756 | 771 | ||
| 757 | raw_spin_lock_irqsave(&desc->lock, flags); | 772 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 758 | desc->status &= ~IRQ_NOPROBE; | 773 | desc->status &= ~clr; |
| 774 | desc->status |= set; | ||
| 759 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 775 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 760 | } | 776 | } |
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
new file mode 100644
index 000000000000..20dc5474947e
--- /dev/null
+++ b/kernel/irq/dummychip.c
| @@ -0,0 +1,68 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar | ||
| 3 | * Copyright (C) 2005-2006, Thomas Gleixner, Russell King | ||
| 4 | * | ||
| 5 | * This file contains the dummy interrupt chip implementation | ||
| 6 | */ | ||
| 7 | #include <linux/interrupt.h> | ||
| 8 | #include <linux/irq.h> | ||
| 9 | |||
| 10 | #include "internals.h" | ||
| 11 | |||
| 12 | /* | ||
| 13 | * What should we do if we get a hw irq event on an illegal vector? | ||
| 14 | * Each architecture has to answer this themself. | ||
| 15 | */ | ||
| 16 | static void ack_bad(struct irq_data *data) | ||
| 17 | { | ||
| 18 | struct irq_desc *desc = irq_data_to_desc(data); | ||
| 19 | |||
| 20 | print_irq_desc(data->irq, desc); | ||
| 21 | ack_bad_irq(data->irq); | ||
| 22 | } | ||
| 23 | |||
| 24 | /* | ||
| 25 | * NOP functions | ||
| 26 | */ | ||
| 27 | static void noop(struct irq_data *data) { } | ||
| 28 | |||
| 29 | static unsigned int noop_ret(struct irq_data *data) | ||
| 30 | { | ||
| 31 | return 0; | ||
| 32 | } | ||
| 33 | |||
| 34 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 35 | static void compat_noop(unsigned int irq) { } | ||
| 36 | #define END_INIT .end = compat_noop | ||
| 37 | #else | ||
| 38 | #define END_INIT | ||
| 39 | #endif | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Generic no controller implementation | ||
| 43 | */ | ||
| 44 | struct irq_chip no_irq_chip = { | ||
| 45 | .name = "none", | ||
| 46 | .irq_startup = noop_ret, | ||
| 47 | .irq_shutdown = noop, | ||
| 48 | .irq_enable = noop, | ||
| 49 | .irq_disable = noop, | ||
| 50 | .irq_ack = ack_bad, | ||
| 51 | END_INIT | ||
| 52 | }; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Generic dummy implementation which can be used for | ||
| 56 | * real dumb interrupt sources | ||
| 57 | */ | ||
| 58 | struct irq_chip dummy_irq_chip = { | ||
| 59 | .name = "dummy", | ||
| 60 | .irq_startup = noop_ret, | ||
| 61 | .irq_shutdown = noop, | ||
| 62 | .irq_enable = noop, | ||
| 63 | .irq_disable = noop, | ||
| 64 | .irq_ack = noop, | ||
| 65 | .irq_mask = noop, | ||
| 66 | .irq_unmask = noop, | ||
| 67 | END_INIT | ||
| 68 | }; | ||
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 27e5c6911223..e2347eb63306 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
| @@ -11,24 +11,15 @@ | |||
| 11 | */ | 11 | */ |
| 12 | 12 | ||
| 13 | #include <linux/irq.h> | 13 | #include <linux/irq.h> |
| 14 | #include <linux/sched.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/random.h> | 14 | #include <linux/random.h> |
| 15 | #include <linux/sched.h> | ||
| 18 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 19 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
| 20 | #include <linux/rculist.h> | 18 | |
| 21 | #include <linux/hash.h> | ||
| 22 | #include <linux/radix-tree.h> | ||
| 23 | #include <trace/events/irq.h> | 19 | #include <trace/events/irq.h> |
| 24 | 20 | ||
| 25 | #include "internals.h" | 21 | #include "internals.h" |
| 26 | 22 | ||
| 27 | /* | ||
| 28 | * lockdep: we want to handle all irq_desc locks as a single lock-class: | ||
| 29 | */ | ||
| 30 | struct lock_class_key irq_desc_lock_class; | ||
| 31 | |||
| 32 | /** | 23 | /** |
| 33 | * handle_bad_irq - handle spurious and unhandled irqs | 24 | * handle_bad_irq - handle spurious and unhandled irqs |
| 34 | * @irq: the interrupt number | 25 | * @irq: the interrupt number |
| @@ -43,304 +34,6 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) | |||
| 43 | ack_bad_irq(irq); | 34 | ack_bad_irq(irq); |
| 44 | } | 35 | } |
| 45 | 36 | ||
| 46 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) | ||
| 47 | static void __init init_irq_default_affinity(void) | ||
| 48 | { | ||
| 49 | alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | ||
| 50 | cpumask_setall(irq_default_affinity); | ||
| 51 | } | ||
| 52 | #else | ||
| 53 | static void __init init_irq_default_affinity(void) | ||
| 54 | { | ||
| 55 | } | ||
| 56 | #endif | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Linux has a controller-independent interrupt architecture. | ||
| 60 | * Every controller has a 'controller-template', that is used | ||
| 61 | * by the main code to do the right thing. Each driver-visible | ||
| 62 | * interrupt source is transparently wired to the appropriate | ||
| 63 | * controller. Thus drivers need not be aware of the | ||
| 64 | * interrupt-controller. | ||
| 65 | * | ||
| 66 | * The code is designed to be easily extended with new/different | ||
| 67 | * interrupt controllers, without having to do assembly magic or | ||
| 68 | * having to touch the generic code. | ||
| 69 | * | ||
| 70 | * Controller mappings for all interrupt sources: | ||
| 71 | */ | ||
| 72 | int nr_irqs = NR_IRQS; | ||
| 73 | EXPORT_SYMBOL_GPL(nr_irqs); | ||
| 74 | |||
| 75 | #ifdef CONFIG_SPARSE_IRQ | ||
| 76 | |||
| 77 | static struct irq_desc irq_desc_init = { | ||
| 78 | .irq = -1, | ||
| 79 | .status = IRQ_DISABLED, | ||
| 80 | .chip = &no_irq_chip, | ||
| 81 | .handle_irq = handle_bad_irq, | ||
| 82 | .depth = 1, | ||
| 83 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), | ||
| 84 | }; | ||
| 85 | |||
| 86 | void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) | ||
| 87 | { | ||
| 88 | void *ptr; | ||
| 89 | |||
| 90 | ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), | ||
| 91 | GFP_ATOMIC, node); | ||
| 92 | |||
| 93 | /* | ||
| 94 | * don't overwite if can not get new one | ||
| 95 | * init_copy_kstat_irqs() could still use old one | ||
| 96 | */ | ||
| 97 | if (ptr) { | ||
| 98 | printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node); | ||
| 99 | desc->kstat_irqs = ptr; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) | ||
| 104 | { | ||
| 105 | memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); | ||
| 106 | |||
| 107 | raw_spin_lock_init(&desc->lock); | ||
| 108 | desc->irq = irq; | ||
| 109 | #ifdef CONFIG_SMP | ||
| 110 | desc->node = node; | ||
| 111 | #endif | ||
| 112 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 113 | init_kstat_irqs(desc, node, nr_cpu_ids); | ||
| 114 | if (!desc->kstat_irqs) { | ||
| 115 | printk(KERN_ERR "can not alloc kstat_irqs\n"); | ||
| 116 | BUG_ON(1); | ||
| 117 | } | ||
| 118 | if (!alloc_desc_masks(desc, node, false)) { | ||
| 119 | printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); | ||
| 120 | BUG_ON(1); | ||
| 121 | } | ||
| 122 | init_desc_masks(desc); | ||
| 123 | arch_init_chip_data(desc, node); | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Protect the sparse_irqs: | ||
| 128 | */ | ||
| 129 | DEFINE_RAW_SPINLOCK(sparse_irq_lock); | ||
| 130 | |||
| 131 | static RADIX_TREE(irq_desc_tree, GFP_ATOMIC); | ||
| 132 | |||
| 133 | static void set_irq_desc(unsigned int irq, struct irq_desc *desc) | ||
| 134 | { | ||
| 135 | radix_tree_insert(&irq_desc_tree, irq, desc); | ||
| 136 | } | ||
| 137 | |||
| 138 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 139 | { | ||
| 140 | return radix_tree_lookup(&irq_desc_tree, irq); | ||
| 141 | } | ||
| 142 | |||
| 143 | void replace_irq_desc(unsigned int irq, struct irq_desc *desc) | ||
| 144 | { | ||
| 145 | void **ptr; | ||
| 146 | |||
| 147 | ptr = radix_tree_lookup_slot(&irq_desc_tree, irq); | ||
| 148 | if (ptr) | ||
| 149 | radix_tree_replace_slot(ptr, desc); | ||
| 150 | } | ||
| 151 | |||
| 152 | static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { | ||
| 153 | [0 ... NR_IRQS_LEGACY-1] = { | ||
| 154 | .irq = -1, | ||
| 155 | .status = IRQ_DISABLED, | ||
| 156 | .chip = &no_irq_chip, | ||
| 157 | .handle_irq = handle_bad_irq, | ||
| 158 | .depth = 1, | ||
| 159 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), | ||
| 160 | } | ||
| 161 | }; | ||
| 162 | |||
| 163 | static unsigned int *kstat_irqs_legacy; | ||
| 164 | |||
| 165 | int __init early_irq_init(void) | ||
| 166 | { | ||
| 167 | struct irq_desc *desc; | ||
| 168 | int legacy_count; | ||
| 169 | int node; | ||
| 170 | int i; | ||
| 171 | |||
| 172 | init_irq_default_affinity(); | ||
| 173 | |||
| 174 | /* initialize nr_irqs based on nr_cpu_ids */ | ||
| 175 | arch_probe_nr_irqs(); | ||
| 176 | printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); | ||
| 177 | |||
| 178 | desc = irq_desc_legacy; | ||
| 179 | legacy_count = ARRAY_SIZE(irq_desc_legacy); | ||
| 180 | node = first_online_node; | ||
| 181 | |||
| 182 | /* allocate based on nr_cpu_ids */ | ||
| 183 | kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids * | ||
| 184 | sizeof(int), GFP_NOWAIT, node); | ||
| 185 | |||
| 186 | for (i = 0; i < legacy_count; i++) { | ||
| 187 | desc[i].irq = i; | ||
| 188 | #ifdef CONFIG_SMP | ||
| 189 | desc[i].node = node; | ||
| 190 | #endif | ||
| 191 | desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; | ||
| 192 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | ||
| 193 | alloc_desc_masks(&desc[i], node, true); | ||
| 194 | init_desc_masks(&desc[i]); | ||
| 195 | set_irq_desc(i, &desc[i]); | ||
| 196 | } | ||
| 197 | |||
| 198 | return arch_early_irq_init(); | ||
| 199 | } | ||
| 200 | |||
| 201 | struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 202 | { | ||
| 203 | struct irq_desc *desc; | ||
| 204 | unsigned long flags; | ||
| 205 | |||
| 206 | if (irq >= nr_irqs) { | ||
| 207 | WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", | ||
| 208 | irq, nr_irqs); | ||
| 209 | return NULL; | ||
| 210 | } | ||
| 211 | |||
| 212 | desc = irq_to_desc(irq); | ||
| 213 | if (desc) | ||
| 214 | return desc; | ||
| 215 | |||
| 216 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
| 217 | |||
| 218 | /* We have to check it to avoid races with another CPU */ | ||
| 219 | desc = irq_to_desc(irq); | ||
| 220 | if (desc) | ||
| 221 | goto out_unlock; | ||
| 222 | |||
| 223 | desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); | ||
| 224 | |||
| 225 | printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node); | ||
| 226 | if (!desc) { | ||
| 227 | printk(KERN_ERR "can not alloc irq_desc\n"); | ||
| 228 | BUG_ON(1); | ||
| 229 | } | ||
| 230 | init_one_irq_desc(irq, desc, node); | ||
| 231 | |||
| 232 | set_irq_desc(irq, desc); | ||
| 233 | |||
| 234 | out_unlock: | ||
| 235 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
| 236 | |||
| 237 | return desc; | ||
| 238 | } | ||
| 239 | |||
| 240 | #else /* !CONFIG_SPARSE_IRQ */ | ||
| 241 | |||
| 242 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | ||
| 243 | [0 ... NR_IRQS-1] = { | ||
| 244 | .status = IRQ_DISABLED, | ||
| 245 | .chip = &no_irq_chip, | ||
| 246 | .handle_irq = handle_bad_irq, | ||
| 247 | .depth = 1, | ||
| 248 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), | ||
| 249 | } | ||
| 250 | }; | ||
| 251 | |||
| 252 | static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; | ||
| 253 | int __init early_irq_init(void) | ||
| 254 | { | ||
| 255 | struct irq_desc *desc; | ||
| 256 | int count; | ||
| 257 | int i; | ||
| 258 | |||
| 259 | init_irq_default_affinity(); | ||
| 260 | |||
| 261 | printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); | ||
| 262 | |||
| 263 | desc = irq_desc; | ||
| 264 | count = ARRAY_SIZE(irq_desc); | ||
| 265 | |||
| 266 | for (i = 0; i < count; i++) { | ||
| 267 | desc[i].irq = i; | ||
| 268 | alloc_desc_masks(&desc[i], 0, true); | ||
| 269 | init_desc_masks(&desc[i]); | ||
| 270 | desc[i].kstat_irqs = kstat_irqs_all[i]; | ||
| 271 | } | ||
| 272 | return arch_early_irq_init(); | ||
| 273 | } | ||
| 274 | |||
| 275 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 276 | { | ||
| 277 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; | ||
| 278 | } | ||
| 279 | |||
| 280 | struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 281 | { | ||
| 282 | return irq_to_desc(irq); | ||
| 283 | } | ||
| 284 | #endif /* !CONFIG_SPARSE_IRQ */ | ||
| 285 | |||
| 286 | void clear_kstat_irqs(struct irq_desc *desc) | ||
| 287 | { | ||
| 288 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | ||
| 289 | } | ||
| 290 | |||
| 291 | /* | ||
| 292 | * What should we do if we get a hw irq event on an illegal vector? | ||
| 293 | * Each architecture has to answer this itself. | ||
| 294 | */ | ||
| 295 | static void ack_bad(unsigned int irq) | ||
| 296 | { | ||
| 297 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 298 | |||
| 299 | print_irq_desc(irq, desc); | ||
| 300 | ack_bad_irq(irq); | ||
| 301 | } | ||
| 302 | |||
| 303 | /* | ||
| 304 | * NOP functions | ||
| 305 | */ | ||
| 306 | static void noop(unsigned int irq) | ||
| 307 | { | ||
| 308 | } | ||
| 309 | |||
| 310 | static unsigned int noop_ret(unsigned int irq) | ||
| 311 | { | ||
| 312 | return 0; | ||
| 313 | } | ||
| 314 | |||
| 315 | /* | ||
| 316 | * Generic no controller implementation | ||
| 317 | */ | ||
| 318 | struct irq_chip no_irq_chip = { | ||
| 319 | .name = "none", | ||
| 320 | .startup = noop_ret, | ||
| 321 | .shutdown = noop, | ||
| 322 | .enable = noop, | ||
| 323 | .disable = noop, | ||
| 324 | .ack = ack_bad, | ||
| 325 | .end = noop, | ||
| 326 | }; | ||
| 327 | |||
| 328 | /* | ||
| 329 | * Generic dummy implementation which can be used for | ||
| 330 | * really dumb interrupt sources | ||
| 331 | */ | ||
| 332 | struct irq_chip dummy_irq_chip = { | ||
| 333 | .name = "dummy", | ||
| 334 | .startup = noop_ret, | ||
| 335 | .shutdown = noop, | ||
| 336 | .enable = noop, | ||
| 337 | .disable = noop, | ||
| 338 | .ack = noop, | ||
| 339 | .mask = noop, | ||
| 340 | .unmask = noop, | ||
| 341 | .end = noop, | ||
| 342 | }; | ||
| 343 | |||
| 344 | /* | 37 | /* |
| 345 | * Special, empty irq handler: | 38 | * Special, empty irq handler: |
| 346 | */ | 39 | */ |
| @@ -457,20 +150,20 @@ unsigned int __do_IRQ(unsigned int irq) | |||
| 457 | /* | 150 | /* |
| 458 | * No locking required for CPU-local interrupts: | 151 | * No locking required for CPU-local interrupts: |
| 459 | */ | 152 | */ |
| 460 | if (desc->chip->ack) | 153 | if (desc->irq_data.chip->ack) |
| 461 | desc->chip->ack(irq); | 154 | desc->irq_data.chip->ack(irq); |
| 462 | if (likely(!(desc->status & IRQ_DISABLED))) { | 155 | if (likely(!(desc->status & IRQ_DISABLED))) { |
| 463 | action_ret = handle_IRQ_event(irq, desc->action); | 156 | action_ret = handle_IRQ_event(irq, desc->action); |
| 464 | if (!noirqdebug) | 157 | if (!noirqdebug) |
| 465 | note_interrupt(irq, desc, action_ret); | 158 | note_interrupt(irq, desc, action_ret); |
| 466 | } | 159 | } |
| 467 | desc->chip->end(irq); | 160 | desc->irq_data.chip->end(irq); |
| 468 | return 1; | 161 | return 1; |
| 469 | } | 162 | } |
| 470 | 163 | ||
| 471 | raw_spin_lock(&desc->lock); | 164 | raw_spin_lock(&desc->lock); |
| 472 | if (desc->chip->ack) | 165 | if (desc->irq_data.chip->ack) |
| 473 | desc->chip->ack(irq); | 166 | desc->irq_data.chip->ack(irq); |
| 474 | /* | 167 | /* |
| 475 | * REPLAY is when Linux resends an IRQ that was dropped earlier | 168 | * REPLAY is when Linux resends an IRQ that was dropped earlier |
| 476 | * WAITING is used by probe to mark irqs that are being tested | 169 | * WAITING is used by probe to mark irqs that are being tested |
| @@ -530,27 +223,9 @@ out: | |||
| 530 | * The ->end() handler has to deal with interrupts which got | 223 | * The ->end() handler has to deal with interrupts which got |
| 531 | * disabled while the handler was running. | 224 | * disabled while the handler was running. |
| 532 | */ | 225 | */ |
| 533 | desc->chip->end(irq); | 226 | desc->irq_data.chip->end(irq); |
| 534 | raw_spin_unlock(&desc->lock); | 227 | raw_spin_unlock(&desc->lock); |
| 535 | 228 | ||
| 536 | return 1; | 229 | return 1; |
| 537 | } | 230 | } |
| 538 | #endif | 231 | #endif |
| 539 | |||
| 540 | void early_init_irq_lock_class(void) | ||
| 541 | { | ||
| 542 | struct irq_desc *desc; | ||
| 543 | int i; | ||
| 544 | |||
| 545 | for_each_irq_desc(i, desc) { | ||
| 546 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 547 | } | ||
| 548 | } | ||
| 549 | |||
| 550 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | ||
| 551 | { | ||
| 552 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 553 | return desc ? desc->kstat_irqs[cpu] : 0; | ||
| 554 | } | ||
| 555 | EXPORT_SYMBOL(kstat_irqs_cpu); | ||
| 556 | |||
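The hunks above convert the legacy __do_IRQ() path from desc->chip to desc->irq_data.chip; the descriptor bookkeeping that used to live in handle.c (default affinity setup, the sparse-irq radix tree, early_irq_init(), the lockdep class and the kstat helpers) reappears in the new kernel/irq/irqdesc.c further down. A minimal sketch of the new access pattern, assuming a valid descriptor; irq_data_to_desc() is the container_of() helper added to internals.h below:

    struct irq_desc *desc = irq_to_desc(irq);
    struct irq_data *data = &desc->irq_data;         /* desc -> per-irq data */
    struct irq_chip *chip = data->chip;              /* what desc->chip used to be */
    struct irq_desc *back = irq_data_to_desc(data);  /* data -> desc via container_of() */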
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index c63f3bc88f0b..4571ae7e085a 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
| @@ -1,9 +1,12 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * IRQ subsystem internal functions and variables: | 2 | * IRQ subsystem internal functions and variables: |
| 3 | */ | 3 | */ |
| 4 | #include <linux/irqdesc.h> | ||
| 4 | 5 | ||
| 5 | extern int noirqdebug; | 6 | extern int noirqdebug; |
| 6 | 7 | ||
| 8 | #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) | ||
| 9 | |||
| 7 | /* Set default functions for irq_chip structures: */ | 10 | /* Set default functions for irq_chip structures: */ |
| 8 | extern void irq_chip_set_defaults(struct irq_chip *chip); | 11 | extern void irq_chip_set_defaults(struct irq_chip *chip); |
| 9 | 12 | ||
| @@ -15,21 +18,19 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 15 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); | 18 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); |
| 16 | extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); | 19 | extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); |
| 17 | 20 | ||
| 18 | extern struct lock_class_key irq_desc_lock_class; | ||
| 19 | extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); | 21 | extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); |
| 20 | extern void clear_kstat_irqs(struct irq_desc *desc); | ||
| 21 | extern raw_spinlock_t sparse_irq_lock; | ||
| 22 | 22 | ||
| 23 | #ifdef CONFIG_SPARSE_IRQ | 23 | /* Resending of interrupts: */ |
| 24 | void replace_irq_desc(unsigned int irq, struct irq_desc *desc); | 24 | void check_irq_resend(struct irq_desc *desc, unsigned int irq); |
| 25 | #endif | ||
| 26 | 25 | ||
| 27 | #ifdef CONFIG_PROC_FS | 26 | #ifdef CONFIG_PROC_FS |
| 28 | extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); | 27 | extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); |
| 28 | extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc); | ||
| 29 | extern void register_handler_proc(unsigned int irq, struct irqaction *action); | 29 | extern void register_handler_proc(unsigned int irq, struct irqaction *action); |
| 30 | extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); | 30 | extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); |
| 31 | #else | 31 | #else |
| 32 | static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } | 32 | static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } |
| 33 | static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { } | ||
| 33 | static inline void register_handler_proc(unsigned int irq, | 34 | static inline void register_handler_proc(unsigned int irq, |
| 34 | struct irqaction *action) { } | 35 | struct irqaction *action) { } |
| 35 | static inline void unregister_handler_proc(unsigned int irq, | 36 | static inline void unregister_handler_proc(unsigned int irq, |
| @@ -40,17 +41,27 @@ extern int irq_select_affinity_usr(unsigned int irq); | |||
| 40 | 41 | ||
| 41 | extern void irq_set_thread_affinity(struct irq_desc *desc); | 42 | extern void irq_set_thread_affinity(struct irq_desc *desc); |
| 42 | 43 | ||
| 44 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 45 | static inline void irq_end(unsigned int irq, struct irq_desc *desc) | ||
| 46 | { | ||
| 47 | if (desc->irq_data.chip && desc->irq_data.chip->end) | ||
| 48 | desc->irq_data.chip->end(irq); | ||
| 49 | } | ||
| 50 | #else | ||
| 51 | static inline void irq_end(unsigned int irq, struct irq_desc *desc) { } | ||
| 52 | #endif | ||
| 53 | |||
| 43 | /* Inline functions for support of irq chips on slow busses */ | 54 | /* Inline functions for support of irq chips on slow busses */ |
| 44 | static inline void chip_bus_lock(unsigned int irq, struct irq_desc *desc) | 55 | static inline void chip_bus_lock(struct irq_desc *desc) |
| 45 | { | 56 | { |
| 46 | if (unlikely(desc->chip->bus_lock)) | 57 | if (unlikely(desc->irq_data.chip->irq_bus_lock)) |
| 47 | desc->chip->bus_lock(irq); | 58 | desc->irq_data.chip->irq_bus_lock(&desc->irq_data); |
| 48 | } | 59 | } |
| 49 | 60 | ||
| 50 | static inline void chip_bus_sync_unlock(unsigned int irq, struct irq_desc *desc) | 61 | static inline void chip_bus_sync_unlock(struct irq_desc *desc) |
| 51 | { | 62 | { |
| 52 | if (unlikely(desc->chip->bus_sync_unlock)) | 63 | if (unlikely(desc->irq_data.chip->irq_bus_sync_unlock)) |
| 53 | desc->chip->bus_sync_unlock(irq); | 64 | desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); |
| 54 | } | 65 | } |
| 55 | 66 | ||
| 56 | /* | 67 | /* |
| @@ -67,8 +78,8 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) | |||
| 67 | irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); | 78 | irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); |
| 68 | printk("->handle_irq(): %p, ", desc->handle_irq); | 79 | printk("->handle_irq(): %p, ", desc->handle_irq); |
| 69 | print_symbol("%s\n", (unsigned long)desc->handle_irq); | 80 | print_symbol("%s\n", (unsigned long)desc->handle_irq); |
| 70 | printk("->chip(): %p, ", desc->chip); | 81 | printk("->irq_data.chip(): %p, ", desc->irq_data.chip); |
| 71 | print_symbol("%s\n", (unsigned long)desc->chip); | 82 | print_symbol("%s\n", (unsigned long)desc->irq_data.chip); |
| 72 | printk("->action(): %p\n", desc->action); | 83 | printk("->action(): %p\n", desc->action); |
| 73 | if (desc->action) { | 84 | if (desc->action) { |
| 74 | printk("->action->handler(): %p, ", desc->action->handler); | 85 | printk("->action->handler(): %p, ", desc->action->handler); |
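chip_bus_lock()/chip_bus_sync_unlock() now take only the descriptor and call the renamed irq_bus_lock/irq_bus_sync_unlock methods with an irq_data pointer, while the new irq_end() helper wraps the deprecated ->end() callback so callers such as spurious.c no longer poke at the chip directly. The bracketing pattern used by the callers in manage.c is unchanged; a sketch of that calling pattern (desc and flags assumed declared by the caller):

    chip_bus_lock(desc);
    raw_spin_lock_irqsave(&desc->lock, flags);
    /* ... manipulate the descriptor ... */
    raw_spin_unlock_irqrestore(&desc->lock, flags);
    chip_bus_sync_unlock(desc);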
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c new file mode 100644 index 000000000000..9d917ff72675 --- /dev/null +++ b/kernel/irq/irqdesc.c | |||
| @@ -0,0 +1,395 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar | ||
| 3 | * Copyright (C) 2005-2006, Thomas Gleixner, Russell King | ||
| 4 | * | ||
| 5 | * This file contains the interrupt descriptor management code | ||
| 6 | * | ||
| 7 | * Detailed information is available in Documentation/DocBook/genericirq | ||
| 8 | * | ||
| 9 | */ | ||
| 10 | #include <linux/irq.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/module.h> | ||
| 13 | #include <linux/interrupt.h> | ||
| 14 | #include <linux/kernel_stat.h> | ||
| 15 | #include <linux/radix-tree.h> | ||
| 16 | #include <linux/bitmap.h> | ||
| 17 | |||
| 18 | #include "internals.h" | ||
| 19 | |||
| 20 | /* | ||
| 21 | * lockdep: we want to handle all irq_desc locks as a single lock-class: | ||
| 22 | */ | ||
| 23 | static struct lock_class_key irq_desc_lock_class; | ||
| 24 | |||
| 25 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) | ||
| 26 | static void __init init_irq_default_affinity(void) | ||
| 27 | { | ||
| 28 | alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | ||
| 29 | cpumask_setall(irq_default_affinity); | ||
| 30 | } | ||
| 31 | #else | ||
| 32 | static void __init init_irq_default_affinity(void) | ||
| 33 | { | ||
| 34 | } | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #ifdef CONFIG_SMP | ||
| 38 | static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | ||
| 39 | { | ||
| 40 | if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) | ||
| 41 | return -ENOMEM; | ||
| 42 | |||
| 43 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 44 | if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { | ||
| 45 | free_cpumask_var(desc->irq_data.affinity); | ||
| 46 | return -ENOMEM; | ||
| 47 | } | ||
| 48 | #endif | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
| 52 | static void desc_smp_init(struct irq_desc *desc, int node) | ||
| 53 | { | ||
| 54 | desc->irq_data.node = node; | ||
| 55 | cpumask_copy(desc->irq_data.affinity, irq_default_affinity); | ||
| 56 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 57 | cpumask_clear(desc->pending_mask); | ||
| 58 | #endif | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline int desc_node(struct irq_desc *desc) | ||
| 62 | { | ||
| 63 | return desc->irq_data.node; | ||
| 64 | } | ||
| 65 | |||
| 66 | #else | ||
| 67 | static inline int | ||
| 68 | alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; } | ||
| 69 | static inline void desc_smp_init(struct irq_desc *desc, int node) { } | ||
| 70 | static inline int desc_node(struct irq_desc *desc) { return 0; } | ||
| 71 | #endif | ||
| 72 | |||
| 73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | ||
| 74 | { | ||
| 75 | desc->irq_data.irq = irq; | ||
| 76 | desc->irq_data.chip = &no_irq_chip; | ||
| 77 | desc->irq_data.chip_data = NULL; | ||
| 78 | desc->irq_data.handler_data = NULL; | ||
| 79 | desc->irq_data.msi_desc = NULL; | ||
| 80 | desc->status = IRQ_DEFAULT_INIT_FLAGS; | ||
| 81 | desc->handle_irq = handle_bad_irq; | ||
| 82 | desc->depth = 1; | ||
| 83 | desc->irq_count = 0; | ||
| 84 | desc->irqs_unhandled = 0; | ||
| 85 | desc->name = NULL; | ||
| 86 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | ||
| 87 | desc_smp_init(desc, node); | ||
| 88 | } | ||
| 89 | |||
| 90 | int nr_irqs = NR_IRQS; | ||
| 91 | EXPORT_SYMBOL_GPL(nr_irqs); | ||
| 92 | |||
| 93 | static DEFINE_MUTEX(sparse_irq_lock); | ||
| 94 | static DECLARE_BITMAP(allocated_irqs, NR_IRQS); | ||
| 95 | |||
| 96 | #ifdef CONFIG_SPARSE_IRQ | ||
| 97 | |||
| 98 | static RADIX_TREE(irq_desc_tree, GFP_KERNEL); | ||
| 99 | |||
| 100 | static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) | ||
| 101 | { | ||
| 102 | radix_tree_insert(&irq_desc_tree, irq, desc); | ||
| 103 | } | ||
| 104 | |||
| 105 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 106 | { | ||
| 107 | return radix_tree_lookup(&irq_desc_tree, irq); | ||
| 108 | } | ||
| 109 | |||
| 110 | static void delete_irq_desc(unsigned int irq) | ||
| 111 | { | ||
| 112 | radix_tree_delete(&irq_desc_tree, irq); | ||
| 113 | } | ||
| 114 | |||
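With CONFIG_SPARSE_IRQ the descriptors now live in a radix tree that is populated and pruned under the sparse_irq_lock mutex (which is why GFP_KERNEL allocations are possible here), so irq_to_desc() can legitimately return NULL for an interrupt number that was never allocated. A sketch of the defensive lookup callers are expected to perform:

    struct irq_desc *desc = irq_to_desc(irq);
    unsigned long flags;

    if (!desc)                      /* nothing allocated for this irq */
            return -EINVAL;

    raw_spin_lock_irqsave(&desc->lock, flags);
    /* ... inspect or modify the descriptor ... */
    raw_spin_unlock_irqrestore(&desc->lock, flags);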
| 115 | #ifdef CONFIG_SMP | ||
| 116 | static void free_masks(struct irq_desc *desc) | ||
| 117 | { | ||
| 118 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 119 | free_cpumask_var(desc->pending_mask); | ||
| 120 | #endif | ||
| 121 | free_cpumask_var(desc->irq_data.affinity); | ||
| 122 | } | ||
| 123 | #else | ||
| 124 | static inline void free_masks(struct irq_desc *desc) { } | ||
| 125 | #endif | ||
| 126 | |||
| 127 | static struct irq_desc *alloc_desc(int irq, int node) | ||
| 128 | { | ||
| 129 | struct irq_desc *desc; | ||
| 130 | gfp_t gfp = GFP_KERNEL; | ||
| 131 | |||
| 132 | desc = kzalloc_node(sizeof(*desc), gfp, node); | ||
| 133 | if (!desc) | ||
| 134 | return NULL; | ||
| 135 | /* allocate based on nr_cpu_ids */ | ||
| 136 | desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), | ||
| 137 | gfp, node); | ||
| 138 | if (!desc->kstat_irqs) | ||
| 139 | goto err_desc; | ||
| 140 | |||
| 141 | if (alloc_masks(desc, gfp, node)) | ||
| 142 | goto err_kstat; | ||
| 143 | |||
| 144 | raw_spin_lock_init(&desc->lock); | ||
| 145 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 146 | |||
| 147 | desc_set_defaults(irq, desc, node); | ||
| 148 | |||
| 149 | return desc; | ||
| 150 | |||
| 151 | err_kstat: | ||
| 152 | kfree(desc->kstat_irqs); | ||
| 153 | err_desc: | ||
| 154 | kfree(desc); | ||
| 155 | return NULL; | ||
| 156 | } | ||
| 157 | |||
| 158 | static void free_desc(unsigned int irq) | ||
| 159 | { | ||
| 160 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 161 | |||
| 162 | unregister_irq_proc(irq, desc); | ||
| 163 | |||
| 164 | mutex_lock(&sparse_irq_lock); | ||
| 165 | delete_irq_desc(irq); | ||
| 166 | mutex_unlock(&sparse_irq_lock); | ||
| 167 | |||
| 168 | free_masks(desc); | ||
| 169 | kfree(desc->kstat_irqs); | ||
| 170 | kfree(desc); | ||
| 171 | } | ||
| 172 | |||
| 173 | static int alloc_descs(unsigned int start, unsigned int cnt, int node) | ||
| 174 | { | ||
| 175 | struct irq_desc *desc; | ||
| 176 | int i; | ||
| 177 | |||
| 178 | for (i = 0; i < cnt; i++) { | ||
| 179 | desc = alloc_desc(start + i, node); | ||
| 180 | if (!desc) | ||
| 181 | goto err; | ||
| 182 | mutex_lock(&sparse_irq_lock); | ||
| 183 | irq_insert_desc(start + i, desc); | ||
| 184 | mutex_unlock(&sparse_irq_lock); | ||
| 185 | } | ||
| 186 | return start; | ||
| 187 | |||
| 188 | err: | ||
| 189 | for (i--; i >= 0; i--) | ||
| 190 | free_desc(start + i); | ||
| 191 | |||
| 192 | mutex_lock(&sparse_irq_lock); | ||
| 193 | bitmap_clear(allocated_irqs, start, cnt); | ||
| 194 | mutex_unlock(&sparse_irq_lock); | ||
| 195 | return -ENOMEM; | ||
| 196 | } | ||
| 197 | |||
| 198 | struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 199 | { | ||
| 200 | int res = irq_alloc_descs(irq, irq, 1, node); | ||
| 201 | |||
| 202 | if (res == -EEXIST || res == irq) | ||
| 203 | return irq_to_desc(irq); | ||
| 204 | return NULL; | ||
| 205 | } | ||
| 206 | |||
| 207 | int __init early_irq_init(void) | ||
| 208 | { | ||
| 209 | int i, initcnt, node = first_online_node; | ||
| 210 | struct irq_desc *desc; | ||
| 211 | |||
| 212 | init_irq_default_affinity(); | ||
| 213 | |||
| 214 | /* Let arch update nr_irqs and return the nr of preallocated irqs */ | ||
| 215 | initcnt = arch_probe_nr_irqs(); | ||
| 216 | printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt); | ||
| 217 | |||
| 218 | for (i = 0; i < initcnt; i++) { | ||
| 219 | desc = alloc_desc(i, node); | ||
| 220 | set_bit(i, allocated_irqs); | ||
| 221 | irq_insert_desc(i, desc); | ||
| 222 | } | ||
| 223 | return arch_early_irq_init(); | ||
| 224 | } | ||
| 225 | |||
| 226 | #else /* !CONFIG_SPARSE_IRQ */ | ||
| 227 | |||
| 228 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | ||
| 229 | [0 ... NR_IRQS-1] = { | ||
| 230 | .status = IRQ_DEFAULT_INIT_FLAGS, | ||
| 231 | .handle_irq = handle_bad_irq, | ||
| 232 | .depth = 1, | ||
| 233 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), | ||
| 234 | } | ||
| 235 | }; | ||
| 236 | |||
| 237 | static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; | ||
| 238 | int __init early_irq_init(void) | ||
| 239 | { | ||
| 240 | int count, i, node = first_online_node; | ||
| 241 | struct irq_desc *desc; | ||
| 242 | |||
| 243 | init_irq_default_affinity(); | ||
| 244 | |||
| 245 | printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); | ||
| 246 | |||
| 247 | desc = irq_desc; | ||
| 248 | count = ARRAY_SIZE(irq_desc); | ||
| 249 | |||
| 250 | for (i = 0; i < count; i++) { | ||
| 251 | desc[i].irq_data.irq = i; | ||
| 252 | desc[i].irq_data.chip = &no_irq_chip; | ||
| 253 | desc[i].kstat_irqs = kstat_irqs_all[i]; | ||
| 254 | alloc_masks(desc + i, GFP_KERNEL, node); | ||
| 255 | desc_smp_init(desc + i, node); | ||
| 256 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | ||
| 257 | } | ||
| 258 | return arch_early_irq_init(); | ||
| 259 | } | ||
| 260 | |||
| 261 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 262 | { | ||
| 263 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; | ||
| 264 | } | ||
| 265 | |||
| 266 | struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 267 | { | ||
| 268 | return irq_to_desc(irq); | ||
| 269 | } | ||
| 270 | |||
| 271 | static void free_desc(unsigned int irq) | ||
| 272 | { | ||
| 273 | dynamic_irq_cleanup(irq); | ||
| 274 | } | ||
| 275 | |||
| 276 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | ||
| 277 | { | ||
| 278 | return start; | ||
| 279 | } | ||
| 280 | #endif /* !CONFIG_SPARSE_IRQ */ | ||
| 281 | |||
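Both configurations now expose the same surface: irq_to_desc(), irq_to_desc_alloc_node(), early_irq_init(), free_desc() and alloc_descs() exist either way, only the backing store differs (radix tree versus the static irq_desc[] array, where free_desc() merely resets the entry via dynamic_irq_cleanup()). Code that walks interrupt numbers should therefore tolerate holes, as in the sketch below; irq_get_next_irq() further down offers a cheaper way to skip unallocated numbers via the allocated_irqs bitmap.

    unsigned int i;

    for (i = 0; i < nr_irqs; i++) {
            struct irq_desc *desc = irq_to_desc(i);

            if (!desc)              /* hole, possible only with CONFIG_SPARSE_IRQ */
                    continue;
            /* ... */
    }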
| 282 | /* Dynamic interrupt handling */ | ||
| 283 | |||
| 284 | /** | ||
| 285 | * irq_free_descs - free irq descriptors | ||
| 286 | * @from: Start of descriptor range | ||
| 287 | * @cnt: Number of consecutive irqs to free | ||
| 288 | */ | ||
| 289 | void irq_free_descs(unsigned int from, unsigned int cnt) | ||
| 290 | { | ||
| 291 | int i; | ||
| 292 | |||
| 293 | if (from >= nr_irqs || (from + cnt) > nr_irqs) | ||
| 294 | return; | ||
| 295 | |||
| 296 | for (i = 0; i < cnt; i++) | ||
| 297 | free_desc(from + i); | ||
| 298 | |||
| 299 | mutex_lock(&sparse_irq_lock); | ||
| 300 | bitmap_clear(allocated_irqs, from, cnt); | ||
| 301 | mutex_unlock(&sparse_irq_lock); | ||
| 302 | } | ||
| 303 | |||
| 304 | /** | ||
| 305 | * irq_alloc_descs - allocate and initialize a range of irq descriptors | ||
| 306 | * @irq: Allocate for specific irq number if irq >= 0 | ||
| 307 | * @from: Start the search from this irq number | ||
| 308 | * @cnt: Number of consecutive irqs to allocate. | ||
| 309 | * @node: Preferred node on which the irq descriptor should be allocated | ||
| 310 | * | ||
| 311 | * Returns the first irq number or error code | ||
| 312 | */ | ||
| 313 | int __ref | ||
| 314 | irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | ||
| 315 | { | ||
| 316 | int start, ret; | ||
| 317 | |||
| 318 | if (!cnt) | ||
| 319 | return -EINVAL; | ||
| 320 | |||
| 321 | mutex_lock(&sparse_irq_lock); | ||
| 322 | |||
| 323 | start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); | ||
| 324 | ret = -EEXIST; | ||
| 325 | if (irq >= 0 && start != irq) | ||
| 326 | goto err; | ||
| 327 | |||
| 328 | ret = -ENOMEM; | ||
| 329 | if (start >= nr_irqs) | ||
| 330 | goto err; | ||
| 331 | |||
| 332 | bitmap_set(allocated_irqs, start, cnt); | ||
| 333 | mutex_unlock(&sparse_irq_lock); | ||
| 334 | return alloc_descs(start, cnt, node); | ||
| 335 | |||
| 336 | err: | ||
| 337 | mutex_unlock(&sparse_irq_lock); | ||
| 338 | return ret; | ||
| 339 | } | ||
| 340 | |||
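irq_alloc_descs() becomes the central allocator: it reserves a contiguous range in the allocated_irqs bitmap under the mutex and only then creates the descriptors, unwinding the bitmap if any allocation fails. Passing irq >= 0 demands that exact number (failing with -EEXIST if the slot is taken), while irq < 0 lets the core pick the first free range at or above @from. A hedged usage sketch; the caller, the count of four and numa_node_id() are illustrative only:

    int base;

    base = irq_alloc_descs(-1, 0, 4, numa_node_id());
    if (base < 0)
            return base;            /* -EINVAL, -EEXIST or -ENOMEM */

    /* ... install chips/handlers for base .. base + 3 ... */

    irq_free_descs(base, 4);        /* teardown */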
| 341 | /** | ||
| 342 | * irq_reserve_irqs - mark irqs allocated | ||
| 343 | * @from: mark from irq number | ||
| 344 | * @cnt: number of irqs to mark | ||
| 345 | * | ||
| 346 | * Returns 0 on success or an appropriate error code | ||
| 347 | */ | ||
| 348 | int irq_reserve_irqs(unsigned int from, unsigned int cnt) | ||
| 349 | { | ||
| 350 | unsigned int start; | ||
| 351 | int ret = 0; | ||
| 352 | |||
| 353 | if (!cnt || (from + cnt) > nr_irqs) | ||
| 354 | return -EINVAL; | ||
| 355 | |||
| 356 | mutex_lock(&sparse_irq_lock); | ||
| 357 | start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); | ||
| 358 | if (start == from) | ||
| 359 | bitmap_set(allocated_irqs, start, cnt); | ||
| 360 | else | ||
| 361 | ret = -EEXIST; | ||
| 362 | mutex_unlock(&sparse_irq_lock); | ||
| 363 | return ret; | ||
| 364 | } | ||
| 365 | |||
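irq_reserve_irqs() only marks numbers as taken in the bitmap without creating descriptors, so architecture code can fence off interrupt numbers that are set up by other means. A sketch; reserving the 16 legacy ISA vectors is just an illustrative choice:

    if (irq_reserve_irqs(0, 16))
            printk(KERN_WARNING "legacy irq range already in use\n");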
| 366 | /** | ||
| 367 | * irq_get_next_irq - get next allocated irq number | ||
| 368 | * @offset: where to start the search | ||
| 369 | * | ||
| 370 | * Returns next irq number after offset or nr_irqs if none is found. | ||
| 371 | */ | ||
| 372 | unsigned int irq_get_next_irq(unsigned int offset) | ||
| 373 | { | ||
| 374 | return find_next_bit(allocated_irqs, nr_irqs, offset); | ||
| 375 | } | ||
| 376 | |||
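Because irq_get_next_irq() walks the allocated_irqs bitmap (and returns nr_irqs when no further bit is set), callers can iterate over just the interrupt numbers that are actually in use instead of probing every value up to nr_irqs. A sketch:

    unsigned int irq;

    for (irq = irq_get_next_irq(0); irq < nr_irqs;
         irq = irq_get_next_irq(irq + 1)) {
            struct irq_desc *desc = irq_to_desc(irq);
            /* desc is expected to be valid for every allocated irq */
    }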
| 377 | /** | ||
| 378 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | ||
| 379 | * @irq: irq number to initialize | ||
| 380 | */ | ||
| 381 | void dynamic_irq_cleanup(unsigned int irq) | ||
| 382 | { | ||
| 383 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 384 | unsigned long flags; | ||
| 385 | |||
| 386 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 387 | desc_set_defaults(irq, desc, desc_node(desc)); | ||
| 388 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 389 | } | ||
| 390 | |||
| 391 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | ||
| 392 | { | ||
| 393 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 394 | return desc ? desc->kstat_irqs[cpu] : 0; | ||
| 395 | } | ||
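kstat_irqs_cpu() moves here from handle.c essentially unchanged, although the EXPORT_SYMBOL() that followed it there does not reappear in the hunks shown. A sketch of the typical consumer, summing the per-CPU counts for one line the way /proc/interrupts-style reporting does:

    unsigned int cpu, total = 0;

    for_each_possible_cpu(cpu)
            total += kstat_irqs_cpu(irq, cpu);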
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c3003e9d91a3..644e8d5fa367 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -73,8 +73,8 @@ int irq_can_set_affinity(unsigned int irq) | |||
| 73 | { | 73 | { |
| 74 | struct irq_desc *desc = irq_to_desc(irq); | 74 | struct irq_desc *desc = irq_to_desc(irq); |
| 75 | 75 | ||
| 76 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip || | 76 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->irq_data.chip || |
| 77 | !desc->chip->set_affinity) | 77 | !desc->irq_data.chip->irq_set_affinity) |
| 78 | return 0; | 78 | return 0; |
| 79 | 79 | ||
| 80 | return 1; | 80 | return 1; |
| @@ -109,17 +109,18 @@ void irq_set_thread_affinity(struct irq_desc *desc) | |||
| 109 | int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) | 109 | int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) |
| 110 | { | 110 | { |
| 111 | struct irq_desc *desc = irq_to_desc(irq); | 111 | struct irq_desc *desc = irq_to_desc(irq); |
| 112 | struct irq_chip *chip = desc->irq_data.chip; | ||
| 112 | unsigned long flags; | 113 | unsigned long flags; |
| 113 | 114 | ||
| 114 | if (!desc->chip->set_affinity) | 115 | if (!chip->irq_set_affinity) |
| 115 | return -EINVAL; | 116 | return -EINVAL; |
| 116 | 117 | ||
| 117 | raw_spin_lock_irqsave(&desc->lock, flags); | 118 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 118 | 119 | ||
| 119 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 120 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
| 120 | if (desc->status & IRQ_MOVE_PCNTXT) { | 121 | if (desc->status & IRQ_MOVE_PCNTXT) { |
| 121 | if (!desc->chip->set_affinity(irq, cpumask)) { | 122 | if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) { |
| 122 | cpumask_copy(desc->affinity, cpumask); | 123 | cpumask_copy(desc->irq_data.affinity, cpumask); |
| 123 | irq_set_thread_affinity(desc); | 124 | irq_set_thread_affinity(desc); |
| 124 | } | 125 | } |
| 125 | } | 126 | } |
| @@ -128,8 +129,8 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) | |||
| 128 | cpumask_copy(desc->pending_mask, cpumask); | 129 | cpumask_copy(desc->pending_mask, cpumask); |
| 129 | } | 130 | } |
| 130 | #else | 131 | #else |
| 131 | if (!desc->chip->set_affinity(irq, cpumask)) { | 132 | if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) { |
| 132 | cpumask_copy(desc->affinity, cpumask); | 133 | cpumask_copy(desc->irq_data.affinity, cpumask); |
| 133 | irq_set_thread_affinity(desc); | 134 | irq_set_thread_affinity(desc); |
| 134 | } | 135 | } |
| 135 | #endif | 136 | #endif |
| @@ -168,16 +169,16 @@ static int setup_affinity(unsigned int irq, struct irq_desc *desc) | |||
| 168 | * one of the targets is online. | 169 | * one of the targets is online. |
| 169 | */ | 170 | */ |
| 170 | if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { | 171 | if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { |
| 171 | if (cpumask_any_and(desc->affinity, cpu_online_mask) | 172 | if (cpumask_any_and(desc->irq_data.affinity, cpu_online_mask) |
| 172 | < nr_cpu_ids) | 173 | < nr_cpu_ids) |
| 173 | goto set_affinity; | 174 | goto set_affinity; |
| 174 | else | 175 | else |
| 175 | desc->status &= ~IRQ_AFFINITY_SET; | 176 | desc->status &= ~IRQ_AFFINITY_SET; |
| 176 | } | 177 | } |
| 177 | 178 | ||
| 178 | cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity); | 179 | cpumask_and(desc->irq_data.affinity, cpu_online_mask, irq_default_affinity); |
| 179 | set_affinity: | 180 | set_affinity: |
| 180 | desc->chip->set_affinity(irq, desc->affinity); | 181 | desc->irq_data.chip->irq_set_affinity(&desc->irq_data, desc->irq_data.affinity, false); |
| 181 | 182 | ||
| 182 | return 0; | 183 | return 0; |
| 183 | } | 184 | } |
| @@ -223,7 +224,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend) | |||
| 223 | 224 | ||
| 224 | if (!desc->depth++) { | 225 | if (!desc->depth++) { |
| 225 | desc->status |= IRQ_DISABLED; | 226 | desc->status |= IRQ_DISABLED; |
| 226 | desc->chip->disable(irq); | 227 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
| 227 | } | 228 | } |
| 228 | } | 229 | } |
| 229 | 230 | ||
| @@ -246,11 +247,11 @@ void disable_irq_nosync(unsigned int irq) | |||
| 246 | if (!desc) | 247 | if (!desc) |
| 247 | return; | 248 | return; |
| 248 | 249 | ||
| 249 | chip_bus_lock(irq, desc); | 250 | chip_bus_lock(desc); |
| 250 | raw_spin_lock_irqsave(&desc->lock, flags); | 251 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 251 | __disable_irq(desc, irq, false); | 252 | __disable_irq(desc, irq, false); |
| 252 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 253 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 253 | chip_bus_sync_unlock(irq, desc); | 254 | chip_bus_sync_unlock(desc); |
| 254 | } | 255 | } |
| 255 | EXPORT_SYMBOL(disable_irq_nosync); | 256 | EXPORT_SYMBOL(disable_irq_nosync); |
| 256 | 257 | ||
| @@ -313,7 +314,7 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) | |||
| 313 | * IRQ line is re-enabled. | 314 | * IRQ line is re-enabled. |
| 314 | * | 315 | * |
| 315 | * This function may be called from IRQ context only when | 316 | * This function may be called from IRQ context only when |
| 316 | * desc->chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! | 317 | * desc->irq_data.chip->bus_lock and desc->irq_data.chip->bus_sync_unlock are NULL ! |
| 317 | */ | 318 | */ |
| 318 | void enable_irq(unsigned int irq) | 319 | void enable_irq(unsigned int irq) |
| 319 | { | 320 | { |
| @@ -323,11 +324,11 @@ void enable_irq(unsigned int irq) | |||
| 323 | if (!desc) | 324 | if (!desc) |
| 324 | return; | 325 | return; |
| 325 | 326 | ||
| 326 | chip_bus_lock(irq, desc); | 327 | chip_bus_lock(desc); |
| 327 | raw_spin_lock_irqsave(&desc->lock, flags); | 328 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 328 | __enable_irq(desc, irq, false); | 329 | __enable_irq(desc, irq, false); |
| 329 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 330 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 330 | chip_bus_sync_unlock(irq, desc); | 331 | chip_bus_sync_unlock(desc); |
| 331 | } | 332 | } |
| 332 | EXPORT_SYMBOL(enable_irq); | 333 | EXPORT_SYMBOL(enable_irq); |
| 333 | 334 | ||
| @@ -336,8 +337,8 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) | |||
| 336 | struct irq_desc *desc = irq_to_desc(irq); | 337 | struct irq_desc *desc = irq_to_desc(irq); |
| 337 | int ret = -ENXIO; | 338 | int ret = -ENXIO; |
| 338 | 339 | ||
| 339 | if (desc->chip->set_wake) | 340 | if (desc->irq_data.chip->irq_set_wake) |
| 340 | ret = desc->chip->set_wake(irq, on); | 341 | ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on); |
| 341 | 342 | ||
| 342 | return ret; | 343 | return ret; |
| 343 | } | 344 | } |
| @@ -429,12 +430,12 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc) | |||
| 429 | } | 430 | } |
| 430 | 431 | ||
| 431 | int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | 432 | int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, |
| 432 | unsigned long flags) | 433 | unsigned long flags) |
| 433 | { | 434 | { |
| 434 | int ret; | 435 | int ret; |
| 435 | struct irq_chip *chip = desc->chip; | 436 | struct irq_chip *chip = desc->irq_data.chip; |
| 436 | 437 | ||
| 437 | if (!chip || !chip->set_type) { | 438 | if (!chip || !chip->irq_set_type) { |
| 438 | /* | 439 | /* |
| 439 | * IRQF_TRIGGER_* but the PIC does not support multiple | 440 | * IRQF_TRIGGER_* but the PIC does not support multiple |
| 440 | * flow-types? | 441 | * flow-types? |
| @@ -445,11 +446,11 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 445 | } | 446 | } |
| 446 | 447 | ||
| 447 | /* caller masked out all except trigger mode flags */ | 448 | /* caller masked out all except trigger mode flags */ |
| 448 | ret = chip->set_type(irq, flags); | 449 | ret = chip->irq_set_type(&desc->irq_data, flags); |
| 449 | 450 | ||
| 450 | if (ret) | 451 | if (ret) |
| 451 | pr_err("setting trigger mode %d for irq %u failed (%pF)\n", | 452 | pr_err("setting trigger mode %lu for irq %u failed (%pF)\n", |
| 452 | (int)flags, irq, chip->set_type); | 453 | flags, irq, chip->irq_set_type); |
| 453 | else { | 454 | else { |
| 454 | if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) | 455 | if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) |
| 455 | flags |= IRQ_LEVEL; | 456 | flags |= IRQ_LEVEL; |
| @@ -457,8 +458,8 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 457 | desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); | 458 | desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); |
| 458 | desc->status |= flags; | 459 | desc->status |= flags; |
| 459 | 460 | ||
| 460 | if (chip != desc->chip) | 461 | if (chip != desc->irq_data.chip) |
| 461 | irq_chip_set_defaults(desc->chip); | 462 | irq_chip_set_defaults(desc->irq_data.chip); |
| 462 | } | 463 | } |
| 463 | 464 | ||
| 464 | return ret; | 465 | return ret; |
| @@ -507,7 +508,7 @@ static int irq_wait_for_interrupt(struct irqaction *action) | |||
| 507 | static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) | 508 | static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) |
| 508 | { | 509 | { |
| 509 | again: | 510 | again: |
| 510 | chip_bus_lock(irq, desc); | 511 | chip_bus_lock(desc); |
| 511 | raw_spin_lock_irq(&desc->lock); | 512 | raw_spin_lock_irq(&desc->lock); |
| 512 | 513 | ||
| 513 | /* | 514 | /* |
| @@ -521,17 +522,17 @@ again: | |||
| 521 | */ | 522 | */ |
| 522 | if (unlikely(desc->status & IRQ_INPROGRESS)) { | 523 | if (unlikely(desc->status & IRQ_INPROGRESS)) { |
| 523 | raw_spin_unlock_irq(&desc->lock); | 524 | raw_spin_unlock_irq(&desc->lock); |
| 524 | chip_bus_sync_unlock(irq, desc); | 525 | chip_bus_sync_unlock(desc); |
| 525 | cpu_relax(); | 526 | cpu_relax(); |
| 526 | goto again; | 527 | goto again; |
| 527 | } | 528 | } |
| 528 | 529 | ||
| 529 | if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { | 530 | if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { |
| 530 | desc->status &= ~IRQ_MASKED; | 531 | desc->status &= ~IRQ_MASKED; |
| 531 | desc->chip->unmask(irq); | 532 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 532 | } | 533 | } |
| 533 | raw_spin_unlock_irq(&desc->lock); | 534 | raw_spin_unlock_irq(&desc->lock); |
| 534 | chip_bus_sync_unlock(irq, desc); | 535 | chip_bus_sync_unlock(desc); |
| 535 | } | 536 | } |
| 536 | 537 | ||
| 537 | #ifdef CONFIG_SMP | 538 | #ifdef CONFIG_SMP |
| @@ -556,7 +557,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) | |||
| 556 | } | 557 | } |
| 557 | 558 | ||
| 558 | raw_spin_lock_irq(&desc->lock); | 559 | raw_spin_lock_irq(&desc->lock); |
| 559 | cpumask_copy(mask, desc->affinity); | 560 | cpumask_copy(mask, desc->irq_data.affinity); |
| 560 | raw_spin_unlock_irq(&desc->lock); | 561 | raw_spin_unlock_irq(&desc->lock); |
| 561 | 562 | ||
| 562 | set_cpus_allowed_ptr(current, mask); | 563 | set_cpus_allowed_ptr(current, mask); |
| @@ -657,7 +658,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 657 | if (!desc) | 658 | if (!desc) |
| 658 | return -EINVAL; | 659 | return -EINVAL; |
| 659 | 660 | ||
| 660 | if (desc->chip == &no_irq_chip) | 661 | if (desc->irq_data.chip == &no_irq_chip) |
| 661 | return -ENOSYS; | 662 | return -ENOSYS; |
| 662 | /* | 663 | /* |
| 663 | * Some drivers like serial.c use request_irq() heavily, | 664 | * Some drivers like serial.c use request_irq() heavily, |
| @@ -752,7 +753,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 752 | } | 753 | } |
| 753 | 754 | ||
| 754 | if (!shared) { | 755 | if (!shared) { |
| 755 | irq_chip_set_defaults(desc->chip); | 756 | irq_chip_set_defaults(desc->irq_data.chip); |
| 756 | 757 | ||
| 757 | init_waitqueue_head(&desc->wait_for_threads); | 758 | init_waitqueue_head(&desc->wait_for_threads); |
| 758 | 759 | ||
| @@ -779,7 +780,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 779 | if (!(desc->status & IRQ_NOAUTOEN)) { | 780 | if (!(desc->status & IRQ_NOAUTOEN)) { |
| 780 | desc->depth = 0; | 781 | desc->depth = 0; |
| 781 | desc->status &= ~IRQ_DISABLED; | 782 | desc->status &= ~IRQ_DISABLED; |
| 782 | desc->chip->startup(irq); | 783 | desc->irq_data.chip->irq_startup(&desc->irq_data); |
| 783 | } else | 784 | } else |
| 784 | /* Undo nested disables: */ | 785 | /* Undo nested disables: */ |
| 785 | desc->depth = 1; | 786 | desc->depth = 1; |
| @@ -912,17 +913,17 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
| 912 | 913 | ||
| 913 | /* Currently used only by UML, might disappear one day: */ | 914 | /* Currently used only by UML, might disappear one day: */ |
| 914 | #ifdef CONFIG_IRQ_RELEASE_METHOD | 915 | #ifdef CONFIG_IRQ_RELEASE_METHOD |
| 915 | if (desc->chip->release) | 916 | if (desc->irq_data.chip->release) |
| 916 | desc->chip->release(irq, dev_id); | 917 | desc->irq_data.chip->release(irq, dev_id); |
| 917 | #endif | 918 | #endif |
| 918 | 919 | ||
| 919 | /* If this was the last handler, shut down the IRQ line: */ | 920 | /* If this was the last handler, shut down the IRQ line: */ |
| 920 | if (!desc->action) { | 921 | if (!desc->action) { |
| 921 | desc->status |= IRQ_DISABLED; | 922 | desc->status |= IRQ_DISABLED; |
| 922 | if (desc->chip->shutdown) | 923 | if (desc->irq_data.chip->irq_shutdown) |
| 923 | desc->chip->shutdown(irq); | 924 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 924 | else | 925 | else |
| 925 | desc->chip->disable(irq); | 926 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
| 926 | } | 927 | } |
| 927 | 928 | ||
| 928 | #ifdef CONFIG_SMP | 929 | #ifdef CONFIG_SMP |
| @@ -997,9 +998,9 @@ void free_irq(unsigned int irq, void *dev_id) | |||
| 997 | if (!desc) | 998 | if (!desc) |
| 998 | return; | 999 | return; |
| 999 | 1000 | ||
| 1000 | chip_bus_lock(irq, desc); | 1001 | chip_bus_lock(desc); |
| 1001 | kfree(__free_irq(irq, dev_id)); | 1002 | kfree(__free_irq(irq, dev_id)); |
| 1002 | chip_bus_sync_unlock(irq, desc); | 1003 | chip_bus_sync_unlock(desc); |
| 1003 | } | 1004 | } |
| 1004 | EXPORT_SYMBOL(free_irq); | 1005 | EXPORT_SYMBOL(free_irq); |
| 1005 | 1006 | ||
| @@ -1086,9 +1087,9 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
| 1086 | action->name = devname; | 1087 | action->name = devname; |
| 1087 | action->dev_id = dev_id; | 1088 | action->dev_id = dev_id; |
| 1088 | 1089 | ||
| 1089 | chip_bus_lock(irq, desc); | 1090 | chip_bus_lock(desc); |
| 1090 | retval = __setup_irq(irq, desc, action); | 1091 | retval = __setup_irq(irq, desc, action); |
| 1091 | chip_bus_sync_unlock(irq, desc); | 1092 | chip_bus_sync_unlock(desc); |
| 1092 | 1093 | ||
| 1093 | if (retval) | 1094 | if (retval) |
| 1094 | kfree(action); | 1095 | kfree(action); |
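Throughout manage.c the chip methods are now invoked through their irq_* prefixed replacements and receive a struct irq_data pointer instead of a bare interrupt number (irq_startup, irq_shutdown, irq_enable, irq_disable, irq_set_affinity, irq_set_type, irq_set_wake, irq_mask, irq_unmask). A hedged sketch of what a converted irq_chip might look like; the demo_* names are placeholders and the bodies are stubs, not a real driver:

    static void demo_irq_mask(struct irq_data *d)
    {
            pr_debug("mask irq %u, chip_data %p\n", d->irq, d->chip_data);
    }

    static void demo_irq_unmask(struct irq_data *d)
    {
            pr_debug("unmask irq %u\n", d->irq);
    }

    static int demo_irq_set_type(struct irq_data *d, unsigned int flow_type)
    {
            return 0;               /* accept the requested trigger */
    }

    static struct irq_chip demo_chip = {
            .name         = "demo",
            .irq_mask     = demo_irq_mask,
            .irq_unmask   = demo_irq_unmask,
            .irq_set_type = demo_irq_set_type,
    };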
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 241962280836..1d2541940480 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | void move_masked_irq(int irq) | 7 | void move_masked_irq(int irq) |
| 8 | { | 8 | { |
| 9 | struct irq_desc *desc = irq_to_desc(irq); | 9 | struct irq_desc *desc = irq_to_desc(irq); |
| 10 | struct irq_chip *chip = desc->irq_data.chip; | ||
| 10 | 11 | ||
| 11 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) | 12 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) |
| 12 | return; | 13 | return; |
| @@ -24,7 +25,7 @@ void move_masked_irq(int irq) | |||
| 24 | if (unlikely(cpumask_empty(desc->pending_mask))) | 25 | if (unlikely(cpumask_empty(desc->pending_mask))) |
| 25 | return; | 26 | return; |
| 26 | 27 | ||
| 27 | if (!desc->chip->set_affinity) | 28 | if (!chip->irq_set_affinity) |
| 28 | return; | 29 | return; |
| 29 | 30 | ||
| 30 | assert_raw_spin_locked(&desc->lock); | 31 | assert_raw_spin_locked(&desc->lock); |
| @@ -43,8 +44,9 @@ void move_masked_irq(int irq) | |||
| 43 | */ | 44 | */ |
| 44 | if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) | 45 | if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) |
| 45 | < nr_cpu_ids)) | 46 | < nr_cpu_ids)) |
| 46 | if (!desc->chip->set_affinity(irq, desc->pending_mask)) { | 47 | if (!chip->irq_set_affinity(&desc->irq_data, |
| 47 | cpumask_copy(desc->affinity, desc->pending_mask); | 48 | desc->pending_mask, false)) { |
| 49 | cpumask_copy(desc->irq_data.affinity, desc->pending_mask); | ||
| 48 | irq_set_thread_affinity(desc); | 50 | irq_set_thread_affinity(desc); |
| 49 | } | 51 | } |
| 50 | 52 | ||
| @@ -61,8 +63,8 @@ void move_native_irq(int irq) | |||
| 61 | if (unlikely(desc->status & IRQ_DISABLED)) | 63 | if (unlikely(desc->status & IRQ_DISABLED)) |
| 62 | return; | 64 | return; |
| 63 | 65 | ||
| 64 | desc->chip->mask(irq); | 66 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 65 | move_masked_irq(irq); | 67 | move_masked_irq(irq); |
| 66 | desc->chip->unmask(irq); | 68 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 67 | } | 69 | } |
| 68 | 70 | ||
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c deleted file mode 100644 index 65d3845665ac..000000000000 --- a/kernel/irq/numa_migrate.c +++ /dev/null | |||
| @@ -1,120 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * NUMA irq-desc migration code | ||
| 3 | * | ||
| 4 | * Migrate IRQ data structures (irq_desc, chip_data, etc.) over to | ||
| 5 | * the new "home node" of the IRQ. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/irq.h> | ||
| 9 | #include <linux/slab.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/random.h> | ||
| 12 | #include <linux/interrupt.h> | ||
| 13 | #include <linux/kernel_stat.h> | ||
| 14 | |||
| 15 | #include "internals.h" | ||
| 16 | |||
| 17 | static void init_copy_kstat_irqs(struct irq_desc *old_desc, | ||
| 18 | struct irq_desc *desc, | ||
| 19 | int node, int nr) | ||
| 20 | { | ||
| 21 | init_kstat_irqs(desc, node, nr); | ||
| 22 | |||
| 23 | if (desc->kstat_irqs != old_desc->kstat_irqs) | ||
| 24 | memcpy(desc->kstat_irqs, old_desc->kstat_irqs, | ||
| 25 | nr * sizeof(*desc->kstat_irqs)); | ||
| 26 | } | ||
| 27 | |||
| 28 | static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) | ||
| 29 | { | ||
| 30 | if (old_desc->kstat_irqs == desc->kstat_irqs) | ||
| 31 | return; | ||
| 32 | |||
| 33 | kfree(old_desc->kstat_irqs); | ||
| 34 | old_desc->kstat_irqs = NULL; | ||
| 35 | } | ||
| 36 | |||
| 37 | static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, | ||
| 38 | struct irq_desc *desc, int node) | ||
| 39 | { | ||
| 40 | memcpy(desc, old_desc, sizeof(struct irq_desc)); | ||
| 41 | if (!alloc_desc_masks(desc, node, false)) { | ||
| 42 | printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " | ||
| 43 | "for migration.\n", irq); | ||
| 44 | return false; | ||
| 45 | } | ||
| 46 | raw_spin_lock_init(&desc->lock); | ||
| 47 | desc->node = node; | ||
| 48 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 49 | init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); | ||
| 50 | init_copy_desc_masks(old_desc, desc); | ||
| 51 | arch_init_copy_chip_data(old_desc, desc, node); | ||
| 52 | return true; | ||
| 53 | } | ||
| 54 | |||
| 55 | static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) | ||
| 56 | { | ||
| 57 | free_kstat_irqs(old_desc, desc); | ||
| 58 | free_desc_masks(old_desc, desc); | ||
| 59 | arch_free_chip_data(old_desc, desc); | ||
| 60 | } | ||
| 61 | |||
| 62 | static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, | ||
| 63 | int node) | ||
| 64 | { | ||
| 65 | struct irq_desc *desc; | ||
| 66 | unsigned int irq; | ||
| 67 | unsigned long flags; | ||
| 68 | |||
| 69 | irq = old_desc->irq; | ||
| 70 | |||
| 71 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
| 72 | |||
| 73 | /* We have to check it to avoid races with another CPU */ | ||
| 74 | desc = irq_to_desc(irq); | ||
| 75 | |||
| 76 | if (desc && old_desc != desc) | ||
| 77 | goto out_unlock; | ||
| 78 | |||
| 79 | desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); | ||
| 80 | if (!desc) { | ||
| 81 | printk(KERN_ERR "irq %d: can not get new irq_desc " | ||
| 82 | "for migration.\n", irq); | ||
| 83 | /* still use old one */ | ||
| 84 | desc = old_desc; | ||
| 85 | goto out_unlock; | ||
| 86 | } | ||
| 87 | if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) { | ||
| 88 | /* still use old one */ | ||
| 89 | kfree(desc); | ||
| 90 | desc = old_desc; | ||
| 91 | goto out_unlock; | ||
| 92 | } | ||
| 93 | |||
| 94 | replace_irq_desc(irq, desc); | ||
| 95 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
| 96 | |||
| 97 | /* free the old one */ | ||
| 98 | free_one_irq_desc(old_desc, desc); | ||
| 99 | kfree(old_desc); | ||
| 100 | |||
| 101 | return desc; | ||
| 102 | |||
| 103 | out_unlock: | ||
| 104 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
| 105 | |||
| 106 | return desc; | ||
| 107 | } | ||
| 108 | |||
| 109 | struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) | ||
| 110 | { | ||
| 111 | /* do not move static (legacy) descriptors or those whose target node is -1 */ | ||
| 112 | if (desc->irq < NR_IRQS_LEGACY || node == -1) | ||
| 113 | return desc; | ||
| 114 | |||
| 115 | if (desc->node != node) | ||
| 116 | desc = __real_move_irq_desc(desc, node); | ||
| 117 | |||
| 118 | return desc; | ||
| 119 | } | ||
| 120 | |||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 09a2ee540bd2..01b1d3a88983 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
| @@ -21,7 +21,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
| 21 | static int irq_affinity_proc_show(struct seq_file *m, void *v) | 21 | static int irq_affinity_proc_show(struct seq_file *m, void *v) |
| 22 | { | 22 | { |
| 23 | struct irq_desc *desc = irq_to_desc((long)m->private); | 23 | struct irq_desc *desc = irq_to_desc((long)m->private); |
| 24 | const struct cpumask *mask = desc->affinity; | 24 | const struct cpumask *mask = desc->irq_data.affinity; |
| 25 | 25 | ||
| 26 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 26 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
| 27 | if (desc->status & IRQ_MOVE_PENDING) | 27 | if (desc->status & IRQ_MOVE_PENDING) |
| @@ -65,7 +65,7 @@ static ssize_t irq_affinity_proc_write(struct file *file, | |||
| 65 | cpumask_var_t new_value; | 65 | cpumask_var_t new_value; |
| 66 | int err; | 66 | int err; |
| 67 | 67 | ||
| 68 | if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || | 68 | if (!irq_to_desc(irq)->irq_data.chip->irq_set_affinity || no_irq_affinity || |
| 69 | irq_balancing_disabled(irq)) | 69 | irq_balancing_disabled(irq)) |
| 70 | return -EIO; | 70 | return -EIO; |
| 71 | 71 | ||
| @@ -185,7 +185,7 @@ static int irq_node_proc_show(struct seq_file *m, void *v) | |||
| 185 | { | 185 | { |
| 186 | struct irq_desc *desc = irq_to_desc((long) m->private); | 186 | struct irq_desc *desc = irq_to_desc((long) m->private); |
| 187 | 187 | ||
| 188 | seq_printf(m, "%d\n", desc->node); | 188 | seq_printf(m, "%d\n", desc->irq_data.node); |
| 189 | return 0; | 189 | return 0; |
| 190 | } | 190 | } |
| 191 | 191 | ||
| @@ -269,7 +269,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
| 269 | { | 269 | { |
| 270 | char name [MAX_NAMELEN]; | 270 | char name [MAX_NAMELEN]; |
| 271 | 271 | ||
| 272 | if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir) | 272 | if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) |
| 273 | return; | 273 | return; |
| 274 | 274 | ||
| 275 | memset(name, 0, MAX_NAMELEN); | 275 | memset(name, 0, MAX_NAMELEN); |
| @@ -297,6 +297,24 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
| 297 | &irq_spurious_proc_fops, (void *)(long)irq); | 297 | &irq_spurious_proc_fops, (void *)(long)irq); |
| 298 | } | 298 | } |
| 299 | 299 | ||
| 300 | void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) | ||
| 301 | { | ||
| 302 | char name [MAX_NAMELEN]; | ||
| 303 | |||
| 304 | if (!root_irq_dir || !desc->dir) | ||
| 305 | return; | ||
| 306 | #ifdef CONFIG_SMP | ||
| 307 | remove_proc_entry("smp_affinity", desc->dir); | ||
| 308 | remove_proc_entry("affinity_hint", desc->dir); | ||
| 309 | remove_proc_entry("node", desc->dir); | ||
| 310 | #endif | ||
| 311 | remove_proc_entry("spurious", desc->dir); | ||
| 312 | |||
| 313 | memset(name, 0, MAX_NAMELEN); | ||
| 314 | sprintf(name, "%u", irq); | ||
| 315 | remove_proc_entry(name, root_irq_dir); | ||
| 316 | } | ||
| 317 | |||
| 300 | #undef MAX_NAMELEN | 318 | #undef MAX_NAMELEN |
| 301 | 319 | ||
| 302 | void unregister_handler_proc(unsigned int irq, struct irqaction *action) | 320 | void unregister_handler_proc(unsigned int irq, struct irqaction *action) |
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 090c3763f3a2..891115a929aa 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c | |||
| @@ -60,7 +60,7 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) | |||
| 60 | /* | 60 | /* |
| 61 | * Make sure the interrupt is enabled, before resending it: | 61 | * Make sure the interrupt is enabled, before resending it: |
| 62 | */ | 62 | */ |
| 63 | desc->chip->enable(irq); | 63 | desc->irq_data.chip->irq_enable(&desc->irq_data); |
| 64 | 64 | ||
| 65 | /* | 65 | /* |
| 66 | * We do not resend level type interrupts. Level type | 66 | * We do not resend level type interrupts. Level type |
| @@ -70,7 +70,8 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) | |||
| 70 | if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { | 70 | if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { |
| 71 | desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; | 71 | desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; |
| 72 | 72 | ||
| 73 | if (!desc->chip->retrigger || !desc->chip->retrigger(irq)) { | 73 | if (!desc->irq_data.chip->irq_retrigger || |
| 74 | !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { | ||
| 74 | #ifdef CONFIG_HARDIRQS_SW_RESEND | 75 | #ifdef CONFIG_HARDIRQS_SW_RESEND |
| 75 | /* Set it pending and activate the softirq: */ | 76 | /* Set it pending and activate the softirq: */ |
| 76 | set_bit(irq, irqs_resend); | 77 | set_bit(irq, irqs_resend); |
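check_irq_resend() now asks the chip's irq_retrigger() method, again passed an irq_data pointer, to replay a lost edge in hardware and falls back to the software resend path (set the bit, schedule the resend tasklet) only when the chip declines or provides no such method. A sketch of a declining implementation; the demo_ name is a placeholder:

    static int demo_irq_retrigger(struct irq_data *d)
    {
            return 0;       /* 0 = cannot replay in hardware; core uses SW resend */
    }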
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 89fb90ae534f..3089d3b9d5f3 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
| @@ -14,6 +14,8 @@ | |||
| 14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
| 15 | #include <linux/timer.h> | 15 | #include <linux/timer.h> |
| 16 | 16 | ||
| 17 | #include "internals.h" | ||
| 18 | |||
| 17 | static int irqfixup __read_mostly; | 19 | static int irqfixup __read_mostly; |
| 18 | 20 | ||
| 19 | #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) | 21 | #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) |
| @@ -78,8 +80,8 @@ static int try_one_irq(int irq, struct irq_desc *desc) | |||
| 78 | * If we did actual work for the real IRQ line we must let the | 80 | * If we did actual work for the real IRQ line we must let the |
| 79 | * IRQ controller clean up too | 81 | * IRQ controller clean up too |
| 80 | */ | 82 | */ |
| 81 | if (work && desc->chip && desc->chip->end) | 83 | if (work) |
| 82 | desc->chip->end(irq); | 84 | irq_end(irq, desc); |
| 83 | raw_spin_unlock(&desc->lock); | 85 | raw_spin_unlock(&desc->lock); |
| 84 | 86 | ||
| 85 | return ok; | 87 | return ok; |
| @@ -254,7 +256,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
| 254 | printk(KERN_EMERG "Disabling IRQ #%d\n", irq); | 256 | printk(KERN_EMERG "Disabling IRQ #%d\n", irq); |
| 255 | desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; | 257 | desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; |
| 256 | desc->depth++; | 258 | desc->depth++; |
| 257 | desc->chip->disable(irq); | 259 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
| 258 | 260 | ||
| 259 | mod_timer(&poll_spurious_irq_timer, | 261 | mod_timer(&poll_spurious_irq_timer, |
| 260 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); | 262 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); |
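The irq_end() call in try_one_irq() refers to a helper in kernel/irq/internals.h that is not part of this hunk; presumably it is a thin compatibility wrapper around the legacy ->end() chip hook, roughly as sketched below (assumed shape, not taken from this diff).

static inline void irq_end(unsigned int irq, struct irq_desc *desc)
{
	if (desc->irq_data.chip && desc->irq_data.chip->end)
		desc->irq_data.chip->end(irq);
}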
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index f2852a510232..42ba65dff7d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -639,6 +639,16 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
| 639 | } | 639 | } |
| 640 | #endif | 640 | #endif |
| 641 | 641 | ||
| 642 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | ||
| 643 | debug_locks_off(); | ||
| 644 | printk(KERN_ERR | ||
| 645 | "BUG: looking up invalid subclass: %u\n", subclass); | ||
| 646 | printk(KERN_ERR | ||
| 647 | "turning off the locking correctness validator.\n"); | ||
| 648 | dump_stack(); | ||
| 649 | return NULL; | ||
| 650 | } | ||
| 651 | |||
| 642 | /* | 652 | /* |
| 643 | * Static locks do not have their class-keys yet - for them the key | 653 | * Static locks do not have their class-keys yet - for them the key |
| 644 | * is the lock object itself: | 654 | * is the lock object itself: |
| @@ -774,7 +784,9 @@ out_unlock_set: | |||
| 774 | raw_local_irq_restore(flags); | 784 | raw_local_irq_restore(flags); |
| 775 | 785 | ||
| 776 | if (!subclass || force) | 786 | if (!subclass || force) |
| 777 | lock->class_cache = class; | 787 | lock->class_cache[0] = class; |
| 788 | else if (subclass < NR_LOCKDEP_CACHING_CLASSES) | ||
| 789 | lock->class_cache[subclass] = class; | ||
| 778 | 790 | ||
| 779 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | 791 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) |
| 780 | return NULL; | 792 | return NULL; |
| @@ -2679,7 +2691,11 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 2679 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 2691 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
| 2680 | struct lock_class_key *key, int subclass) | 2692 | struct lock_class_key *key, int subclass) |
| 2681 | { | 2693 | { |
| 2682 | lock->class_cache = NULL; | 2694 | int i; |
| 2695 | |||
| 2696 | for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) | ||
| 2697 | lock->class_cache[i] = NULL; | ||
| 2698 | |||
| 2683 | #ifdef CONFIG_LOCK_STAT | 2699 | #ifdef CONFIG_LOCK_STAT |
| 2684 | lock->cpu = raw_smp_processor_id(); | 2700 | lock->cpu = raw_smp_processor_id(); |
| 2685 | #endif | 2701 | #endif |
| @@ -2739,21 +2755,13 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 2739 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2755 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| 2740 | return 0; | 2756 | return 0; |
| 2741 | 2757 | ||
| 2742 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | ||
| 2743 | debug_locks_off(); | ||
| 2744 | printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); | ||
| 2745 | printk("turning off the locking correctness validator.\n"); | ||
| 2746 | dump_stack(); | ||
| 2747 | return 0; | ||
| 2748 | } | ||
| 2749 | |||
| 2750 | if (lock->key == &__lockdep_no_validate__) | 2758 | if (lock->key == &__lockdep_no_validate__) |
| 2751 | check = 1; | 2759 | check = 1; |
| 2752 | 2760 | ||
| 2753 | if (!subclass) | 2761 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) |
| 2754 | class = lock->class_cache; | 2762 | class = lock->class_cache[subclass]; |
| 2755 | /* | 2763 | /* |
| 2756 | * Not cached yet or subclass? | 2764 | * Not cached? |
| 2757 | */ | 2765 | */ |
| 2758 | if (unlikely(!class)) { | 2766 | if (unlikely(!class)) { |
| 2759 | class = register_lock_class(lock, subclass, 0); | 2767 | class = register_lock_class(lock, subclass, 0); |
| @@ -2918,7 +2926,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) | |||
| 2918 | return 1; | 2926 | return 1; |
| 2919 | 2927 | ||
| 2920 | if (hlock->references) { | 2928 | if (hlock->references) { |
| 2921 | struct lock_class *class = lock->class_cache; | 2929 | struct lock_class *class = lock->class_cache[0]; |
| 2922 | 2930 | ||
| 2923 | if (!class) | 2931 | if (!class) |
| 2924 | class = look_up_lock_class(lock, 0); | 2932 | class = look_up_lock_class(lock, 0); |
| @@ -3559,7 +3567,12 @@ void lockdep_reset_lock(struct lockdep_map *lock) | |||
| 3559 | if (list_empty(head)) | 3567 | if (list_empty(head)) |
| 3560 | continue; | 3568 | continue; |
| 3561 | list_for_each_entry_safe(class, next, head, hash_entry) { | 3569 | list_for_each_entry_safe(class, next, head, hash_entry) { |
| 3562 | if (unlikely(class == lock->class_cache)) { | 3570 | int match = 0; |
| 3571 | |||
| 3572 | for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) | ||
| 3573 | match |= class == lock->class_cache[j]; | ||
| 3574 | |||
| 3575 | if (unlikely(match)) { | ||
| 3563 | if (debug_locks_off_graph_unlock()) | 3576 | if (debug_locks_off_graph_unlock()) |
| 3564 | WARN_ON(1); | 3577 | WARN_ON(1); |
| 3565 | goto out_restore; | 3578 | goto out_restore; |
| @@ -3775,7 +3788,7 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks); | |||
| 3775 | * Careful: only use this function if you are sure that | 3788 | * Careful: only use this function if you are sure that |
| 3776 | * the task cannot run in parallel! | 3789 | * the task cannot run in parallel! |
| 3777 | */ | 3790 | */ |
| 3778 | void __debug_show_held_locks(struct task_struct *task) | 3791 | void debug_show_held_locks(struct task_struct *task) |
| 3779 | { | 3792 | { |
| 3780 | if (unlikely(!debug_locks)) { | 3793 | if (unlikely(!debug_locks)) { |
| 3781 | printk("INFO: lockdep is turned off.\n"); | 3794 | printk("INFO: lockdep is turned off.\n"); |
| @@ -3783,12 +3796,6 @@ void __debug_show_held_locks(struct task_struct *task) | |||
| 3783 | } | 3796 | } |
| 3784 | lockdep_print_held_locks(task); | 3797 | lockdep_print_held_locks(task); |
| 3785 | } | 3798 | } |
| 3786 | EXPORT_SYMBOL_GPL(__debug_show_held_locks); | ||
| 3787 | |||
| 3788 | void debug_show_held_locks(struct task_struct *task) | ||
| 3789 | { | ||
| 3790 | __debug_show_held_locks(task); | ||
| 3791 | } | ||
| 3792 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 3799 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
| 3793 | 3800 | ||
| 3794 | void lockdep_sys_exit(void) | 3801 | void lockdep_sys_exit(void) |
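The class_cache changes above imply a matching change to struct lockdep_map in include/linux/lockdep.h, which is not shown in this diff: the single cached class pointer becomes a small per-subclass array. An assumed sketch of the header side, with NR_LOCKDEP_CACHING_CLASSES sized to cover the common subclasses:

#define NR_LOCKDEP_CACHING_CLASSES	2	/* assumed value */

struct lockdep_map {
	struct lock_class_key	*key;
	struct lock_class	*class_cache[NR_LOCKDEP_CACHING_CLASSES];
	const char		*name;
#ifdef CONFIG_LOCK_STAT
	int			cpu;
	unsigned long		ip;
#endif
};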
diff --git a/kernel/pid.c b/kernel/pid.c index d55c6fb8d087..39b65b69584f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) | |||
| 401 | struct task_struct *result = NULL; | 401 | struct task_struct *result = NULL; |
| 402 | if (pid) { | 402 | if (pid) { |
| 403 | struct hlist_node *first; | 403 | struct hlist_node *first; |
| 404 | first = rcu_dereference_check(pid->tasks[type].first, | 404 | first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), |
| 405 | rcu_read_lock_held() || | 405 | rcu_read_lock_held() || |
| 406 | lockdep_tasklist_lock_is_held()); | 406 | lockdep_tasklist_lock_is_held()); |
| 407 | if (first) | 407 | if (first) |
| @@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task); | |||
| 416 | */ | 416 | */ |
| 417 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | 417 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
| 418 | { | 418 | { |
| 419 | rcu_lockdep_assert(rcu_read_lock_held()); | ||
| 419 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | 420 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
| 420 | } | 421 | } |
| 421 | 422 | ||
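The rcu_lockdep_assert() added to find_task_by_pid_ns() means callers must wrap the lookup in rcu_read_lock(). A typical caller pattern, shown here with invented names, pins the task before leaving the read-side critical section:

static struct task_struct *example_get_task(pid_t nr, struct pid_namespace *ns)
{
	struct task_struct *tsk;

	rcu_read_lock();			/* required by the new assertion */
	tsk = find_task_by_pid_ns(nr, ns);
	if (tsk)
		get_task_struct(tsk);		/* take a reference before unlocking */
	rcu_read_unlock();
	return tsk;
}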
diff --git a/kernel/printk.c b/kernel/printk.c index 8fe465ac008a..2531017795f6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -85,7 +85,7 @@ EXPORT_SYMBOL(oops_in_progress); | |||
| 85 | * provides serialisation for access to the entire console | 85 | * provides serialisation for access to the entire console |
| 86 | * driver system. | 86 | * driver system. |
| 87 | */ | 87 | */ |
| 88 | static DECLARE_MUTEX(console_sem); | 88 | static DEFINE_SEMAPHORE(console_sem); |
| 89 | struct console *console_drivers; | 89 | struct console *console_drivers; |
| 90 | EXPORT_SYMBOL_GPL(console_drivers); | 90 | EXPORT_SYMBOL_GPL(console_drivers); |
| 91 | 91 | ||
| @@ -556,7 +556,7 @@ static void zap_locks(void) | |||
| 556 | /* If a crash is occurring, make sure we can't deadlock */ | 556 | /* If a crash is occurring, make sure we can't deadlock */ |
| 557 | spin_lock_init(&logbuf_lock); | 557 | spin_lock_init(&logbuf_lock); |
| 558 | /* And make sure that we print immediately */ | 558 | /* And make sure that we print immediately */ |
| 559 | init_MUTEX(&console_sem); | 559 | sema_init(&console_sem, 1); |
| 560 | } | 560 | } |
| 561 | 561 | ||
| 562 | #if defined(CONFIG_PRINTK_TIME) | 562 | #if defined(CONFIG_PRINTK_TIME) |
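These printk hunks follow the tree-wide move away from the semaphore-as-mutex helpers. The general conversion pattern assumed here, with an illustrative identifier:

static DEFINE_SEMAPHORE(example_sem);		/* was: static DECLARE_MUTEX(example_sem); */

static void example_reset(void)
{
	sema_init(&example_sem, 1);		/* was: init_MUTEX(&example_sem); */
}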
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4d169835fb36..a23a57a976d1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
| @@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) | |||
| 73 | EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); | 73 | EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); |
| 74 | 74 | ||
| 75 | /** | 75 | /** |
| 76 | * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? | 76 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? |
| 77 | * | 77 | * |
| 78 | * Check for bottom half being disabled, which covers both the | 78 | * Check for bottom half being disabled, which covers both the |
| 79 | * CONFIG_PROVE_RCU and not cases. Note that if someone uses | 79 | * CONFIG_PROVE_RCU and not cases. Note that if someone uses |
| 80 | * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) | 80 | * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) |
| 81 | * will show the situation. | 81 | * will show the situation. This is useful for debug checks in functions |
| 82 | * that require that they be called within an RCU read-side critical | ||
| 83 | * section. | ||
| 82 | * | 84 | * |
| 83 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. | 85 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. |
| 84 | */ | 86 | */ |
| @@ -86,7 +88,7 @@ int rcu_read_lock_bh_held(void) | |||
| 86 | { | 88 | { |
| 87 | if (!debug_lockdep_rcu_enabled()) | 89 | if (!debug_lockdep_rcu_enabled()) |
| 88 | return 1; | 90 | return 1; |
| 89 | return in_softirq(); | 91 | return in_softirq() || irqs_disabled(); |
| 90 | } | 92 | } |
| 91 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | 93 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); |
| 92 | 94 | ||
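The relaxed check matters for code that dereferences RCU-bh-protected pointers from contexts that disable hard interrupts instead of calling rcu_read_lock_bh(). A hedged sketch of such a reader; struct foo and the gp pointer are invented for illustration.

struct foo {
	int val;
};
static struct foo __rcu *gp;

static int example_read_val(void)
{
	struct foo *p;

	/* Legal either under rcu_read_lock_bh() or with hard irqs disabled. */
	p = rcu_dereference_check(gp, rcu_read_lock_bh_held());
	return p ? p->val : -1;
}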
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 196ec02f8be0..d806735342ac 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
| @@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly; | |||
| 59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | 59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
| 60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 61 | 61 | ||
| 62 | /* Forward declarations for rcutiny_plugin.h. */ | ||
| 63 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); | ||
| 64 | static void __call_rcu(struct rcu_head *head, | ||
| 65 | void (*func)(struct rcu_head *rcu), | ||
| 66 | struct rcu_ctrlblk *rcp); | ||
| 67 | |||
| 68 | #include "rcutiny_plugin.h" | ||
| 69 | |||
| 62 | #ifdef CONFIG_NO_HZ | 70 | #ifdef CONFIG_NO_HZ |
| 63 | 71 | ||
| 64 | static long rcu_dynticks_nesting = 1; | 72 | static long rcu_dynticks_nesting = 1; |
| @@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
| 140 | rcu_sched_qs(cpu); | 148 | rcu_sched_qs(cpu); |
| 141 | else if (!in_softirq()) | 149 | else if (!in_softirq()) |
| 142 | rcu_bh_qs(cpu); | 150 | rcu_bh_qs(cpu); |
| 151 | rcu_preempt_check_callbacks(); | ||
| 143 | } | 152 | } |
| 144 | 153 | ||
| 145 | /* | 154 | /* |
| @@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 162 | *rcp->donetail = NULL; | 171 | *rcp->donetail = NULL; |
| 163 | if (rcp->curtail == rcp->donetail) | 172 | if (rcp->curtail == rcp->donetail) |
| 164 | rcp->curtail = &rcp->rcucblist; | 173 | rcp->curtail = &rcp->rcucblist; |
| 174 | rcu_preempt_remove_callbacks(rcp); | ||
| 165 | rcp->donetail = &rcp->rcucblist; | 175 | rcp->donetail = &rcp->rcucblist; |
| 166 | local_irq_restore(flags); | 176 | local_irq_restore(flags); |
| 167 | 177 | ||
| @@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
| 182 | { | 192 | { |
| 183 | __rcu_process_callbacks(&rcu_sched_ctrlblk); | 193 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
| 184 | __rcu_process_callbacks(&rcu_bh_ctrlblk); | 194 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
| 195 | rcu_preempt_process_callbacks(); | ||
| 185 | } | 196 | } |
| 186 | 197 | ||
| 187 | /* | 198 | /* |
| @@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head, | |||
| 223 | } | 234 | } |
| 224 | 235 | ||
| 225 | /* | 236 | /* |
| 226 | * Post an RCU callback to be invoked after the end of an RCU grace | 237 | * Post an RCU callback to be invoked after the end of an RCU-sched grace |
| 227 | * period. But since we have but one CPU, that would be after any | 238 | * period. But since we have but one CPU, that would be after any |
| 228 | * quiescent state. | 239 | * quiescent state. |
| 229 | */ | 240 | */ |
| 230 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 241 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
| 231 | { | 242 | { |
| 232 | __call_rcu(head, func, &rcu_sched_ctrlblk); | 243 | __call_rcu(head, func, &rcu_sched_ctrlblk); |
| 233 | } | 244 | } |
| 234 | EXPORT_SYMBOL_GPL(call_rcu); | 245 | EXPORT_SYMBOL_GPL(call_rcu_sched); |
| 235 | 246 | ||
| 236 | /* | 247 | /* |
| 237 | * Post an RCU bottom-half callback to be invoked after any subsequent | 248 | * Post an RCU bottom-half callback to be invoked after any subsequent |
| @@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
| 243 | } | 254 | } |
| 244 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 255 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
| 245 | 256 | ||
| 246 | void rcu_barrier(void) | ||
| 247 | { | ||
| 248 | struct rcu_synchronize rcu; | ||
| 249 | |||
| 250 | init_rcu_head_on_stack(&rcu.head); | ||
| 251 | init_completion(&rcu.completion); | ||
| 252 | /* Will wake me after RCU finished. */ | ||
| 253 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
| 254 | /* Wait for it. */ | ||
| 255 | wait_for_completion(&rcu.completion); | ||
| 256 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 257 | } | ||
| 258 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
| 259 | |||
| 260 | void rcu_barrier_bh(void) | 257 | void rcu_barrier_bh(void) |
| 261 | { | 258 | { |
| 262 | struct rcu_synchronize rcu; | 259 | struct rcu_synchronize rcu; |
| @@ -289,5 +286,3 @@ void __init rcu_init(void) | |||
| 289 | { | 286 | { |
| 290 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 287 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
| 291 | } | 288 | } |
| 292 | |||
| 293 | #include "rcutiny_plugin.h" | ||
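Renaming call_rcu() to call_rcu_sched() here presumably pairs with a header-side alias so that non-preemptible TINY_RCU builds keep the old API, while the preemptible flavour supplies its own call_rcu() in rcutiny_plugin.h. The mapping below is an assumption; the header change is not part of this diff.

#ifndef CONFIG_TINY_PREEMPT_RCU
#define call_rcu	call_rcu_sched		/* classic tiny RCU: one flavour */
#endif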
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index d223a92bc742..6ceca4f745ff 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 2 | * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition |
| 3 | * Internal non-public definitions that provide either classic | 3 | * Internal non-public definitions that provide either classic |
| 4 | * or preemptable semantics. | 4 | * or preemptible semantics. |
| 5 | * | 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
| @@ -17,11 +17,587 @@ | |||
| 17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
| 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 19 | * | 19 | * |
| 20 | * Copyright IBM Corporation, 2009 | 20 | * Copyright (c) 2010 Linaro |
| 21 | * | 21 | * |
| 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #ifdef CONFIG_TINY_PREEMPT_RCU | ||
| 26 | |||
| 27 | #include <linux/delay.h> | ||
| 28 | |||
| 29 | /* Global control variables for preemptible RCU. */ | ||
| 30 | struct rcu_preempt_ctrlblk { | ||
| 31 | struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ | ||
| 32 | struct rcu_head **nexttail; | ||
| 33 | /* Tasks blocked in a preemptible RCU */ | ||
| 34 | /* read-side critical section while a */ | ||
| 35 | /* preemptible-RCU grace period is in */ | ||
| 36 | /* progress must wait for a later grace */ | ||
| 37 | /* period. This pointer points to the */ | ||
| 38 | /* ->next pointer of the last task that */ | ||
| 39 | /* must wait for a later grace period, or */ | ||
| 40 | /* to &->rcb.rcucblist if there is no */ | ||
| 41 | /* such task. */ | ||
| 42 | struct list_head blkd_tasks; | ||
| 43 | /* Tasks blocked in RCU read-side critical */ | ||
| 44 | /* section. Tasks are placed at the head */ | ||
| 45 | /* of this list and age towards the tail. */ | ||
| 46 | struct list_head *gp_tasks; | ||
| 47 | /* Pointer to the first task blocking the */ | ||
| 48 | /* current grace period, or NULL if there */ | ||
| 49 | /* is no such task. */ | ||
| 50 | struct list_head *exp_tasks; | ||
| 51 | /* Pointer to first task blocking the */ | ||
| 52 | /* current expedited grace period, or NULL */ | ||
| 53 | /* if there is no such task. If there */ | ||
| 54 | /* is no current expedited grace period, */ | ||
| 55 | /* then there cannot be any such task. */ | ||
| 56 | u8 gpnum; /* Current grace period. */ | ||
| 57 | u8 gpcpu; /* Last grace period blocked by the CPU. */ | ||
| 58 | u8 completed; /* Last grace period completed. */ | ||
| 59 | /* If all three are equal, RCU is idle. */ | ||
| 60 | }; | ||
| 61 | |||
| 62 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | ||
| 63 | .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
| 64 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
| 65 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
| 66 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | ||
| 67 | }; | ||
| 68 | |||
| 69 | static int rcu_preempted_readers_exp(void); | ||
| 70 | static void rcu_report_exp_done(void); | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Return true if the CPU has not yet responded to the current grace period. | ||
| 74 | */ | ||
| 75 | static int rcu_cpu_blocking_cur_gp(void) | ||
| 76 | { | ||
| 77 | return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Check for a running RCU reader. Because there is only one CPU, | ||
| 82 | * there can be but one running RCU reader at a time. ;-) | ||
| 83 | */ | ||
| 84 | static int rcu_preempt_running_reader(void) | ||
| 85 | { | ||
| 86 | return current->rcu_read_lock_nesting; | ||
| 87 | } | ||
| 88 | |||
| 89 | /* | ||
| 90 | * Check for preempted RCU readers blocking any grace period. | ||
| 91 | * If the caller needs a reliable answer, it must disable hard irqs. | ||
| 92 | */ | ||
| 93 | static int rcu_preempt_blocked_readers_any(void) | ||
| 94 | { | ||
| 95 | return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); | ||
| 96 | } | ||
| 97 | |||
| 98 | /* | ||
| 99 | * Check for preempted RCU readers blocking the current grace period. | ||
| 100 | * If the caller needs a reliable answer, it must disable hard irqs. | ||
| 101 | */ | ||
| 102 | static int rcu_preempt_blocked_readers_cgp(void) | ||
| 103 | { | ||
| 104 | return rcu_preempt_ctrlblk.gp_tasks != NULL; | ||
| 105 | } | ||
| 106 | |||
| 107 | /* | ||
| 108 | * Return true if another preemptible-RCU grace period is needed. | ||
| 109 | */ | ||
| 110 | static int rcu_preempt_needs_another_gp(void) | ||
| 111 | { | ||
| 112 | return *rcu_preempt_ctrlblk.rcb.curtail != NULL; | ||
| 113 | } | ||
| 114 | |||
| 115 | /* | ||
| 116 | * Return true if a preemptible-RCU grace period is in progress. | ||
| 117 | * The caller must disable hardirqs. | ||
| 118 | */ | ||
| 119 | static int rcu_preempt_gp_in_progress(void) | ||
| 120 | { | ||
| 121 | return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; | ||
| 122 | } | ||
| 123 | |||
| 124 | /* | ||
| 125 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | ||
| 126 | * that this just means that the task currently running on the CPU is | ||
| 127 | * in a quiescent state. There might be any number of tasks blocked | ||
| 128 | * while in an RCU read-side critical section. | ||
| 129 | * | ||
| 130 | * Unlike the other rcu_*_qs() functions, callers to this function | ||
| 131 | * must disable irqs in order to protect the assignment to | ||
| 132 | * ->rcu_read_unlock_special. | ||
| 133 | * | ||
| 134 | * Because this is a single-CPU implementation, the only way a grace | ||
| 135 | * period can end is if the CPU is in a quiescent state. The reason is | ||
| 136 | * that a blocked preemptible-RCU reader can exit its critical section | ||
| 137 | * only if the CPU is running it at the time. Therefore, when the | ||
| 138 | * last task blocking the current grace period exits its RCU read-side | ||
| 139 | * critical section, neither the CPU nor blocked tasks will be stopping | ||
| 140 | * the current grace period. (In contrast, SMP implementations | ||
| 141 | * might have CPUs running in RCU read-side critical sections that | ||
| 142 | * block later grace periods -- but this is not possible given only | ||
| 143 | * one CPU.) | ||
| 144 | */ | ||
| 145 | static void rcu_preempt_cpu_qs(void) | ||
| 146 | { | ||
| 147 | /* Record both CPU and task as having responded to current GP. */ | ||
| 148 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; | ||
| 149 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
| 150 | |||
| 151 | /* | ||
| 152 | * If there is no GP, or if blocked readers are still blocking GP, | ||
| 153 | * then there is nothing more to do. | ||
| 154 | */ | ||
| 155 | if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) | ||
| 156 | return; | ||
| 157 | |||
| 158 | /* Advance callbacks. */ | ||
| 159 | rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; | ||
| 160 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; | ||
| 161 | rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; | ||
| 162 | |||
| 163 | /* If there are no blocked readers, next GP is done instantly. */ | ||
| 164 | if (!rcu_preempt_blocked_readers_any()) | ||
| 165 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; | ||
| 166 | |||
| 167 | /* If there are done callbacks, make RCU_SOFTIRQ process them. */ | ||
| 168 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | ||
| 169 | raise_softirq(RCU_SOFTIRQ); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Start a new RCU grace period if warranted. Hard irqs must be disabled. | ||
| 174 | */ | ||
| 175 | static void rcu_preempt_start_gp(void) | ||
| 176 | { | ||
| 177 | if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { | ||
| 178 | |||
| 179 | /* Official start of GP. */ | ||
| 180 | rcu_preempt_ctrlblk.gpnum++; | ||
| 181 | |||
| 182 | /* Any blocked RCU readers block new GP. */ | ||
| 183 | if (rcu_preempt_blocked_readers_any()) | ||
| 184 | rcu_preempt_ctrlblk.gp_tasks = | ||
| 185 | rcu_preempt_ctrlblk.blkd_tasks.next; | ||
| 186 | |||
| 187 | /* If there is no running reader, CPU is done with GP. */ | ||
| 188 | if (!rcu_preempt_running_reader()) | ||
| 189 | rcu_preempt_cpu_qs(); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * We have entered the scheduler, and the current task might soon be | ||
| 195 | * context-switched away from. If this task is in an RCU read-side | ||
| 196 | * critical section, we will no longer be able to rely on the CPU to | ||
| 197 | * record that fact, so we enqueue the task on the blkd_tasks list. | ||
| 198 | * If the task started after the current grace period began, as recorded | ||
| 199 | * by ->gpcpu, we enqueue it at the beginning of the list. Otherwise we | ||
| 200 | * enqueue it before the element referenced by ->gp_tasks (or at the tail if | ||
| 201 | * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element. | ||
| 202 | * The task will dequeue itself when it exits the outermost enclosing | ||
| 203 | * RCU read-side critical section. Therefore, the current grace period | ||
| 204 | * cannot be permitted to complete until the ->gp_tasks pointer becomes | ||
| 205 | * NULL. | ||
| 206 | * | ||
| 207 | * Caller must disable preemption. | ||
| 208 | */ | ||
| 209 | void rcu_preempt_note_context_switch(void) | ||
| 210 | { | ||
| 211 | struct task_struct *t = current; | ||
| 212 | unsigned long flags; | ||
| 213 | |||
| 214 | local_irq_save(flags); /* must exclude scheduler_tick(). */ | ||
| 215 | if (rcu_preempt_running_reader() && | ||
| 216 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | ||
| 217 | |||
| 218 | /* Possibly blocking in an RCU read-side critical section. */ | ||
| 219 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | ||
| 220 | |||
| 221 | /* | ||
| 222 | * If this CPU has already checked in, then this task | ||
| 223 | * will hold up the next grace period rather than the | ||
| 224 | * current grace period. Queue the task accordingly. | ||
| 225 | * If the task is queued for the current grace period | ||
| 226 | * (i.e., this CPU has not yet passed through a quiescent | ||
| 227 | * state for the current grace period), then as long | ||
| 228 | * as that task remains queued, the current grace period | ||
| 229 | * cannot end. | ||
| 230 | */ | ||
| 231 | list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); | ||
| 232 | if (rcu_cpu_blocking_cur_gp()) | ||
| 233 | rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; | ||
| 234 | } | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Either we were not in an RCU read-side critical section to | ||
| 238 | * begin with, or we have now recorded that critical section | ||
| 239 | * globally. Either way, we can now note a quiescent state | ||
| 240 | * for this CPU. Again, if we were in an RCU read-side critical | ||
| 241 | * section, and if that critical section was blocking the current | ||
| 242 | * grace period, then the fact that the task has been enqueued | ||
| 243 | * means that current grace period continues to be blocked. | ||
| 244 | */ | ||
| 245 | rcu_preempt_cpu_qs(); | ||
| 246 | local_irq_restore(flags); | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * Tiny-preemptible RCU implementation for rcu_read_lock(). | ||
| 251 | * Just increment ->rcu_read_lock_nesting, shared state will be updated | ||
| 252 | * if we block. | ||
| 253 | */ | ||
| 254 | void __rcu_read_lock(void) | ||
| 255 | { | ||
| 256 | current->rcu_read_lock_nesting++; | ||
| 257 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ | ||
| 258 | } | ||
| 259 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
| 260 | |||
| 261 | /* | ||
| 262 | * Handle special cases during rcu_read_unlock(), such as needing to | ||
| 263 | * notify RCU core processing or task having blocked during the RCU | ||
| 264 | * read-side critical section. | ||
| 265 | */ | ||
| 266 | static void rcu_read_unlock_special(struct task_struct *t) | ||
| 267 | { | ||
| 268 | int empty; | ||
| 269 | int empty_exp; | ||
| 270 | unsigned long flags; | ||
| 271 | struct list_head *np; | ||
| 272 | int special; | ||
| 273 | |||
| 274 | /* | ||
| 275 | * NMI handlers cannot block and cannot safely manipulate state. | ||
| 276 | * They therefore cannot possibly be special, so just leave. | ||
| 277 | */ | ||
| 278 | if (in_nmi()) | ||
| 279 | return; | ||
| 280 | |||
| 281 | local_irq_save(flags); | ||
| 282 | |||
| 283 | /* | ||
| 284 | * If RCU core is waiting for this CPU to exit critical section, | ||
| 285 | * let it know that we have done so. | ||
| 286 | */ | ||
| 287 | special = t->rcu_read_unlock_special; | ||
| 288 | if (special & RCU_READ_UNLOCK_NEED_QS) | ||
| 289 | rcu_preempt_cpu_qs(); | ||
| 290 | |||
| 291 | /* Hardware IRQ handlers cannot block. */ | ||
| 292 | if (in_irq()) { | ||
| 293 | local_irq_restore(flags); | ||
| 294 | return; | ||
| 295 | } | ||
| 296 | |||
| 297 | /* Clean up if blocked during RCU read-side critical section. */ | ||
| 298 | if (special & RCU_READ_UNLOCK_BLOCKED) { | ||
| 299 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; | ||
| 300 | |||
| 301 | /* | ||
| 302 | * Remove this task from the ->blkd_tasks list and adjust | ||
| 303 | * any pointers that might have been referencing it. | ||
| 304 | */ | ||
| 305 | empty = !rcu_preempt_blocked_readers_cgp(); | ||
| 306 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; | ||
| 307 | np = t->rcu_node_entry.next; | ||
| 308 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
| 309 | np = NULL; | ||
| 310 | list_del(&t->rcu_node_entry); | ||
| 311 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) | ||
| 312 | rcu_preempt_ctrlblk.gp_tasks = np; | ||
| 313 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) | ||
| 314 | rcu_preempt_ctrlblk.exp_tasks = np; | ||
| 315 | INIT_LIST_HEAD(&t->rcu_node_entry); | ||
| 316 | |||
| 317 | /* | ||
| 318 | * If this was the last task on the current list, and if | ||
| 319 | * we aren't waiting on the CPU, report the quiescent state | ||
| 320 | * and start a new grace period if needed. | ||
| 321 | */ | ||
| 322 | if (!empty && !rcu_preempt_blocked_readers_cgp()) { | ||
| 323 | rcu_preempt_cpu_qs(); | ||
| 324 | rcu_preempt_start_gp(); | ||
| 325 | } | ||
| 326 | |||
| 327 | /* | ||
| 328 | * If this was the last task on the expedited lists, | ||
| 329 | * then we need to wake up the waiting task. | ||
| 330 | */ | ||
| 331 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) | ||
| 332 | rcu_report_exp_done(); | ||
| 333 | } | ||
| 334 | local_irq_restore(flags); | ||
| 335 | } | ||
| 336 | |||
| 337 | /* | ||
| 338 | * Tiny-preemptible RCU implementation for rcu_read_unlock(). | ||
| 339 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | ||
| 340 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | ||
| 341 | * invoke rcu_read_unlock_special() to clean up after a context switch | ||
| 342 | * in an RCU read-side critical section and other special cases. | ||
| 343 | */ | ||
| 344 | void __rcu_read_unlock(void) | ||
| 345 | { | ||
| 346 | struct task_struct *t = current; | ||
| 347 | |||
| 348 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ | ||
| 349 | --t->rcu_read_lock_nesting; | ||
| 350 | barrier(); /* decrement before load of ->rcu_read_unlock_special */ | ||
| 351 | if (t->rcu_read_lock_nesting == 0 && | ||
| 352 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||
| 353 | rcu_read_unlock_special(t); | ||
| 354 | #ifdef CONFIG_PROVE_LOCKING | ||
| 355 | WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); | ||
| 356 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | ||
| 357 | } | ||
| 358 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
| 359 | |||
| 360 | /* | ||
| 361 | * Check for a quiescent state from the current CPU. When a task blocks, | ||
| 362 | * the task is recorded in the rcu_preempt_ctrlblk structure, which is | ||
| 363 | * checked elsewhere. This is called from the scheduling-clock interrupt. | ||
| 364 | * | ||
| 365 | * Caller must disable hard irqs. | ||
| 366 | */ | ||
| 367 | static void rcu_preempt_check_callbacks(void) | ||
| 368 | { | ||
| 369 | struct task_struct *t = current; | ||
| 370 | |||
| 371 | if (rcu_preempt_gp_in_progress() && | ||
| 372 | (!rcu_preempt_running_reader() || | ||
| 373 | !rcu_cpu_blocking_cur_gp())) | ||
| 374 | rcu_preempt_cpu_qs(); | ||
| 375 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | ||
| 376 | rcu_preempt_ctrlblk.rcb.donetail) | ||
| 377 | raise_softirq(RCU_SOFTIRQ); | ||
| 378 | if (rcu_preempt_gp_in_progress() && | ||
| 379 | rcu_cpu_blocking_cur_gp() && | ||
| 380 | rcu_preempt_running_reader()) | ||
| 381 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; | ||
| 382 | } | ||
| 383 | |||
| 384 | /* | ||
| 385 | * TINY_PREEMPT_RCU has an extra callback-list tail pointer to | ||
| 386 | * update, so this is invoked from __rcu_process_callbacks() to | ||
| 387 | * handle that case. Of course, it is invoked for all flavors of | ||
| 388 | * RCU, but RCU callbacks can appear only on one of the lists, and | ||
| 389 | * neither ->nexttail nor ->donetail can possibly be NULL, so there | ||
| 390 | * is no need for an explicit check. | ||
| 391 | */ | ||
| 392 | static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | ||
| 393 | { | ||
| 394 | if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) | ||
| 395 | rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; | ||
| 396 | } | ||
| 397 | |||
| 398 | /* | ||
| 399 | * Process callbacks for preemptible RCU. | ||
| 400 | */ | ||
| 401 | static void rcu_preempt_process_callbacks(void) | ||
| 402 | { | ||
| 403 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | ||
| 404 | } | ||
| 405 | |||
| 406 | /* | ||
| 407 | * Queue a preemptible-RCU callback for invocation after a grace period. | ||
| 408 | */ | ||
| 409 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
| 410 | { | ||
| 411 | unsigned long flags; | ||
| 412 | |||
| 413 | debug_rcu_head_queue(head); | ||
| 414 | head->func = func; | ||
| 415 | head->next = NULL; | ||
| 416 | |||
| 417 | local_irq_save(flags); | ||
| 418 | *rcu_preempt_ctrlblk.nexttail = head; | ||
| 419 | rcu_preempt_ctrlblk.nexttail = &head->next; | ||
| 420 | rcu_preempt_start_gp(); /* checks to see if GP needed. */ | ||
| 421 | local_irq_restore(flags); | ||
| 422 | } | ||
| 423 | EXPORT_SYMBOL_GPL(call_rcu); | ||
| 424 | |||
| 425 | void rcu_barrier(void) | ||
| 426 | { | ||
| 427 | struct rcu_synchronize rcu; | ||
| 428 | |||
| 429 | init_rcu_head_on_stack(&rcu.head); | ||
| 430 | init_completion(&rcu.completion); | ||
| 431 | /* Will wake me after RCU finished. */ | ||
| 432 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
| 433 | /* Wait for it. */ | ||
| 434 | wait_for_completion(&rcu.completion); | ||
| 435 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 436 | } | ||
| 437 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
| 438 | |||
| 439 | /* | ||
| 440 | * synchronize_rcu - wait until a grace period has elapsed. | ||
| 441 | * | ||
| 442 | * Control will return to the caller some time after a full grace | ||
| 443 | * period has elapsed, in other words after all currently executing RCU | ||
| 444 | * read-side critical sections have completed. RCU read-side critical | ||
| 445 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||
| 446 | * and may be nested. | ||
| 447 | */ | ||
| 448 | void synchronize_rcu(void) | ||
| 449 | { | ||
| 450 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 451 | if (!rcu_scheduler_active) | ||
| 452 | return; | ||
| 453 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
| 454 | |||
| 455 | WARN_ON_ONCE(rcu_preempt_running_reader()); | ||
| 456 | if (!rcu_preempt_blocked_readers_any()) | ||
| 457 | return; | ||
| 458 | |||
| 459 | /* Once we get past the fastpath checks, same code as rcu_barrier(). */ | ||
| 460 | rcu_barrier(); | ||
| 461 | } | ||
| 462 | EXPORT_SYMBOL_GPL(synchronize_rcu); | ||
| 463 | |||
| 464 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | ||
| 465 | static unsigned long sync_rcu_preempt_exp_count; | ||
| 466 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | ||
| 467 | |||
| 468 | /* | ||
| 469 | * Return non-zero if there are any tasks in RCU read-side critical | ||
| 470 | * sections blocking the current preemptible-RCU expedited grace period. | ||
| 471 | * If there is no preemptible-RCU expedited grace period currently in | ||
| 472 | * progress, returns zero unconditionally. | ||
| 473 | */ | ||
| 474 | static int rcu_preempted_readers_exp(void) | ||
| 475 | { | ||
| 476 | return rcu_preempt_ctrlblk.exp_tasks != NULL; | ||
| 477 | } | ||
| 478 | |||
| 479 | /* | ||
| 480 | * Report the exit from RCU read-side critical section for the last task | ||
| 481 | * that queued itself during or before the current expedited preemptible-RCU | ||
| 482 | * grace period. | ||
| 483 | */ | ||
| 484 | static void rcu_report_exp_done(void) | ||
| 485 | { | ||
| 486 | wake_up(&sync_rcu_preempt_exp_wq); | ||
| 487 | } | ||
| 488 | |||
| 489 | /* | ||
| 490 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | ||
| 491 | * is to rely on the fact that there is but one CPU, and that it is | ||
| 492 | * illegal for a task to invoke synchronize_rcu_expedited() while in a | ||
| 493 | * preemptible-RCU read-side critical section. Therefore, any such | ||
| 494 | * critical sections must correspond to blocked tasks, which must therefore | ||
| 495 | * be on the ->blkd_tasks list. So just record the current head of the | ||
| 496 | * list in the ->exp_tasks pointer, and wait for all tasks including and | ||
| 497 | * after the task pointed to by ->exp_tasks to drain. | ||
| 498 | */ | ||
| 499 | void synchronize_rcu_expedited(void) | ||
| 500 | { | ||
| 501 | unsigned long flags; | ||
| 502 | struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; | ||
| 503 | unsigned long snap; | ||
| 504 | |||
| 505 | barrier(); /* ensure prior action seen before grace period. */ | ||
| 506 | |||
| 507 | WARN_ON_ONCE(rcu_preempt_running_reader()); | ||
| 508 | |||
| 509 | /* | ||
| 510 | * Acquire lock so that there is only one preemptible RCU grace | ||
| 511 | * period in flight. Of course, if someone does the expedited | ||
| 512 | * grace period for us while we are acquiring the lock, just leave. | ||
| 513 | */ | ||
| 514 | snap = sync_rcu_preempt_exp_count + 1; | ||
| 515 | mutex_lock(&sync_rcu_preempt_exp_mutex); | ||
| 516 | if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) | ||
| 517 | goto unlock_mb_ret; /* Others did our work for us. */ | ||
| 518 | |||
| 519 | local_irq_save(flags); | ||
| 520 | |||
| 521 | /* | ||
| 522 | * All RCU readers have to already be on blkd_tasks because | ||
| 523 | * we cannot legally be executing in an RCU read-side critical | ||
| 524 | * section. | ||
| 525 | */ | ||
| 526 | |||
| 527 | /* Snapshot current head of ->blkd_tasks list. */ | ||
| 528 | rpcp->exp_tasks = rpcp->blkd_tasks.next; | ||
| 529 | if (rpcp->exp_tasks == &rpcp->blkd_tasks) | ||
| 530 | rpcp->exp_tasks = NULL; | ||
| 531 | local_irq_restore(flags); | ||
| 532 | |||
| 533 | /* Wait for tail of ->blkd_tasks list to drain. */ | ||
| 534 | if (rcu_preempted_readers_exp()) | ||
| 535 | wait_event(sync_rcu_preempt_exp_wq, | ||
| 536 | !rcu_preempted_readers_exp()); | ||
| 537 | |||
| 538 | /* Clean up and exit. */ | ||
| 539 | barrier(); /* ensure expedited GP seen before counter increment. */ | ||
| 540 | sync_rcu_preempt_exp_count++; | ||
| 541 | unlock_mb_ret: | ||
| 542 | mutex_unlock(&sync_rcu_preempt_exp_mutex); | ||
| 543 | barrier(); /* ensure subsequent action seen after grace period. */ | ||
| 544 | } | ||
| 545 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | ||
| 546 | |||
| 547 | /* | ||
| 548 | * Does preemptible RCU need the CPU to stay out of dynticks mode? | ||
| 549 | */ | ||
| 550 | int rcu_preempt_needs_cpu(void) | ||
| 551 | { | ||
| 552 | if (!rcu_preempt_running_reader()) | ||
| 553 | rcu_preempt_cpu_qs(); | ||
| 554 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; | ||
| 555 | } | ||
| 556 | |||
| 557 | /* | ||
| 558 | * Check for a task exiting while in a preemptible-RCU read-side | ||
| 559 | * critical section; clean up if so. No need to issue warnings, | ||
| 560 | * as debug_check_no_locks_held() already does this if lockdep | ||
| 561 | * is enabled. | ||
| 562 | */ | ||
| 563 | void exit_rcu(void) | ||
| 564 | { | ||
| 565 | struct task_struct *t = current; | ||
| 566 | |||
| 567 | if (t->rcu_read_lock_nesting == 0) | ||
| 568 | return; | ||
| 569 | t->rcu_read_lock_nesting = 1; | ||
| 570 | rcu_read_unlock(); | ||
| 571 | } | ||
| 572 | |||
| 573 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Because preemptible RCU does not exist, it never has any callbacks | ||
| 577 | * to check. | ||
| 578 | */ | ||
| 579 | static void rcu_preempt_check_callbacks(void) | ||
| 580 | { | ||
| 581 | } | ||
| 582 | |||
| 583 | /* | ||
| 584 | * Because preemptible RCU does not exist, it never has any callbacks | ||
| 585 | * to remove. | ||
| 586 | */ | ||
| 587 | static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | ||
| 588 | { | ||
| 589 | } | ||
| 590 | |||
| 591 | /* | ||
| 592 | * Because preemptible RCU does not exist, it never has any callbacks | ||
| 593 | * to process. | ||
| 594 | */ | ||
| 595 | static void rcu_preempt_process_callbacks(void) | ||
| 596 | { | ||
| 597 | } | ||
| 598 | |||
| 599 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | ||
| 600 | |||
| 25 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 601 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 26 | 602 | ||
| 27 | #include <linux/kernel_stat.h> | 603 | #include <linux/kernel_stat.h> |
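The preemptible flavour above keeps the usual updater-side API. A self-contained usage sketch follows; the example structure and functions are invented for illustration and are not part of this patch.

struct example {
	struct rcu_head rcu;
	int value;
};

static struct example __rcu *example_ptr;

static void example_reclaim(struct rcu_head *head)
{
	kfree(container_of(head, struct example, rcu));
}

static void example_replace(struct example *newp)
{
	struct example *oldp;

	oldp = rcu_dereference_protected(example_ptr, 1);	/* updater-side access */
	rcu_assign_pointer(example_ptr, newp);
	if (oldp)
		call_rcu(&oldp->rcu, example_reclaim);	/* freed once readers drain */
}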
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 2e2726d790b9..9d8e8fb2515f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -120,7 +120,7 @@ struct rcu_torture { | |||
| 120 | }; | 120 | }; |
| 121 | 121 | ||
| 122 | static LIST_HEAD(rcu_torture_freelist); | 122 | static LIST_HEAD(rcu_torture_freelist); |
| 123 | static struct rcu_torture *rcu_torture_current; | 123 | static struct rcu_torture __rcu *rcu_torture_current; |
| 124 | static long rcu_torture_current_version; | 124 | static long rcu_torture_current_version; |
| 125 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; | 125 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; |
| 126 | static DEFINE_SPINLOCK(rcu_torture_lock); | 126 | static DEFINE_SPINLOCK(rcu_torture_lock); |
| @@ -153,8 +153,10 @@ int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | |||
| 153 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ | 153 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ |
| 154 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ | 154 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ |
| 155 | static int fullstop = FULLSTOP_RMMOD; | 155 | static int fullstop = FULLSTOP_RMMOD; |
| 156 | DEFINE_MUTEX(fullstop_mutex); /* Protect fullstop transitions and spawning */ | 156 | /* |
| 157 | /* of kthreads. */ | 157 | * Protect fullstop transitions and spawning of kthreads. |
| 158 | */ | ||
| 159 | static DEFINE_MUTEX(fullstop_mutex); | ||
| 158 | 160 | ||
| 159 | /* | 161 | /* |
| 160 | * Detect and respond to a system shutdown. | 162 | * Detect and respond to a system shutdown. |
| @@ -303,6 +305,10 @@ static void rcu_read_delay(struct rcu_random_state *rrsp) | |||
| 303 | mdelay(longdelay_ms); | 305 | mdelay(longdelay_ms); |
| 304 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) | 306 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) |
| 305 | udelay(shortdelay_us); | 307 | udelay(shortdelay_us); |
| 308 | #ifdef CONFIG_PREEMPT | ||
| 309 | if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) | ||
| 310 | preempt_schedule(); /* No QS if preempt_disable() in effect */ | ||
| 311 | #endif | ||
| 306 | } | 312 | } |
| 307 | 313 | ||
| 308 | static void rcu_torture_read_unlock(int idx) __releases(RCU) | 314 | static void rcu_torture_read_unlock(int idx) __releases(RCU) |
| @@ -536,6 +542,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp) | |||
| 536 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); | 542 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); |
| 537 | if (!delay) | 543 | if (!delay) |
| 538 | schedule_timeout_interruptible(longdelay); | 544 | schedule_timeout_interruptible(longdelay); |
| 545 | else | ||
| 546 | rcu_read_delay(rrsp); | ||
| 539 | } | 547 | } |
| 540 | 548 | ||
| 541 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) | 549 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) |
| @@ -731,7 +739,8 @@ rcu_torture_writer(void *arg) | |||
| 731 | continue; | 739 | continue; |
| 732 | rp->rtort_pipe_count = 0; | 740 | rp->rtort_pipe_count = 0; |
| 733 | udelay(rcu_random(&rand) & 0x3ff); | 741 | udelay(rcu_random(&rand) & 0x3ff); |
| 734 | old_rp = rcu_torture_current; | 742 | old_rp = rcu_dereference_check(rcu_torture_current, |
| 743 | current == writer_task); | ||
| 735 | rp->rtort_mbtest = 1; | 744 | rp->rtort_mbtest = 1; |
| 736 | rcu_assign_pointer(rcu_torture_current, rp); | 745 | rcu_assign_pointer(rcu_torture_current, rp); |
| 737 | smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ | 746 | smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ |
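Using rcu_dereference_check() with an update-side condition, as the torture writer does above, is the usual way to tell lockdep that an access is safe without rcu_read_lock(). A generic sketch with invented names:

struct widget {
	int payload;
};
static struct widget __rcu *widget_ptr;
static struct task_struct *widget_writer;	/* the one task allowed to update */

static struct widget *widget_peek(void)
{
	/* Safe without rcu_read_lock() when we are the designated writer. */
	return rcu_dereference_check(widget_ptr, current == widget_writer);
}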
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d5bc43976c5a..ccdc04c47981 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -143,6 +143,11 @@ module_param(blimit, int, 0); | |||
| 143 | module_param(qhimark, int, 0); | 143 | module_param(qhimark, int, 0); |
| 144 | module_param(qlowmark, int, 0); | 144 | module_param(qlowmark, int, 0); |
| 145 | 145 | ||
| 146 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | ||
| 147 | int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT; | ||
| 148 | module_param(rcu_cpu_stall_suppress, int, 0644); | ||
| 149 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
| 150 | |||
| 146 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 151 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
| 147 | static int rcu_pending(int cpu); | 152 | static int rcu_pending(int cpu); |
| 148 | 153 | ||
| @@ -450,7 +455,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
| 450 | 455 | ||
| 451 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 456 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
| 452 | 457 | ||
| 453 | int rcu_cpu_stall_panicking __read_mostly; | 458 | int rcu_cpu_stall_suppress __read_mostly; |
| 454 | 459 | ||
| 455 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 460 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
| 456 | { | 461 | { |
| @@ -482,8 +487,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 482 | rcu_print_task_stall(rnp); | 487 | rcu_print_task_stall(rnp); |
| 483 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 488 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 484 | 489 | ||
| 485 | /* OK, time to rat on our buddy... */ | 490 | /* |
| 486 | 491 | * OK, time to rat on our buddy... | |
| 492 | * See Documentation/RCU/stallwarn.txt for info on how to debug | ||
| 493 | * RCU CPU stall warnings. | ||
| 494 | */ | ||
| 487 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", | 495 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", |
| 488 | rsp->name); | 496 | rsp->name); |
| 489 | rcu_for_each_leaf_node(rsp, rnp) { | 497 | rcu_for_each_leaf_node(rsp, rnp) { |
| @@ -512,6 +520,11 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
| 512 | unsigned long flags; | 520 | unsigned long flags; |
| 513 | struct rcu_node *rnp = rcu_get_root(rsp); | 521 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 514 | 522 | ||
| 523 | /* | ||
| 524 | * OK, time to rat on ourselves... | ||
| 525 | * See Documentation/RCU/stallwarn.txt for info on how to debug | ||
| 526 | * RCU CPU stall warnings. | ||
| 527 | */ | ||
| 515 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 528 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
| 516 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 529 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
| 517 | trigger_all_cpu_backtrace(); | 530 | trigger_all_cpu_backtrace(); |
| @@ -530,11 +543,11 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 530 | long delta; | 543 | long delta; |
| 531 | struct rcu_node *rnp; | 544 | struct rcu_node *rnp; |
| 532 | 545 | ||
| 533 | if (rcu_cpu_stall_panicking) | 546 | if (rcu_cpu_stall_suppress) |
| 534 | return; | 547 | return; |
| 535 | delta = jiffies - rsp->jiffies_stall; | 548 | delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); |
| 536 | rnp = rdp->mynode; | 549 | rnp = rdp->mynode; |
| 537 | if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { | 550 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) { |
| 538 | 551 | ||
| 539 | /* We haven't checked in, so go dump stack. */ | 552 | /* We haven't checked in, so go dump stack. */ |
| 540 | print_cpu_stall(rsp); | 553 | print_cpu_stall(rsp); |
| @@ -548,10 +561,26 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 548 | 561 | ||
| 549 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) | 562 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) |
| 550 | { | 563 | { |
| 551 | rcu_cpu_stall_panicking = 1; | 564 | rcu_cpu_stall_suppress = 1; |
| 552 | return NOTIFY_DONE; | 565 | return NOTIFY_DONE; |
| 553 | } | 566 | } |
| 554 | 567 | ||
| 568 | /** | ||
| 569 | * rcu_cpu_stall_reset - prevent further stall warnings in current grace period | ||
| 570 | * | ||
| 571 | * Set the stall-warning timeout way off into the future, thus preventing | ||
| 572 | * any RCU CPU stall-warning messages from appearing in the current set of | ||
| 573 | * RCU grace periods. | ||
| 574 | * | ||
| 575 | * The caller must disable hard irqs. | ||
| 576 | */ | ||
| 577 | void rcu_cpu_stall_reset(void) | ||
| 578 | { | ||
| 579 | rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
| 580 | rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
| 581 | rcu_preempt_stall_reset(); | ||
| 582 | } | ||
| 583 | |||
| 555 | static struct notifier_block rcu_panic_block = { | 584 | static struct notifier_block rcu_panic_block = { |
| 556 | .notifier_call = rcu_panic, | 585 | .notifier_call = rcu_panic, |
| 557 | }; | 586 | }; |
| @@ -571,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 571 | { | 600 | { |
| 572 | } | 601 | } |
| 573 | 602 | ||
| 603 | void rcu_cpu_stall_reset(void) | ||
| 604 | { | ||
| 605 | } | ||
| 606 | |||
| 574 | static void __init check_cpu_stall_init(void) | 607 | static void __init check_cpu_stall_init(void) |
| 575 | { | 608 | { |
| 576 | } | 609 | } |
| @@ -712,7 +745,7 @@ static void | |||
| 712 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | 745 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) |
| 713 | __releases(rcu_get_root(rsp)->lock) | 746 | __releases(rcu_get_root(rsp)->lock) |
| 714 | { | 747 | { |
| 715 | struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | 748 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
| 716 | struct rcu_node *rnp = rcu_get_root(rsp); | 749 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 717 | 750 | ||
| 718 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { | 751 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { |
| @@ -960,7 +993,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 960 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 993 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) |
| 961 | { | 994 | { |
| 962 | int i; | 995 | int i; |
| 963 | struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | 996 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
| 964 | 997 | ||
| 965 | if (rdp->nxtlist == NULL) | 998 | if (rdp->nxtlist == NULL) |
| 966 | return; /* irqs disabled, so comparison is stable. */ | 999 | return; /* irqs disabled, so comparison is stable. */ |
| @@ -971,6 +1004,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | |||
| 971 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1004 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
| 972 | rdp->nxttail[i] = &rdp->nxtlist; | 1005 | rdp->nxttail[i] = &rdp->nxtlist; |
| 973 | rsp->orphan_qlen += rdp->qlen; | 1006 | rsp->orphan_qlen += rdp->qlen; |
| 1007 | rdp->n_cbs_orphaned += rdp->qlen; | ||
| 974 | rdp->qlen = 0; | 1008 | rdp->qlen = 0; |
| 975 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | 1009 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ |
| 976 | } | 1010 | } |
| @@ -984,7 +1018,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
| 984 | struct rcu_data *rdp; | 1018 | struct rcu_data *rdp; |
| 985 | 1019 | ||
| 986 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1020 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
| 987 | rdp = rsp->rda[smp_processor_id()]; | 1021 | rdp = this_cpu_ptr(rsp->rda); |
| 988 | if (rsp->orphan_cbs_list == NULL) { | 1022 | if (rsp->orphan_cbs_list == NULL) { |
| 989 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 1023 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
| 990 | return; | 1024 | return; |
| @@ -992,6 +1026,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
| 992 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; | 1026 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; |
| 993 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; | 1027 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; |
| 994 | rdp->qlen += rsp->orphan_qlen; | 1028 | rdp->qlen += rsp->orphan_qlen; |
| 1029 | rdp->n_cbs_adopted += rsp->orphan_qlen; | ||
| 995 | rsp->orphan_cbs_list = NULL; | 1030 | rsp->orphan_cbs_list = NULL; |
| 996 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; | 1031 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; |
| 997 | rsp->orphan_qlen = 0; | 1032 | rsp->orphan_qlen = 0; |
| @@ -1007,7 +1042,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
| 1007 | unsigned long flags; | 1042 | unsigned long flags; |
| 1008 | unsigned long mask; | 1043 | unsigned long mask; |
| 1009 | int need_report = 0; | 1044 | int need_report = 0; |
| 1010 | struct rcu_data *rdp = rsp->rda[cpu]; | 1045 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1011 | struct rcu_node *rnp; | 1046 | struct rcu_node *rnp; |
| 1012 | 1047 | ||
| 1013 | /* Exclude any attempts to start a new grace period. */ | 1048 | /* Exclude any attempts to start a new grace period. */ |
| @@ -1123,6 +1158,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1123 | 1158 | ||
| 1124 | /* Update count, and requeue any remaining callbacks. */ | 1159 | /* Update count, and requeue any remaining callbacks. */ |
| 1125 | rdp->qlen -= count; | 1160 | rdp->qlen -= count; |
| 1161 | rdp->n_cbs_invoked += count; | ||
| 1126 | if (list != NULL) { | 1162 | if (list != NULL) { |
| 1127 | *tail = rdp->nxtlist; | 1163 | *tail = rdp->nxtlist; |
| 1128 | rdp->nxtlist = list; | 1164 | rdp->nxtlist = list; |
| @@ -1226,7 +1262,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | |||
| 1226 | cpu = rnp->grplo; | 1262 | cpu = rnp->grplo; |
| 1227 | bit = 1; | 1263 | bit = 1; |
| 1228 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { | 1264 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { |
| 1229 | if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) | 1265 | if ((rnp->qsmask & bit) != 0 && |
| 1266 | f(per_cpu_ptr(rsp->rda, cpu))) | ||
| 1230 | mask |= bit; | 1267 | mask |= bit; |
| 1231 | } | 1268 | } |
| 1232 | if (mask != 0) { | 1269 | if (mask != 0) { |
| @@ -1402,7 +1439,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1402 | * a quiescent state betweentimes. | 1439 | * a quiescent state betweentimes. |
| 1403 | */ | 1440 | */ |
| 1404 | local_irq_save(flags); | 1441 | local_irq_save(flags); |
| 1405 | rdp = rsp->rda[smp_processor_id()]; | 1442 | rdp = this_cpu_ptr(rsp->rda); |
| 1406 | rcu_process_gp_end(rsp, rdp); | 1443 | rcu_process_gp_end(rsp, rdp); |
| 1407 | check_for_new_grace_period(rsp, rdp); | 1444 | check_for_new_grace_period(rsp, rdp); |
| 1408 | 1445 | ||
| @@ -1701,7 +1738,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
| 1701 | { | 1738 | { |
| 1702 | unsigned long flags; | 1739 | unsigned long flags; |
| 1703 | int i; | 1740 | int i; |
| 1704 | struct rcu_data *rdp = rsp->rda[cpu]; | 1741 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1705 | struct rcu_node *rnp = rcu_get_root(rsp); | 1742 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1706 | 1743 | ||
| 1707 | /* Set up local state, ensuring consistent view of global state. */ | 1744 | /* Set up local state, ensuring consistent view of global state. */ |
| @@ -1729,7 +1766,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
| 1729 | { | 1766 | { |
| 1730 | unsigned long flags; | 1767 | unsigned long flags; |
| 1731 | unsigned long mask; | 1768 | unsigned long mask; |
| 1732 | struct rcu_data *rdp = rsp->rda[cpu]; | 1769 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1733 | struct rcu_node *rnp = rcu_get_root(rsp); | 1770 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1734 | 1771 | ||
| 1735 | /* Set up local state, ensuring consistent view of global state. */ | 1772 | /* Set up local state, ensuring consistent view of global state. */ |
| @@ -1865,7 +1902,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
| 1865 | /* | 1902 | /* |
| 1866 | * Helper function for rcu_init() that initializes one rcu_state structure. | 1903 | * Helper function for rcu_init() that initializes one rcu_state structure. |
| 1867 | */ | 1904 | */ |
| 1868 | static void __init rcu_init_one(struct rcu_state *rsp) | 1905 | static void __init rcu_init_one(struct rcu_state *rsp, |
| 1906 | struct rcu_data __percpu *rda) | ||
| 1869 | { | 1907 | { |
| 1870 | static char *buf[] = { "rcu_node_level_0", | 1908 | static char *buf[] = { "rcu_node_level_0", |
| 1871 | "rcu_node_level_1", | 1909 | "rcu_node_level_1", |
| @@ -1918,37 +1956,23 @@ static void __init rcu_init_one(struct rcu_state *rsp) | |||
| 1918 | } | 1956 | } |
| 1919 | } | 1957 | } |
| 1920 | 1958 | ||
| 1959 | rsp->rda = rda; | ||
| 1921 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | 1960 | rnp = rsp->level[NUM_RCU_LVLS - 1]; |
| 1922 | for_each_possible_cpu(i) { | 1961 | for_each_possible_cpu(i) { |
| 1923 | while (i > rnp->grphi) | 1962 | while (i > rnp->grphi) |
| 1924 | rnp++; | 1963 | rnp++; |
| 1925 | rsp->rda[i]->mynode = rnp; | 1964 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; |
| 1926 | rcu_boot_init_percpu_data(i, rsp); | 1965 | rcu_boot_init_percpu_data(i, rsp); |
| 1927 | } | 1966 | } |
| 1928 | } | 1967 | } |
| 1929 | 1968 | ||
| 1930 | /* | ||
| 1931 | * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used | ||
| 1932 | * nowhere else! Assigns leaf node pointers into each CPU's rcu_data | ||
| 1933 | * structure. | ||
| 1934 | */ | ||
| 1935 | #define RCU_INIT_FLAVOR(rsp, rcu_data) \ | ||
| 1936 | do { \ | ||
| 1937 | int i; \ | ||
| 1938 | \ | ||
| 1939 | for_each_possible_cpu(i) { \ | ||
| 1940 | (rsp)->rda[i] = &per_cpu(rcu_data, i); \ | ||
| 1941 | } \ | ||
| 1942 | rcu_init_one(rsp); \ | ||
| 1943 | } while (0) | ||
| 1944 | |||
| 1945 | void __init rcu_init(void) | 1969 | void __init rcu_init(void) |
| 1946 | { | 1970 | { |
| 1947 | int cpu; | 1971 | int cpu; |
| 1948 | 1972 | ||
| 1949 | rcu_bootup_announce(); | 1973 | rcu_bootup_announce(); |
| 1950 | RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); | 1974 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
| 1951 | RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); | 1975 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
| 1952 | __rcu_init_preempt(); | 1976 | __rcu_init_preempt(); |
| 1953 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 1977 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
| 1954 | 1978 | ||
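
The rest of this file's changes follow from one structural move: each flavor's rda[NR_CPUS] array of pointers becomes a single __percpu pointer, lookups go through per_cpu_ptr()/this_cpu_ptr(), and the RCU_INIT_FLAVOR() macro folds into rcu_init_one(), which now receives the percpu base as an argument. A minimal userspace analogue of the access pattern (the kernel's real percpu machinery is dedicated per-CPU memory, not a flat array; per_cpu_ptr_like() and the other names are illustrative only):

#include <stdio.h>

#define NR_CPUS 4

struct rcu_data_like { long qlen; };

/* stand-in for a __percpu allocation: one slot per possible CPU */
static struct rcu_data_like rda_storage[NR_CPUS];

struct rcu_state_like {
	struct rcu_data_like *rda;      /* was: struct rcu_data *rda[NR_CPUS] */
};

/* analogue of per_cpu_ptr(rsp->rda, cpu) */
static struct rcu_data_like *per_cpu_ptr_like(struct rcu_data_like *base, int cpu)
{
	return &base[cpu];
}

int main(void)
{
	struct rcu_state_like rsp = { .rda = rda_storage };
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		per_cpu_ptr_like(rsp.rda, cpu)->qlen = cpu;

	printf("cpu2 qlen=%ld\n", per_cpu_ptr_like(rsp.rda, 2)->qlen);
	return 0;
}

The per_cpu_ptr()/this_cpu_ptr() calls in the hunks above play the role of per_cpu_ptr_like() here, with real per-CPU storage behind them.
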
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 14c040b18ed0..91d4170c5c13 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -202,6 +202,9 @@ struct rcu_data { | |||
| 202 | long qlen; /* # of queued callbacks */ | 202 | long qlen; /* # of queued callbacks */ |
| 203 | long qlen_last_fqs_check; | 203 | long qlen_last_fqs_check; |
| 204 | /* qlen at last check for QS forcing */ | 204 | /* qlen at last check for QS forcing */ |
| 205 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | ||
| 206 | unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ | ||
| 207 | unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ | ||
| 205 | unsigned long n_force_qs_snap; | 208 | unsigned long n_force_qs_snap; |
| 206 | /* did other CPU force QS recently? */ | 209 | /* did other CPU force QS recently? */ |
| 207 | long blimit; /* Upper limit on a processed batch */ | 210 | long blimit; /* Upper limit on a processed batch */ |
| @@ -254,19 +257,23 @@ struct rcu_data { | |||
| 254 | #define RCU_STALL_DELAY_DELTA 0 | 257 | #define RCU_STALL_DELAY_DELTA 0 |
| 255 | #endif | 258 | #endif |
| 256 | 259 | ||
| 257 | #define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) | 260 | #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ |
| 261 | RCU_STALL_DELAY_DELTA) | ||
| 258 | /* for rsp->jiffies_stall */ | 262 | /* for rsp->jiffies_stall */ |
| 259 | #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) | 263 | #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) |
| 260 | /* for rsp->jiffies_stall */ | 264 | /* for rsp->jiffies_stall */ |
| 261 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ | 265 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ |
| 262 | /* to take at least one */ | 266 | /* to take at least one */ |
| 263 | /* scheduling clock irq */ | 267 | /* scheduling clock irq */ |
| 264 | /* before ratting on them. */ | 268 | /* before ratting on them. */ |
| 265 | 269 | ||
| 266 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 270 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE |
| 271 | #define RCU_CPU_STALL_SUPPRESS_INIT 0 | ||
| 272 | #else | ||
| 273 | #define RCU_CPU_STALL_SUPPRESS_INIT 1 | ||
| 274 | #endif | ||
| 267 | 275 | ||
| 268 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | 276 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 269 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | ||
| 270 | 277 | ||
| 271 | /* | 278 | /* |
| 272 | * RCU global state, including node hierarchy. This hierarchy is | 279 | * RCU global state, including node hierarchy. This hierarchy is |
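
The stall-warning interval above is now derived from CONFIG_RCU_CPU_STALL_TIMEOUT rather than hard-coded at 10 s, and the recheck interval is computed from the check interval instead of being an independent 30 s constant. A worked example of the two macros, assuming HZ=1000, a 60-second timeout and a zero RCU_STALL_DELAY_DELTA (all assumed values, not from any particular .config):

#include <stdio.h>

int main(void)
{
	const unsigned long hz = 1000;
	const unsigned long timeout_sec = 60;   /* CONFIG_RCU_CPU_STALL_TIMEOUT */
	const unsigned long delay_delta = 0;    /* RCU_STALL_DELAY_DELTA        */

	unsigned long check   = timeout_sec * hz + delay_delta; /* first warning      */
	unsigned long recheck = 3 * check + 30;                  /* follow-up warnings */

	printf("stall check after %lu jiffies, recheck every %lu jiffies\n",
	       check, recheck);
	return 0;
}
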
| @@ -283,7 +290,7 @@ struct rcu_state { | |||
| 283 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ | 290 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ |
| 284 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ | 291 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ |
| 285 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ | 292 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ |
| 286 | struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ | 293 | struct rcu_data __percpu *rda; /* pointer to percpu rcu_data. */ |
| 287 | 294 | ||
| 288 | /* The following fields are guarded by the root rcu_node's lock. */ | 295 | /* The following fields are guarded by the root rcu_node's lock. */ |
| 289 | 296 | ||
| @@ -365,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | |||
| 365 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 372 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
| 366 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 373 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
| 367 | static void rcu_print_task_stall(struct rcu_node *rnp); | 374 | static void rcu_print_task_stall(struct rcu_node *rnp); |
| 375 | static void rcu_preempt_stall_reset(void); | ||
| 368 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 376 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 369 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 377 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
| 370 | #ifdef CONFIG_HOTPLUG_CPU | 378 | #ifdef CONFIG_HOTPLUG_CPU |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 0e4f420245d9..71a4147473f9 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -57,7 +57,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
| 57 | printk(KERN_INFO | 57 | printk(KERN_INFO |
| 58 | "\tRCU-based detection of stalled CPUs is disabled.\n"); | 58 | "\tRCU-based detection of stalled CPUs is disabled.\n"); |
| 59 | #endif | 59 | #endif |
| 60 | #ifndef CONFIG_RCU_CPU_STALL_VERBOSE | 60 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) |
| 61 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | 61 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); |
| 62 | #endif | 62 | #endif |
| 63 | #if NUM_RCU_LVL_4 != 0 | 63 | #if NUM_RCU_LVL_4 != 0 |
| @@ -154,7 +154,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 154 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | 154 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
| 155 | 155 | ||
| 156 | /* Possibly blocking in an RCU read-side critical section. */ | 156 | /* Possibly blocking in an RCU read-side critical section. */ |
| 157 | rdp = rcu_preempt_state.rda[cpu]; | 157 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); |
| 158 | rnp = rdp->mynode; | 158 | rnp = rdp->mynode; |
| 159 | raw_spin_lock_irqsave(&rnp->lock, flags); | 159 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 160 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | 160 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
| @@ -201,7 +201,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 201 | */ | 201 | */ |
| 202 | void __rcu_read_lock(void) | 202 | void __rcu_read_lock(void) |
| 203 | { | 203 | { |
| 204 | ACCESS_ONCE(current->rcu_read_lock_nesting)++; | 204 | current->rcu_read_lock_nesting++; |
| 205 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ | 205 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ |
| 206 | } | 206 | } |
| 207 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | 207 | EXPORT_SYMBOL_GPL(__rcu_read_lock); |
| @@ -344,7 +344,9 @@ void __rcu_read_unlock(void) | |||
| 344 | struct task_struct *t = current; | 344 | struct task_struct *t = current; |
| 345 | 345 | ||
| 346 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | 346 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ |
| 347 | if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && | 347 | --t->rcu_read_lock_nesting; |
| 348 | barrier(); /* decrement before load of ->rcu_read_unlock_special */ | ||
| 349 | if (t->rcu_read_lock_nesting == 0 && | ||
| 348 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 350 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
| 349 | rcu_read_unlock_special(t); | 351 | rcu_read_unlock_special(t); |
| 350 | #ifdef CONFIG_PROVE_LOCKING | 352 | #ifdef CONFIG_PROVE_LOCKING |
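
The read-side hunks drop the ACCESS_ONCE() wrappers on ->rcu_read_lock_nesting (plain accesses to the task-local counter suffice) and instead insert an explicit compiler barrier so the outermost decrement is complete before ->rcu_read_unlock_special is sampled. A userspace sketch of that ordering, with barrier() spelled out as a GCC-style compiler barrier; struct task_like and the function names are invented:

#include <stdio.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

struct task_like {
	int nesting;            /* ->rcu_read_lock_nesting        */
	int unlock_special;     /* ->rcu_read_unlock_special flag */
};

static void read_unlock_special(struct task_like *t)
{
	printf("handling deferred unlock work\n");
	t->unlock_special = 0;
}

static void read_lock(struct task_like *t)
{
	t->nesting++;
	barrier();              /* keep the critical section after the increment */
}

static void read_unlock(struct task_like *t)
{
	barrier();              /* keep the critical section before the decrement */
	--t->nesting;
	barrier();              /* decrement before the load of unlock_special    */
	if (t->nesting == 0 && t->unlock_special)
		read_unlock_special(t);
}

int main(void)
{
	struct task_like t = { 0, 1 };

	read_lock(&t);
	read_unlock(&t);        /* outermost unlock sees the pending flag */
	return 0;
}
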
| @@ -417,6 +419,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) | |||
| 417 | } | 419 | } |
| 418 | } | 420 | } |
| 419 | 421 | ||
| 422 | /* | ||
| 423 | * Suppress preemptible RCU's CPU stall warnings by pushing the | ||
| 424 | * time of the next stall-warning message comfortably far into the | ||
| 425 | * future. | ||
| 426 | */ | ||
| 427 | static void rcu_preempt_stall_reset(void) | ||
| 428 | { | ||
| 429 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
| 430 | } | ||
| 431 | |||
| 420 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 432 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 421 | 433 | ||
| 422 | /* | 434 | /* |
| @@ -546,9 +558,11 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
| 546 | * | 558 | * |
| 547 | * Control will return to the caller some time after a full grace | 559 | * Control will return to the caller some time after a full grace |
| 548 | * period has elapsed, in other words after all currently executing RCU | 560 | * period has elapsed, in other words after all currently executing RCU |
| 549 | * read-side critical sections have completed. RCU read-side critical | 561 | * read-side critical sections have completed. Note, however, that |
| 550 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | 562 | * upon return from synchronize_rcu(), the caller might well be executing |
| 551 | * and may be nested. | 563 | * concurrently with new RCU read-side critical sections that began while |
| 564 | * synchronize_rcu() was waiting. RCU read-side critical sections are | ||
| 565 | * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. | ||
| 552 | */ | 566 | */ |
| 553 | void synchronize_rcu(void) | 567 | void synchronize_rcu(void) |
| 554 | { | 568 | { |
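
The reworded synchronize_rcu() comment describes the usual update-side contract: pre-existing readers are waited for, readers that start afterwards are not, so only data that was unpublished before the wait may be reclaimed. A hedged userspace sketch of that unlink-wait-free sequence, with wait_for_readers() standing in for synchronize_rcu(); this is an illustration of the pattern, not code from this patch:

#include <stdio.h>
#include <stdlib.h>

struct node { int val; };

static struct node *shared;         /* published pointer readers follow */

static void wait_for_readers(void)  /* stand-in for synchronize_rcu() */
{
	/* in the kernel this blocks until every pre-existing read-side
	 * critical section has finished; new readers may already be
	 * using the new pointer by the time it returns */
}

int main(void)
{
	struct node *oldp, *newp;

	shared = malloc(sizeof(*shared));
	shared->val = 1;

	newp = malloc(sizeof(*newp));
	newp->val = 2;

	oldp = shared;
	shared = newp;          /* publish the replacement                  */
	wait_for_readers();     /* all readers of oldp are now done         */
	free(oldp);             /* safe: no pre-existing reader can hold it */

	printf("now published: %d\n", shared->val);
	free(newp);
	return 0;
}
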
| @@ -771,7 +785,7 @@ static void rcu_preempt_send_cbs_to_orphanage(void) | |||
| 771 | */ | 785 | */ |
| 772 | static void __init __rcu_init_preempt(void) | 786 | static void __init __rcu_init_preempt(void) |
| 773 | { | 787 | { |
| 774 | RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); | 788 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); |
| 775 | } | 789 | } |
| 776 | 790 | ||
| 777 | /* | 791 | /* |
| @@ -865,6 +879,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp) | |||
| 865 | { | 879 | { |
| 866 | } | 880 | } |
| 867 | 881 | ||
| 882 | /* | ||
| 883 | * Because preemptible RCU does not exist, there is no need to suppress | ||
| 884 | * its CPU stall warnings. | ||
| 885 | */ | ||
| 886 | static void rcu_preempt_stall_reset(void) | ||
| 887 | { | ||
| 888 | } | ||
| 889 | |||
| 868 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 890 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 869 | 891 | ||
| 870 | /* | 892 | /* |
| @@ -919,15 +941,6 @@ static void rcu_preempt_process_callbacks(void) | |||
| 919 | } | 941 | } |
| 920 | 942 | ||
| 921 | /* | 943 | /* |
| 922 | * In classic RCU, call_rcu() is just call_rcu_sched(). | ||
| 923 | */ | ||
| 924 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
| 925 | { | ||
| 926 | call_rcu_sched(head, func); | ||
| 927 | } | ||
| 928 | EXPORT_SYMBOL_GPL(call_rcu); | ||
| 929 | |||
| 930 | /* | ||
| 931 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 944 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
| 932 | * But because preemptable RCU does not exist, map to rcu-sched. | 945 | * But because preemptable RCU does not exist, map to rcu-sched. |
| 933 | */ | 946 | */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 36c95b45738e..d15430b9d122 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
| @@ -64,7 +64,9 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 64 | rdp->dynticks_fqs); | 64 | rdp->dynticks_fqs); |
| 65 | #endif /* #ifdef CONFIG_NO_HZ */ | 65 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 66 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 66 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
| 67 | seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit); | 67 | seq_printf(m, " ql=%ld b=%ld", rdp->qlen, rdp->blimit); |
| 68 | seq_printf(m, " ci=%lu co=%lu ca=%lu\n", | ||
| 69 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | ||
| 68 | } | 70 | } |
| 69 | 71 | ||
| 70 | #define PRINT_RCU_DATA(name, func, m) \ | 72 | #define PRINT_RCU_DATA(name, func, m) \ |
| @@ -119,7 +121,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
| 119 | rdp->dynticks_fqs); | 121 | rdp->dynticks_fqs); |
| 120 | #endif /* #ifdef CONFIG_NO_HZ */ | 122 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 121 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 123 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
| 122 | seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit); | 124 | seq_printf(m, ",%ld,%ld", rdp->qlen, rdp->blimit); |
| 125 | seq_printf(m, ",%lu,%lu,%lu\n", | ||
| 126 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | ||
| 123 | } | 127 | } |
| 124 | 128 | ||
| 125 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 129 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
| @@ -128,7 +132,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) | |||
| 128 | #ifdef CONFIG_NO_HZ | 132 | #ifdef CONFIG_NO_HZ |
| 129 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); | 133 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); |
| 130 | #endif /* #ifdef CONFIG_NO_HZ */ | 134 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 131 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); | 135 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); |
| 132 | #ifdef CONFIG_TREE_PREEMPT_RCU | 136 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 133 | seq_puts(m, "\"rcu_preempt:\"\n"); | 137 | seq_puts(m, "\"rcu_preempt:\"\n"); |
| 134 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); | 138 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); |
| @@ -262,7 +266,7 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) | |||
| 262 | struct rcu_data *rdp; | 266 | struct rcu_data *rdp; |
| 263 | 267 | ||
| 264 | for_each_possible_cpu(cpu) { | 268 | for_each_possible_cpu(cpu) { |
| 265 | rdp = rsp->rda[cpu]; | 269 | rdp = per_cpu_ptr(rsp->rda, cpu); |
| 266 | if (rdp->beenonline) | 270 | if (rdp->beenonline) |
| 267 | print_one_rcu_pending(m, rdp); | 271 | print_one_rcu_pending(m, rdp); |
| 268 | } | 272 | } |
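
With the new fields, every per-CPU line of the debugfs rcudata output (and its CSV twin) gains ci/co/ca: callbacks invoked, handed to the orphanage, and adopted from it. A tiny program using the same format strings to show what the tail of such a line looks like; the numbers are made up:

#include <stdio.h>

int main(void)
{
	long qlen = 10, blimit = 10;
	unsigned long ci = 145801, co = 0, ca = 0;  /* sample values only */

	/* tail of print_one_rcu_data() after this patch */
	printf(" ql=%ld b=%ld", qlen, blimit);
	printf(" ci=%lu co=%lu ca=%lu\n", ci, co, ca);
	return 0;
}
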
diff --git a/kernel/sched.c b/kernel/sched.c index c0d2067f3e0d..d42992bccdfa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -426,9 +426,7 @@ struct root_domain { | |||
| 426 | */ | 426 | */ |
| 427 | cpumask_var_t rto_mask; | 427 | cpumask_var_t rto_mask; |
| 428 | atomic_t rto_count; | 428 | atomic_t rto_count; |
| 429 | #ifdef CONFIG_SMP | ||
| 430 | struct cpupri cpupri; | 429 | struct cpupri cpupri; |
| 431 | #endif | ||
| 432 | }; | 430 | }; |
| 433 | 431 | ||
| 434 | /* | 432 | /* |
| @@ -437,7 +435,7 @@ struct root_domain { | |||
| 437 | */ | 435 | */ |
| 438 | static struct root_domain def_root_domain; | 436 | static struct root_domain def_root_domain; |
| 439 | 437 | ||
| 440 | #endif | 438 | #endif /* CONFIG_SMP */ |
| 441 | 439 | ||
| 442 | /* | 440 | /* |
| 443 | * This is the main, per-CPU runqueue data structure. | 441 | * This is the main, per-CPU runqueue data structure. |
| @@ -488,11 +486,12 @@ struct rq { | |||
| 488 | */ | 486 | */ |
| 489 | unsigned long nr_uninterruptible; | 487 | unsigned long nr_uninterruptible; |
| 490 | 488 | ||
| 491 | struct task_struct *curr, *idle; | 489 | struct task_struct *curr, *idle, *stop; |
| 492 | unsigned long next_balance; | 490 | unsigned long next_balance; |
| 493 | struct mm_struct *prev_mm; | 491 | struct mm_struct *prev_mm; |
| 494 | 492 | ||
| 495 | u64 clock; | 493 | u64 clock; |
| 494 | u64 clock_task; | ||
| 496 | 495 | ||
| 497 | atomic_t nr_iowait; | 496 | atomic_t nr_iowait; |
| 498 | 497 | ||
| @@ -520,6 +519,10 @@ struct rq { | |||
| 520 | u64 avg_idle; | 519 | u64 avg_idle; |
| 521 | #endif | 520 | #endif |
| 522 | 521 | ||
| 522 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 523 | u64 prev_irq_time; | ||
| 524 | #endif | ||
| 525 | |||
| 523 | /* calc_load related fields */ | 526 | /* calc_load related fields */ |
| 524 | unsigned long calc_load_update; | 527 | unsigned long calc_load_update; |
| 525 | long calc_load_active; | 528 | long calc_load_active; |
| @@ -643,10 +646,22 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 643 | 646 | ||
| 644 | #endif /* CONFIG_CGROUP_SCHED */ | 647 | #endif /* CONFIG_CGROUP_SCHED */ |
| 645 | 648 | ||
| 649 | static u64 irq_time_cpu(int cpu); | ||
| 650 | static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); | ||
| 651 | |||
| 646 | inline void update_rq_clock(struct rq *rq) | 652 | inline void update_rq_clock(struct rq *rq) |
| 647 | { | 653 | { |
| 648 | if (!rq->skip_clock_update) | 654 | if (!rq->skip_clock_update) { |
| 649 | rq->clock = sched_clock_cpu(cpu_of(rq)); | 655 | int cpu = cpu_of(rq); |
| 656 | u64 irq_time; | ||
| 657 | |||
| 658 | rq->clock = sched_clock_cpu(cpu); | ||
| 659 | irq_time = irq_time_cpu(cpu); | ||
| 660 | if (rq->clock - irq_time > rq->clock_task) | ||
| 661 | rq->clock_task = rq->clock - irq_time; | ||
| 662 | |||
| 663 | sched_irq_time_avg_update(rq, irq_time); | ||
| 664 | } | ||
| 650 | } | 665 | } |
| 651 | 666 | ||
| 652 | /* | 667 | /* |
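
update_rq_clock() now maintains a second clock, clock_task, which advances only by the share of wall-clock time not spent in IRQ context and never moves backwards when the IRQ-time snapshot briefly eats the whole delta. A small sketch of that update rule under assumed sample values:

#include <stdio.h>
#include <stdint.h>

/* advance clock_task by (clock - irq_time) deltas, monotonically */
static void update_clock_task(uint64_t *clock_task, uint64_t clock, uint64_t irq_time)
{
	if (clock - irq_time > *clock_task)
		*clock_task = clock - irq_time;
}

int main(void)
{
	uint64_t clock_task = 0;

	update_clock_task(&clock_task, 1000, 100);  /* 900 ns of task time  */
	update_clock_task(&clock_task, 1200, 400);  /* 800 < 900: unchanged */
	update_clock_task(&clock_task, 2000, 500);  /* advances to 1500     */
	printf("clock_task=%llu\n", (unsigned long long)clock_task);
	return 0;
}
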
| @@ -723,7 +738,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
| 723 | size_t cnt, loff_t *ppos) | 738 | size_t cnt, loff_t *ppos) |
| 724 | { | 739 | { |
| 725 | char buf[64]; | 740 | char buf[64]; |
| 726 | char *cmp = buf; | 741 | char *cmp; |
| 727 | int neg = 0; | 742 | int neg = 0; |
| 728 | int i; | 743 | int i; |
| 729 | 744 | ||
| @@ -734,6 +749,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
| 734 | return -EFAULT; | 749 | return -EFAULT; |
| 735 | 750 | ||
| 736 | buf[cnt] = 0; | 751 | buf[cnt] = 0; |
| 752 | cmp = strstrip(buf); | ||
| 737 | 753 | ||
| 738 | if (strncmp(buf, "NO_", 3) == 0) { | 754 | if (strncmp(buf, "NO_", 3) == 0) { |
| 739 | neg = 1; | 755 | neg = 1; |
| @@ -741,9 +757,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
| 741 | } | 757 | } |
| 742 | 758 | ||
| 743 | for (i = 0; sched_feat_names[i]; i++) { | 759 | for (i = 0; sched_feat_names[i]; i++) { |
| 744 | int len = strlen(sched_feat_names[i]); | 760 | if (strcmp(cmp, sched_feat_names[i]) == 0) { |
| 745 | |||
| 746 | if (strncmp(cmp, sched_feat_names[i], len) == 0) { | ||
| 747 | if (neg) | 761 | if (neg) |
| 748 | sysctl_sched_features &= ~(1UL << i); | 762 | sysctl_sched_features &= ~(1UL << i); |
| 749 | else | 763 | else |
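
The old parser compared only strlen(name) characters, so any write that merely began with a feature name matched, and a feature whose name is a prefix of another could be toggled by mistake; strstrip() also removes the trailing newline that echo appends, letting strcmp() demand an exact match. A userspace demonstration of the difference (FEAT and FEAT_EXTRA are placeholder names):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* the first name is a prefix of the second */
	const char *names[] = { "FEAT", "FEAT_EXTRA", NULL };
	const char *written = "FEAT_EXTRA";
	int i;

	for (i = 0; names[i]; i++) {
		int old_match = strncmp(written, names[i], strlen(names[i])) == 0;
		int new_match = strcmp(written, names[i]) == 0;

		printf("%-10s old=%d new=%d\n", names[i], old_match, new_match);
	}
	return 0;
}

The old scheme reports a match against "FEAT" even though "FEAT_EXTRA" was written; the new exact comparison does not.
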
| @@ -1840,7 +1854,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
| 1840 | 1854 | ||
| 1841 | static const struct sched_class rt_sched_class; | 1855 | static const struct sched_class rt_sched_class; |
| 1842 | 1856 | ||
| 1843 | #define sched_class_highest (&rt_sched_class) | 1857 | #define sched_class_highest (&stop_sched_class) |
| 1844 | #define for_each_class(class) \ | 1858 | #define for_each_class(class) \ |
| 1845 | for (class = sched_class_highest; class; class = class->next) | 1859 | for (class = sched_class_highest; class; class = class->next) |
| 1846 | 1860 | ||
| @@ -1858,12 +1872,6 @@ static void dec_nr_running(struct rq *rq) | |||
| 1858 | 1872 | ||
| 1859 | static void set_load_weight(struct task_struct *p) | 1873 | static void set_load_weight(struct task_struct *p) |
| 1860 | { | 1874 | { |
| 1861 | if (task_has_rt_policy(p)) { | ||
| 1862 | p->se.load.weight = 0; | ||
| 1863 | p->se.load.inv_weight = WMULT_CONST; | ||
| 1864 | return; | ||
| 1865 | } | ||
| 1866 | |||
| 1867 | /* | 1875 | /* |
| 1868 | * SCHED_IDLE tasks get minimal weight: | 1876 | * SCHED_IDLE tasks get minimal weight: |
| 1869 | */ | 1877 | */ |
| @@ -1917,13 +1925,132 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
| 1917 | dec_nr_running(rq); | 1925 | dec_nr_running(rq); |
| 1918 | } | 1926 | } |
| 1919 | 1927 | ||
| 1928 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1929 | |||
| 1930 | /* | ||
| 1931 | * There are no locks covering percpu hardirq/softirq time. | ||
| 1932 | * They are only modified in account_system_vtime, on corresponding CPU | ||
| 1933 | * with interrupts disabled. So, writes are safe. | ||
| 1934 | * They are read and saved off onto struct rq in update_rq_clock(). | ||
| 1935 | * This may result in other CPU reading this CPU's irq time and can | ||
| 1936 | * race with irq/account_system_vtime on this CPU. We would either get old | ||
| 1937 | * or new value (or semi updated value on 32 bit) with a side effect of | ||
| 1938 | * accounting a slice of irq time to wrong task when irq is in progress | ||
| 1939 | * while we read rq->clock. That is a worthy compromise in place of having | ||
| 1940 | * locks on each irq in account_system_time. | ||
| 1941 | */ | ||
| 1942 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | ||
| 1943 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | ||
| 1944 | |||
| 1945 | static DEFINE_PER_CPU(u64, irq_start_time); | ||
| 1946 | static int sched_clock_irqtime; | ||
| 1947 | |||
| 1948 | void enable_sched_clock_irqtime(void) | ||
| 1949 | { | ||
| 1950 | sched_clock_irqtime = 1; | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | void disable_sched_clock_irqtime(void) | ||
| 1954 | { | ||
| 1955 | sched_clock_irqtime = 0; | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | static u64 irq_time_cpu(int cpu) | ||
| 1959 | { | ||
| 1960 | if (!sched_clock_irqtime) | ||
| 1961 | return 0; | ||
| 1962 | |||
| 1963 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | ||
| 1964 | } | ||
| 1965 | |||
| 1966 | void account_system_vtime(struct task_struct *curr) | ||
| 1967 | { | ||
| 1968 | unsigned long flags; | ||
| 1969 | int cpu; | ||
| 1970 | u64 now, delta; | ||
| 1971 | |||
| 1972 | if (!sched_clock_irqtime) | ||
| 1973 | return; | ||
| 1974 | |||
| 1975 | local_irq_save(flags); | ||
| 1976 | |||
| 1977 | cpu = smp_processor_id(); | ||
| 1978 | now = sched_clock_cpu(cpu); | ||
| 1979 | delta = now - per_cpu(irq_start_time, cpu); | ||
| 1980 | per_cpu(irq_start_time, cpu) = now; | ||
| 1981 | /* | ||
| 1982 | * We do not account for softirq time from ksoftirqd here. | ||
| 1983 | * We want to continue accounting softirq time to ksoftirqd thread | ||
| 1984 | * in that case, so as not to confuse scheduler with a special task | ||
| 1985 | * that does not consume any time, but still wants to run. | ||
| 1986 | */ | ||
| 1987 | if (hardirq_count()) | ||
| 1988 | per_cpu(cpu_hardirq_time, cpu) += delta; | ||
| 1989 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | ||
| 1990 | per_cpu(cpu_softirq_time, cpu) += delta; | ||
| 1991 | |||
| 1992 | local_irq_restore(flags); | ||
| 1993 | } | ||
| 1994 | EXPORT_SYMBOL_GPL(account_system_vtime); | ||
| 1995 | |||
| 1996 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) | ||
| 1997 | { | ||
| 1998 | if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { | ||
| 1999 | u64 delta_irq = curr_irq_time - rq->prev_irq_time; | ||
| 2000 | rq->prev_irq_time = curr_irq_time; | ||
| 2001 | sched_rt_avg_update(rq, delta_irq); | ||
| 2002 | } | ||
| 2003 | } | ||
| 2004 | |||
| 2005 | #else | ||
| 2006 | |||
| 2007 | static u64 irq_time_cpu(int cpu) | ||
| 2008 | { | ||
| 2009 | return 0; | ||
| 2010 | } | ||
| 2011 | |||
| 2012 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | ||
| 2013 | |||
| 2014 | #endif | ||
| 2015 | |||
| 1920 | #include "sched_idletask.c" | 2016 | #include "sched_idletask.c" |
| 1921 | #include "sched_fair.c" | 2017 | #include "sched_fair.c" |
| 1922 | #include "sched_rt.c" | 2018 | #include "sched_rt.c" |
| 2019 | #include "sched_stoptask.c" | ||
| 1923 | #ifdef CONFIG_SCHED_DEBUG | 2020 | #ifdef CONFIG_SCHED_DEBUG |
| 1924 | # include "sched_debug.c" | 2021 | # include "sched_debug.c" |
| 1925 | #endif | 2022 | #endif |
| 1926 | 2023 | ||
| 2024 | void sched_set_stop_task(int cpu, struct task_struct *stop) | ||
| 2025 | { | ||
| 2026 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | ||
| 2027 | struct task_struct *old_stop = cpu_rq(cpu)->stop; | ||
| 2028 | |||
| 2029 | if (stop) { | ||
| 2030 | /* | ||
| 2031 | * Make it appear like a SCHED_FIFO task, it's something | ||
| 2032 | * userspace knows about and won't get confused about. | ||
| 2033 | * | ||
| 2034 | * Also, it will make PI more or less work without too | ||
| 2035 | * much confusion -- but then, stop work should not | ||
| 2036 | * rely on PI working anyway. | ||
| 2037 | */ | ||
| 2038 | sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); | ||
| 2039 | |||
| 2040 | stop->sched_class = &stop_sched_class; | ||
| 2041 | } | ||
| 2042 | |||
| 2043 | cpu_rq(cpu)->stop = stop; | ||
| 2044 | |||
| 2045 | if (old_stop) { | ||
| 2046 | /* | ||
| 2047 | * Reset it back to a normal scheduling class so that | ||
| 2048 | * it can die in pieces. | ||
| 2049 | */ | ||
| 2050 | old_stop->sched_class = &rt_sched_class; | ||
| 2051 | } | ||
| 2052 | } | ||
| 2053 | |||
| 1927 | /* | 2054 | /* |
| 1928 | * __normal_prio - return the priority that is based on the static prio | 2055 | * __normal_prio - return the priority that is based on the static prio |
| 1929 | */ | 2056 | */ |
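
The CONFIG_IRQ_TIME_ACCOUNTING block above keeps two per-CPU running totals (hardirq and softirq time) plus a per-CPU timestamp of the last accounting point; each account_system_vtime() call charges the elapsed delta to the hard-IRQ bucket, the soft-IRQ bucket (unless it is ksoftirqd's own time), or to neither, depending on the context in force when the interval is closed. A single-CPU userspace analogue of that bookkeeping; the enum and sample timestamps are invented:

#include <stdio.h>
#include <stdint.h>

static uint64_t cpu_hardirq_time, cpu_softirq_time, irq_start_time;

enum ctx { CTX_TASK, CTX_HARDIRQ, CTX_SOFTIRQ };

/* charge the time since the last accounting point to 'context' */
static void account_vtime(uint64_t now, enum ctx context)
{
	uint64_t delta = now - irq_start_time;

	irq_start_time = now;           /* next delta starts here */
	if (context == CTX_HARDIRQ)
		cpu_hardirq_time += delta;
	else if (context == CTX_SOFTIRQ)
		cpu_softirq_time += delta;
	/* plain task time is not added to either bucket */
}

int main(void)
{
	account_vtime(100, CTX_TASK);     /* 0..100: task context */
	account_vtime(130, CTX_HARDIRQ);  /* 100..130: hard IRQ   */
	account_vtime(150, CTX_SOFTIRQ);  /* 130..150: softirq    */

	printf("hardirq=%llu softirq=%llu total_irq=%llu\n",
	       (unsigned long long)cpu_hardirq_time,
	       (unsigned long long)cpu_softirq_time,
	       (unsigned long long)(cpu_hardirq_time + cpu_softirq_time));
	return 0;
}

The combined total is what irq_time_cpu() feeds back into update_rq_clock() to hold clock_task back.
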
| @@ -2003,6 +2130,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
| 2003 | if (p->sched_class != &fair_sched_class) | 2130 | if (p->sched_class != &fair_sched_class) |
| 2004 | return 0; | 2131 | return 0; |
| 2005 | 2132 | ||
| 2133 | if (unlikely(p->policy == SCHED_IDLE)) | ||
| 2134 | return 0; | ||
| 2135 | |||
| 2006 | /* | 2136 | /* |
| 2007 | * Buddy candidates are cache hot: | 2137 | * Buddy candidates are cache hot: |
| 2008 | */ | 2138 | */ |
| @@ -2852,14 +2982,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 2852 | */ | 2982 | */ |
| 2853 | arch_start_context_switch(prev); | 2983 | arch_start_context_switch(prev); |
| 2854 | 2984 | ||
| 2855 | if (likely(!mm)) { | 2985 | if (!mm) { |
| 2856 | next->active_mm = oldmm; | 2986 | next->active_mm = oldmm; |
| 2857 | atomic_inc(&oldmm->mm_count); | 2987 | atomic_inc(&oldmm->mm_count); |
| 2858 | enter_lazy_tlb(oldmm, next); | 2988 | enter_lazy_tlb(oldmm, next); |
| 2859 | } else | 2989 | } else |
| 2860 | switch_mm(oldmm, mm, next); | 2990 | switch_mm(oldmm, mm, next); |
| 2861 | 2991 | ||
| 2862 | if (likely(!prev->mm)) { | 2992 | if (!prev->mm) { |
| 2863 | prev->active_mm = NULL; | 2993 | prev->active_mm = NULL; |
| 2864 | rq->prev_mm = oldmm; | 2994 | rq->prev_mm = oldmm; |
| 2865 | } | 2995 | } |
| @@ -3248,7 +3378,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | |||
| 3248 | 3378 | ||
| 3249 | if (task_current(rq, p)) { | 3379 | if (task_current(rq, p)) { |
| 3250 | update_rq_clock(rq); | 3380 | update_rq_clock(rq); |
| 3251 | ns = rq->clock - p->se.exec_start; | 3381 | ns = rq->clock_task - p->se.exec_start; |
| 3252 | if ((s64)ns < 0) | 3382 | if ((s64)ns < 0) |
| 3253 | ns = 0; | 3383 | ns = 0; |
| 3254 | } | 3384 | } |
| @@ -3397,7 +3527,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
| 3397 | tmp = cputime_to_cputime64(cputime); | 3527 | tmp = cputime_to_cputime64(cputime); |
| 3398 | if (hardirq_count() - hardirq_offset) | 3528 | if (hardirq_count() - hardirq_offset) |
| 3399 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3529 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
| 3400 | else if (softirq_count()) | 3530 | else if (in_serving_softirq()) |
| 3401 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3531 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
| 3402 | else | 3532 | else |
| 3403 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3533 | cpustat->system = cputime64_add(cpustat->system, tmp); |
| @@ -3723,17 +3853,13 @@ pick_next_task(struct rq *rq) | |||
| 3723 | return p; | 3853 | return p; |
| 3724 | } | 3854 | } |
| 3725 | 3855 | ||
| 3726 | class = sched_class_highest; | 3856 | for_each_class(class) { |
| 3727 | for ( ; ; ) { | ||
| 3728 | p = class->pick_next_task(rq); | 3857 | p = class->pick_next_task(rq); |
| 3729 | if (p) | 3858 | if (p) |
| 3730 | return p; | 3859 | return p; |
| 3731 | /* | ||
| 3732 | * Will never be NULL as the idle class always | ||
| 3733 | * returns a non-NULL p: | ||
| 3734 | */ | ||
| 3735 | class = class->next; | ||
| 3736 | } | 3860 | } |
| 3861 | |||
| 3862 | BUG(); /* the idle class will always have a runnable task */ | ||
| 3737 | } | 3863 | } |
| 3738 | 3864 | ||
| 3739 | /* | 3865 | /* |
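
pick_next_task() now walks the class list with for_each_class(), whose head is the new stop class, and hits BUG() only if even the idle class fails to return a task. A minimal model of that priority-ordered walk; the class and task names are invented:

#include <stdio.h>
#include <stddef.h>

struct sched_class_like {
	const char *name;
	const struct sched_class_like *next;
	const char *(*pick_next)(void);
};

static const char *pick_none(void) { return NULL; }
static const char *pick_idle(void) { return "swapper"; }

/* lowest class first so the higher ones can point at it */
static const struct sched_class_like idle_class = { "idle", NULL,        pick_idle };
static const struct sched_class_like fair_class = { "fair", &idle_class, pick_none };
static const struct sched_class_like rt_class   = { "rt",   &fair_class, pick_none };
static const struct sched_class_like stop_class = { "stop", &rt_class,   pick_none };

#define sched_class_highest (&stop_class)
#define for_each_class(class) \
	for (class = sched_class_highest; class; class = class->next)

int main(void)
{
	const struct sched_class_like *class;
	const char *p;

	for_each_class(class) {
		p = class->pick_next();
		if (p) {
			printf("picked '%s' from %s class\n", p, class->name);
			return 0;
		}
	}
	/* unreachable: the idle class always has a runnable task */
	return 1;
}
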
| @@ -4358,6 +4484,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 4358 | 4484 | ||
| 4359 | rq = task_rq_lock(p, &flags); | 4485 | rq = task_rq_lock(p, &flags); |
| 4360 | 4486 | ||
| 4487 | trace_sched_pi_setprio(p, prio); | ||
| 4361 | oldprio = p->prio; | 4488 | oldprio = p->prio; |
| 4362 | prev_class = p->sched_class; | 4489 | prev_class = p->sched_class; |
| 4363 | on_rq = p->se.on_rq; | 4490 | on_rq = p->se.on_rq; |
| @@ -4645,7 +4772,7 @@ recheck: | |||
| 4645 | } | 4772 | } |
| 4646 | 4773 | ||
| 4647 | if (user) { | 4774 | if (user) { |
| 4648 | retval = security_task_setscheduler(p, policy, param); | 4775 | retval = security_task_setscheduler(p); |
| 4649 | if (retval) | 4776 | if (retval) |
| 4650 | return retval; | 4777 | return retval; |
| 4651 | } | 4778 | } |
| @@ -4661,6 +4788,15 @@ recheck: | |||
| 4661 | */ | 4788 | */ |
| 4662 | rq = __task_rq_lock(p); | 4789 | rq = __task_rq_lock(p); |
| 4663 | 4790 | ||
| 4791 | /* | ||
| 4792 | * Changing the policy of the stop threads is a very bad idea | ||
| 4793 | */ | ||
| 4794 | if (p == rq->stop) { | ||
| 4795 | __task_rq_unlock(rq); | ||
| 4796 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
| 4797 | return -EINVAL; | ||
| 4798 | } | ||
| 4799 | |||
| 4664 | #ifdef CONFIG_RT_GROUP_SCHED | 4800 | #ifdef CONFIG_RT_GROUP_SCHED |
| 4665 | if (user) { | 4801 | if (user) { |
| 4666 | /* | 4802 | /* |
| @@ -4887,13 +5023,13 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
| 4887 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 5023 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) |
| 4888 | goto out_unlock; | 5024 | goto out_unlock; |
| 4889 | 5025 | ||
| 4890 | retval = security_task_setscheduler(p, 0, NULL); | 5026 | retval = security_task_setscheduler(p); |
| 4891 | if (retval) | 5027 | if (retval) |
| 4892 | goto out_unlock; | 5028 | goto out_unlock; |
| 4893 | 5029 | ||
| 4894 | cpuset_cpus_allowed(p, cpus_allowed); | 5030 | cpuset_cpus_allowed(p, cpus_allowed); |
| 4895 | cpumask_and(new_mask, in_mask, cpus_allowed); | 5031 | cpumask_and(new_mask, in_mask, cpus_allowed); |
| 4896 | again: | 5032 | again: |
| 4897 | retval = set_cpus_allowed_ptr(p, new_mask); | 5033 | retval = set_cpus_allowed_ptr(p, new_mask); |
| 4898 | 5034 | ||
| 4899 | if (!retval) { | 5035 | if (!retval) { |
| @@ -5337,7 +5473,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5337 | idle->se.exec_start = sched_clock(); | 5473 | idle->se.exec_start = sched_clock(); |
| 5338 | 5474 | ||
| 5339 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 5475 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
| 5476 | /* | ||
| 5477 | * We're having a chicken and egg problem, even though we are | ||
| 5478 | * holding rq->lock, the cpu isn't yet set to this cpu so the | ||
| 5479 | * lockdep check in task_group() will fail. | ||
| 5480 | * | ||
| 5481 | * Similar case to sched_fork(). / Alternatively we could | ||
| 5482 | * use task_rq_lock() here and obtain the other rq->lock. | ||
| 5483 | * | ||
| 5484 | * Silence PROVE_RCU | ||
| 5485 | */ | ||
| 5486 | rcu_read_lock(); | ||
| 5340 | __set_task_cpu(idle, cpu); | 5487 | __set_task_cpu(idle, cpu); |
| 5488 | rcu_read_unlock(); | ||
| 5341 | 5489 | ||
| 5342 | rq->curr = rq->idle = idle; | 5490 | rq->curr = rq->idle = idle; |
| 5343 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 5491 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
| @@ -6514,6 +6662,7 @@ struct s_data { | |||
| 6514 | cpumask_var_t nodemask; | 6662 | cpumask_var_t nodemask; |
| 6515 | cpumask_var_t this_sibling_map; | 6663 | cpumask_var_t this_sibling_map; |
| 6516 | cpumask_var_t this_core_map; | 6664 | cpumask_var_t this_core_map; |
| 6665 | cpumask_var_t this_book_map; | ||
| 6517 | cpumask_var_t send_covered; | 6666 | cpumask_var_t send_covered; |
| 6518 | cpumask_var_t tmpmask; | 6667 | cpumask_var_t tmpmask; |
| 6519 | struct sched_group **sched_group_nodes; | 6668 | struct sched_group **sched_group_nodes; |
| @@ -6525,6 +6674,7 @@ enum s_alloc { | |||
| 6525 | sa_rootdomain, | 6674 | sa_rootdomain, |
| 6526 | sa_tmpmask, | 6675 | sa_tmpmask, |
| 6527 | sa_send_covered, | 6676 | sa_send_covered, |
| 6677 | sa_this_book_map, | ||
| 6528 | sa_this_core_map, | 6678 | sa_this_core_map, |
| 6529 | sa_this_sibling_map, | 6679 | sa_this_sibling_map, |
| 6530 | sa_nodemask, | 6680 | sa_nodemask, |
| @@ -6560,31 +6710,48 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | |||
| 6560 | #ifdef CONFIG_SCHED_MC | 6710 | #ifdef CONFIG_SCHED_MC |
| 6561 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); | 6711 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); |
| 6562 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); | 6712 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); |
| 6563 | #endif /* CONFIG_SCHED_MC */ | ||
| 6564 | 6713 | ||
| 6565 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | ||
| 6566 | static int | 6714 | static int |
| 6567 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | 6715 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, |
| 6568 | struct sched_group **sg, struct cpumask *mask) | 6716 | struct sched_group **sg, struct cpumask *mask) |
| 6569 | { | 6717 | { |
| 6570 | int group; | 6718 | int group; |
| 6571 | 6719 | #ifdef CONFIG_SCHED_SMT | |
| 6572 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | 6720 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); |
| 6573 | group = cpumask_first(mask); | 6721 | group = cpumask_first(mask); |
| 6722 | #else | ||
| 6723 | group = cpu; | ||
| 6724 | #endif | ||
| 6574 | if (sg) | 6725 | if (sg) |
| 6575 | *sg = &per_cpu(sched_group_core, group).sg; | 6726 | *sg = &per_cpu(sched_group_core, group).sg; |
| 6576 | return group; | 6727 | return group; |
| 6577 | } | 6728 | } |
| 6578 | #elif defined(CONFIG_SCHED_MC) | 6729 | #endif /* CONFIG_SCHED_MC */ |
| 6730 | |||
| 6731 | /* | ||
| 6732 | * book sched-domains: | ||
| 6733 | */ | ||
| 6734 | #ifdef CONFIG_SCHED_BOOK | ||
| 6735 | static DEFINE_PER_CPU(struct static_sched_domain, book_domains); | ||
| 6736 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); | ||
| 6737 | |||
| 6579 | static int | 6738 | static int |
| 6580 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | 6739 | cpu_to_book_group(int cpu, const struct cpumask *cpu_map, |
| 6581 | struct sched_group **sg, struct cpumask *unused) | 6740 | struct sched_group **sg, struct cpumask *mask) |
| 6582 | { | 6741 | { |
| 6742 | int group = cpu; | ||
| 6743 | #ifdef CONFIG_SCHED_MC | ||
| 6744 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | ||
| 6745 | group = cpumask_first(mask); | ||
| 6746 | #elif defined(CONFIG_SCHED_SMT) | ||
| 6747 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
| 6748 | group = cpumask_first(mask); | ||
| 6749 | #endif | ||
| 6583 | if (sg) | 6750 | if (sg) |
| 6584 | *sg = &per_cpu(sched_group_core, cpu).sg; | 6751 | *sg = &per_cpu(sched_group_book, group).sg; |
| 6585 | return cpu; | 6752 | return group; |
| 6586 | } | 6753 | } |
| 6587 | #endif | 6754 | #endif /* CONFIG_SCHED_BOOK */ |
| 6588 | 6755 | ||
| 6589 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); | 6756 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); |
| 6590 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); | 6757 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); |
| @@ -6594,7 +6761,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, | |||
| 6594 | struct sched_group **sg, struct cpumask *mask) | 6761 | struct sched_group **sg, struct cpumask *mask) |
| 6595 | { | 6762 | { |
| 6596 | int group; | 6763 | int group; |
| 6597 | #ifdef CONFIG_SCHED_MC | 6764 | #ifdef CONFIG_SCHED_BOOK |
| 6765 | cpumask_and(mask, cpu_book_mask(cpu), cpu_map); | ||
| 6766 | group = cpumask_first(mask); | ||
| 6767 | #elif defined(CONFIG_SCHED_MC) | ||
| 6598 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | 6768 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); |
| 6599 | group = cpumask_first(mask); | 6769 | group = cpumask_first(mask); |
| 6600 | #elif defined(CONFIG_SCHED_SMT) | 6770 | #elif defined(CONFIG_SCHED_SMT) |
| @@ -6855,6 +7025,9 @@ SD_INIT_FUNC(CPU) | |||
| 6855 | #ifdef CONFIG_SCHED_MC | 7025 | #ifdef CONFIG_SCHED_MC |
| 6856 | SD_INIT_FUNC(MC) | 7026 | SD_INIT_FUNC(MC) |
| 6857 | #endif | 7027 | #endif |
| 7028 | #ifdef CONFIG_SCHED_BOOK | ||
| 7029 | SD_INIT_FUNC(BOOK) | ||
| 7030 | #endif | ||
| 6858 | 7031 | ||
| 6859 | static int default_relax_domain_level = -1; | 7032 | static int default_relax_domain_level = -1; |
| 6860 | 7033 | ||
| @@ -6904,6 +7077,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, | |||
| 6904 | free_cpumask_var(d->tmpmask); /* fall through */ | 7077 | free_cpumask_var(d->tmpmask); /* fall through */ |
| 6905 | case sa_send_covered: | 7078 | case sa_send_covered: |
| 6906 | free_cpumask_var(d->send_covered); /* fall through */ | 7079 | free_cpumask_var(d->send_covered); /* fall through */ |
| 7080 | case sa_this_book_map: | ||
| 7081 | free_cpumask_var(d->this_book_map); /* fall through */ | ||
| 6907 | case sa_this_core_map: | 7082 | case sa_this_core_map: |
| 6908 | free_cpumask_var(d->this_core_map); /* fall through */ | 7083 | free_cpumask_var(d->this_core_map); /* fall through */ |
| 6909 | case sa_this_sibling_map: | 7084 | case sa_this_sibling_map: |
| @@ -6950,8 +7125,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, | |||
| 6950 | return sa_nodemask; | 7125 | return sa_nodemask; |
| 6951 | if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) | 7126 | if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) |
| 6952 | return sa_this_sibling_map; | 7127 | return sa_this_sibling_map; |
| 6953 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | 7128 | if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) |
| 6954 | return sa_this_core_map; | 7129 | return sa_this_core_map; |
| 7130 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | ||
| 7131 | return sa_this_book_map; | ||
| 6955 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) | 7132 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) |
| 6956 | return sa_send_covered; | 7133 | return sa_send_covered; |
| 6957 | d->rd = alloc_rootdomain(); | 7134 | d->rd = alloc_rootdomain(); |
| @@ -7009,6 +7186,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, | |||
| 7009 | return sd; | 7186 | return sd; |
| 7010 | } | 7187 | } |
| 7011 | 7188 | ||
| 7189 | static struct sched_domain *__build_book_sched_domain(struct s_data *d, | ||
| 7190 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | ||
| 7191 | struct sched_domain *parent, int i) | ||
| 7192 | { | ||
| 7193 | struct sched_domain *sd = parent; | ||
| 7194 | #ifdef CONFIG_SCHED_BOOK | ||
| 7195 | sd = &per_cpu(book_domains, i).sd; | ||
| 7196 | SD_INIT(sd, BOOK); | ||
| 7197 | set_domain_attribute(sd, attr); | ||
| 7198 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); | ||
| 7199 | sd->parent = parent; | ||
| 7200 | parent->child = sd; | ||
| 7201 | cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
| 7202 | #endif | ||
| 7203 | return sd; | ||
| 7204 | } | ||
| 7205 | |||
| 7012 | static struct sched_domain *__build_mc_sched_domain(struct s_data *d, | 7206 | static struct sched_domain *__build_mc_sched_domain(struct s_data *d, |
| 7013 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | 7207 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, |
| 7014 | struct sched_domain *parent, int i) | 7208 | struct sched_domain *parent, int i) |
| @@ -7066,6 +7260,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l, | |||
| 7066 | d->send_covered, d->tmpmask); | 7260 | d->send_covered, d->tmpmask); |
| 7067 | break; | 7261 | break; |
| 7068 | #endif | 7262 | #endif |
| 7263 | #ifdef CONFIG_SCHED_BOOK | ||
| 7264 | case SD_LV_BOOK: /* set up book groups */ | ||
| 7265 | cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); | ||
| 7266 | if (cpu == cpumask_first(d->this_book_map)) | ||
| 7267 | init_sched_build_groups(d->this_book_map, cpu_map, | ||
| 7268 | &cpu_to_book_group, | ||
| 7269 | d->send_covered, d->tmpmask); | ||
| 7270 | break; | ||
| 7271 | #endif | ||
| 7069 | case SD_LV_CPU: /* set up physical groups */ | 7272 | case SD_LV_CPU: /* set up physical groups */ |
| 7070 | cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); | 7273 | cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); |
| 7071 | if (!cpumask_empty(d->nodemask)) | 7274 | if (!cpumask_empty(d->nodemask)) |
| @@ -7113,12 +7316,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
| 7113 | 7316 | ||
| 7114 | sd = __build_numa_sched_domains(&d, cpu_map, attr, i); | 7317 | sd = __build_numa_sched_domains(&d, cpu_map, attr, i); |
| 7115 | sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); | 7318 | sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); |
| 7319 | sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); | ||
| 7116 | sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); | 7320 | sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); |
| 7117 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); | 7321 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); |
| 7118 | } | 7322 | } |
| 7119 | 7323 | ||
| 7120 | for_each_cpu(i, cpu_map) { | 7324 | for_each_cpu(i, cpu_map) { |
| 7121 | build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); | 7325 | build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); |
| 7326 | build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); | ||
| 7122 | build_sched_groups(&d, SD_LV_MC, cpu_map, i); | 7327 | build_sched_groups(&d, SD_LV_MC, cpu_map, i); |
| 7123 | } | 7328 | } |
| 7124 | 7329 | ||
| @@ -7149,6 +7354,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
| 7149 | init_sched_groups_power(i, sd); | 7354 | init_sched_groups_power(i, sd); |
| 7150 | } | 7355 | } |
| 7151 | #endif | 7356 | #endif |
| 7357 | #ifdef CONFIG_SCHED_BOOK | ||
| 7358 | for_each_cpu(i, cpu_map) { | ||
| 7359 | sd = &per_cpu(book_domains, i).sd; | ||
| 7360 | init_sched_groups_power(i, sd); | ||
| 7361 | } | ||
| 7362 | #endif | ||
| 7152 | 7363 | ||
| 7153 | for_each_cpu(i, cpu_map) { | 7364 | for_each_cpu(i, cpu_map) { |
| 7154 | sd = &per_cpu(phys_domains, i).sd; | 7365 | sd = &per_cpu(phys_domains, i).sd; |
| @@ -7174,6 +7385,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
| 7174 | sd = &per_cpu(cpu_domains, i).sd; | 7385 | sd = &per_cpu(cpu_domains, i).sd; |
| 7175 | #elif defined(CONFIG_SCHED_MC) | 7386 | #elif defined(CONFIG_SCHED_MC) |
| 7176 | sd = &per_cpu(core_domains, i).sd; | 7387 | sd = &per_cpu(core_domains, i).sd; |
| 7388 | #elif defined(CONFIG_SCHED_BOOK) | ||
| 7389 | sd = &per_cpu(book_domains, i).sd; | ||
| 7177 | #else | 7390 | #else |
| 7178 | sd = &per_cpu(phys_domains, i).sd; | 7391 | sd = &per_cpu(phys_domains, i).sd; |
| 7179 | #endif | 7392 | #endif |
| @@ -8078,9 +8291,9 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8078 | 8291 | ||
| 8079 | return 1; | 8292 | return 1; |
| 8080 | 8293 | ||
| 8081 | err_free_rq: | 8294 | err_free_rq: |
| 8082 | kfree(cfs_rq); | 8295 | kfree(cfs_rq); |
| 8083 | err: | 8296 | err: |
| 8084 | return 0; | 8297 | return 0; |
| 8085 | } | 8298 | } |
| 8086 | 8299 | ||
| @@ -8168,9 +8381,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8168 | 8381 | ||
| 8169 | return 1; | 8382 | return 1; |
| 8170 | 8383 | ||
| 8171 | err_free_rq: | 8384 | err_free_rq: |
| 8172 | kfree(rt_rq); | 8385 | kfree(rt_rq); |
| 8173 | err: | 8386 | err: |
| 8174 | return 0; | 8387 | return 0; |
| 8175 | } | 8388 | } |
| 8176 | 8389 | ||
| @@ -8528,7 +8741,7 @@ static int tg_set_bandwidth(struct task_group *tg, | |||
| 8528 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 8741 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
| 8529 | } | 8742 | } |
| 8530 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 8743 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
| 8531 | unlock: | 8744 | unlock: |
| 8532 | read_unlock(&tasklist_lock); | 8745 | read_unlock(&tasklist_lock); |
| 8533 | mutex_unlock(&rt_constraints_mutex); | 8746 | mutex_unlock(&rt_constraints_mutex); |
| 8534 | 8747 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index db3f674ca49d..933f3d1b62ea 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | 25 | ||
| 26 | /* | 26 | /* |
| 27 | * Targeted preemption latency for CPU-bound tasks: | 27 | * Targeted preemption latency for CPU-bound tasks: |
| 28 | * (default: 5ms * (1 + ilog(ncpus)), units: nanoseconds) | 28 | * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) |
| 29 | * | 29 | * |
| 30 | * NOTE: this latency value is not the same as the concept of | 30 | * NOTE: this latency value is not the same as the concept of |
| 31 | * 'timeslice length' - timeslices in CFS are of variable length | 31 | * 'timeslice length' - timeslices in CFS are of variable length |
| @@ -52,7 +52,7 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling | |||
| 52 | 52 | ||
| 53 | /* | 53 | /* |
| 54 | * Minimal preemption granularity for CPU-bound tasks: | 54 | * Minimal preemption granularity for CPU-bound tasks: |
| 55 | * (default: 2 msec * (1 + ilog(ncpus)), units: nanoseconds) | 55 | * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) |
| 56 | */ | 56 | */ |
| 57 | unsigned int sysctl_sched_min_granularity = 750000ULL; | 57 | unsigned int sysctl_sched_min_granularity = 750000ULL; |
| 58 | unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; | 58 | unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; |
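
The comment fixes above match the actual defaults: 6 ms target latency and 0.75 ms minimum granularity, both multiplied by 1 + ilog2(ncpus) under the logarithmic tunable scaling. A quick calculation of the effective values for a hypothetical 8-CPU machine:

#include <stdio.h>

static unsigned int ilog2_u(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int ncpus = 8;                     /* assumed machine size */
	unsigned int factor = 1 + ilog2_u(ncpus);   /* logarithmic scaling  */
	unsigned long long latency_ns  = 6000000ULL * factor;
	unsigned long long min_gran_ns = 750000ULL * factor;

	printf("factor=%u latency=%llu ns min_granularity=%llu ns\n",
	       factor, latency_ns, min_gran_ns);
	return 0;
}

For 8 CPUs the factor is 4, giving 24 ms of target latency and 3 ms of minimum granularity.
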
| @@ -519,7 +519,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
| 519 | static void update_curr(struct cfs_rq *cfs_rq) | 519 | static void update_curr(struct cfs_rq *cfs_rq) |
| 520 | { | 520 | { |
| 521 | struct sched_entity *curr = cfs_rq->curr; | 521 | struct sched_entity *curr = cfs_rq->curr; |
| 522 | u64 now = rq_of(cfs_rq)->clock; | 522 | u64 now = rq_of(cfs_rq)->clock_task; |
| 523 | unsigned long delta_exec; | 523 | unsigned long delta_exec; |
| 524 | 524 | ||
| 525 | if (unlikely(!curr)) | 525 | if (unlikely(!curr)) |
| @@ -602,7 +602,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 602 | /* | 602 | /* |
| 603 | * We are starting a new run period: | 603 | * We are starting a new run period: |
| 604 | */ | 604 | */ |
| 605 | se->exec_start = rq_of(cfs_rq)->clock; | 605 | se->exec_start = rq_of(cfs_rq)->clock_task; |
| 606 | } | 606 | } |
| 607 | 607 | ||
| 608 | /************************************************** | 608 | /************************************************** |
| @@ -1764,6 +1764,10 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
| 1764 | set_task_cpu(p, this_cpu); | 1764 | set_task_cpu(p, this_cpu); |
| 1765 | activate_task(this_rq, p, 0); | 1765 | activate_task(this_rq, p, 0); |
| 1766 | check_preempt_curr(this_rq, p, 0); | 1766 | check_preempt_curr(this_rq, p, 0); |
| 1767 | |||
| 1768 | /* re-arm NEWIDLE balancing when moving tasks */ | ||
| 1769 | src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
| 1770 | this_rq->idle_stamp = 0; | ||
| 1767 | } | 1771 | } |
| 1768 | 1772 | ||
| 1769 | /* | 1773 | /* |
| @@ -1798,7 +1802,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 1798 | * 2) too many balance attempts have failed. | 1802 | * 2) too many balance attempts have failed. |
| 1799 | */ | 1803 | */ |
| 1800 | 1804 | ||
| 1801 | tsk_cache_hot = task_hot(p, rq->clock, sd); | 1805 | tsk_cache_hot = task_hot(p, rq->clock_task, sd); |
| 1802 | if (!tsk_cache_hot || | 1806 | if (!tsk_cache_hot || |
| 1803 | sd->nr_balance_failed > sd->cache_nice_tries) { | 1807 | sd->nr_balance_failed > sd->cache_nice_tries) { |
| 1804 | #ifdef CONFIG_SCHEDSTATS | 1808 | #ifdef CONFIG_SCHEDSTATS |
| @@ -2030,12 +2034,14 @@ struct sd_lb_stats { | |||
| 2030 | unsigned long this_load; | 2034 | unsigned long this_load; |
| 2031 | unsigned long this_load_per_task; | 2035 | unsigned long this_load_per_task; |
| 2032 | unsigned long this_nr_running; | 2036 | unsigned long this_nr_running; |
| 2037 | unsigned long this_has_capacity; | ||
| 2033 | 2038 | ||
| 2034 | /* Statistics of the busiest group */ | 2039 | /* Statistics of the busiest group */ |
| 2035 | unsigned long max_load; | 2040 | unsigned long max_load; |
| 2036 | unsigned long busiest_load_per_task; | 2041 | unsigned long busiest_load_per_task; |
| 2037 | unsigned long busiest_nr_running; | 2042 | unsigned long busiest_nr_running; |
| 2038 | unsigned long busiest_group_capacity; | 2043 | unsigned long busiest_group_capacity; |
| 2044 | unsigned long busiest_has_capacity; | ||
| 2039 | 2045 | ||
| 2040 | int group_imb; /* Is there imbalance in this sd */ | 2046 | int group_imb; /* Is there imbalance in this sd */ |
| 2041 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 2047 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
| @@ -2058,6 +2064,7 @@ struct sg_lb_stats { | |||
| 2058 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | 2064 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ |
| 2059 | unsigned long group_capacity; | 2065 | unsigned long group_capacity; |
| 2060 | int group_imb; /* Is there an imbalance in the group ? */ | 2066 | int group_imb; /* Is there an imbalance in the group ? */ |
| 2067 | int group_has_capacity; /* Is there extra capacity in the group? */ | ||
| 2061 | }; | 2068 | }; |
| 2062 | 2069 | ||
| 2063 | /** | 2070 | /** |
| @@ -2268,7 +2275,13 @@ unsigned long scale_rt_power(int cpu) | |||
| 2268 | u64 total, available; | 2275 | u64 total, available; |
| 2269 | 2276 | ||
| 2270 | total = sched_avg_period() + (rq->clock - rq->age_stamp); | 2277 | total = sched_avg_period() + (rq->clock - rq->age_stamp); |
| 2271 | available = total - rq->rt_avg; | 2278 | |
| 2279 | if (unlikely(total < rq->rt_avg)) { | ||
| 2280 | /* Ensures that power won't end up being negative */ | ||
| 2281 | available = 0; | ||
| 2282 | } else { | ||
| 2283 | available = total - rq->rt_avg; | ||
| 2284 | } | ||
| 2272 | 2285 | ||
| 2273 | if (unlikely((s64)total < SCHED_LOAD_SCALE)) | 2286 | if (unlikely((s64)total < SCHED_LOAD_SCALE)) |
| 2274 | total = SCHED_LOAD_SCALE; | 2287 | total = SCHED_LOAD_SCALE; |
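
scale_rt_power() subtracts the RT/IRQ average from the total period; both are u64, so when rt_avg transiently exceeds total the old code wrapped around to an enormous "available" value instead of going negative, inflating the computed cpu power. The new branch clamps the result to zero first. A demonstration of the wraparound being avoided:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t total = 1000, rt_avg = 1200;   /* rt_avg transiently ahead */
	uint64_t wrapped = total - rt_avg;      /* unsigned underflow        */
	uint64_t clamped = total < rt_avg ? 0 : total - rt_avg;

	printf("wrapped=%llu clamped=%llu\n",
	       (unsigned long long)wrapped, (unsigned long long)clamped);
	return 0;
}
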
| @@ -2378,7 +2391,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2378 | int local_group, const struct cpumask *cpus, | 2391 | int local_group, const struct cpumask *cpus, |
| 2379 | int *balance, struct sg_lb_stats *sgs) | 2392 | int *balance, struct sg_lb_stats *sgs) |
| 2380 | { | 2393 | { |
| 2381 | unsigned long load, max_cpu_load, min_cpu_load; | 2394 | unsigned long load, max_cpu_load, min_cpu_load, max_nr_running; |
| 2382 | int i; | 2395 | int i; |
| 2383 | unsigned int balance_cpu = -1, first_idle_cpu = 0; | 2396 | unsigned int balance_cpu = -1, first_idle_cpu = 0; |
| 2384 | unsigned long avg_load_per_task = 0; | 2397 | unsigned long avg_load_per_task = 0; |
| @@ -2389,6 +2402,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2389 | /* Tally up the load of all CPUs in the group */ | 2402 | /* Tally up the load of all CPUs in the group */ |
| 2390 | max_cpu_load = 0; | 2403 | max_cpu_load = 0; |
| 2391 | min_cpu_load = ~0UL; | 2404 | min_cpu_load = ~0UL; |
| 2405 | max_nr_running = 0; | ||
| 2392 | 2406 | ||
| 2393 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { | 2407 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { |
| 2394 | struct rq *rq = cpu_rq(i); | 2408 | struct rq *rq = cpu_rq(i); |
| @@ -2406,8 +2420,10 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2406 | load = target_load(i, load_idx); | 2420 | load = target_load(i, load_idx); |
| 2407 | } else { | 2421 | } else { |
| 2408 | load = source_load(i, load_idx); | 2422 | load = source_load(i, load_idx); |
| 2409 | if (load > max_cpu_load) | 2423 | if (load > max_cpu_load) { |
| 2410 | max_cpu_load = load; | 2424 | max_cpu_load = load; |
| 2425 | max_nr_running = rq->nr_running; | ||
| 2426 | } | ||
| 2411 | if (min_cpu_load > load) | 2427 | if (min_cpu_load > load) |
| 2412 | min_cpu_load = load; | 2428 | min_cpu_load = load; |
| 2413 | } | 2429 | } |
| @@ -2447,13 +2463,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2447 | if (sgs->sum_nr_running) | 2463 | if (sgs->sum_nr_running) |
| 2448 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; | 2464 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; |
| 2449 | 2465 | ||
| 2450 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) | 2466 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1) |
| 2451 | sgs->group_imb = 1; | 2467 | sgs->group_imb = 1; |
| 2452 | 2468 | ||
| 2453 | sgs->group_capacity = | 2469 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
| 2454 | DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | ||
| 2455 | if (!sgs->group_capacity) | 2470 | if (!sgs->group_capacity) |
| 2456 | sgs->group_capacity = fix_small_capacity(sd, group); | 2471 | sgs->group_capacity = fix_small_capacity(sd, group); |
| 2472 | |||
| 2473 | if (sgs->group_capacity > sgs->sum_nr_running) | ||
| 2474 | sgs->group_has_capacity = 1; | ||
| 2457 | } | 2475 | } |
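The new group_has_capacity flag falls out of the capacity figure that was already being computed. A standalone sketch of the arithmetic; DIV_ROUND_CLOSEST and SCHED_LOAD_SCALE are redefined here purely for illustration:

#include <stdio.h>

#define SCHED_LOAD_SCALE	1024UL
#define DIV_ROUND_CLOSEST(x, d)	(((x) + ((d) / 2)) / (d))

int main(void)
{
	unsigned long cpu_power = 2 * SCHED_LOAD_SCALE;	/* e.g. two full CPUs */
	unsigned long sum_nr_running = 1;

	/* capacity in "number of tasks the group can hold" */
	unsigned long capacity = DIV_ROUND_CLOSEST(cpu_power, SCHED_LOAD_SCALE);
	int has_capacity = capacity > sum_nr_running;	/* room for more tasks */

	printf("capacity=%lu has_capacity=%d\n", capacity, has_capacity);
	return 0;
}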
| 2458 | 2476 | ||
| 2459 | /** | 2477 | /** |
| @@ -2542,9 +2560,14 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
| 2542 | /* | 2560 | /* |
| 2543 | * In case the child domain prefers tasks go to siblings | 2561 | * In case the child domain prefers tasks go to siblings |
| 2544 | * first, lower the sg capacity to one so that we'll try | 2562 | * first, lower the sg capacity to one so that we'll try |
| 2545 | * and move all the excess tasks away. | 2563 | * and move all the excess tasks away. We lower the capacity |
| 2564 | * of a group only if the local group has the capacity to fit | ||
| 2565 | * these excess tasks, i.e. nr_running < group_capacity. The | ||
| 2566 | * extra check prevents the case where you always pull from the | ||
| 2567 | * heaviest group when it is already under-utilized (possible | ||
| 2568 | * when a large weight task outweighs the tasks on the system). | ||
| 2546 | */ | 2569 | */ |
| 2547 | if (prefer_sibling) | 2570 | if (prefer_sibling && !local_group && sds->this_has_capacity) |
| 2548 | sgs.group_capacity = min(sgs.group_capacity, 1UL); | 2571 | sgs.group_capacity = min(sgs.group_capacity, 1UL); |
| 2549 | 2572 | ||
| 2550 | if (local_group) { | 2573 | if (local_group) { |
| @@ -2552,12 +2575,14 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
| 2552 | sds->this = sg; | 2575 | sds->this = sg; |
| 2553 | sds->this_nr_running = sgs.sum_nr_running; | 2576 | sds->this_nr_running = sgs.sum_nr_running; |
| 2554 | sds->this_load_per_task = sgs.sum_weighted_load; | 2577 | sds->this_load_per_task = sgs.sum_weighted_load; |
| 2578 | sds->this_has_capacity = sgs.group_has_capacity; | ||
| 2555 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { | 2579 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { |
| 2556 | sds->max_load = sgs.avg_load; | 2580 | sds->max_load = sgs.avg_load; |
| 2557 | sds->busiest = sg; | 2581 | sds->busiest = sg; |
| 2558 | sds->busiest_nr_running = sgs.sum_nr_running; | 2582 | sds->busiest_nr_running = sgs.sum_nr_running; |
| 2559 | sds->busiest_group_capacity = sgs.group_capacity; | 2583 | sds->busiest_group_capacity = sgs.group_capacity; |
| 2560 | sds->busiest_load_per_task = sgs.sum_weighted_load; | 2584 | sds->busiest_load_per_task = sgs.sum_weighted_load; |
| 2585 | sds->busiest_has_capacity = sgs.group_has_capacity; | ||
| 2561 | sds->group_imb = sgs.group_imb; | 2586 | sds->group_imb = sgs.group_imb; |
| 2562 | } | 2587 | } |
| 2563 | 2588 | ||
| @@ -2754,6 +2779,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
| 2754 | return fix_small_imbalance(sds, this_cpu, imbalance); | 2779 | return fix_small_imbalance(sds, this_cpu, imbalance); |
| 2755 | 2780 | ||
| 2756 | } | 2781 | } |
| 2782 | |||
| 2757 | /******* find_busiest_group() helpers end here *********************/ | 2783 | /******* find_busiest_group() helpers end here *********************/ |
| 2758 | 2784 | ||
| 2759 | /** | 2785 | /** |
| @@ -2805,6 +2831,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2805 | * 4) This group is busier than the avg busyness at this | 2831 | * 4) This group is busier than the avg busyness at this |
| 2806 | * sched_domain. | 2832 | * sched_domain. |
| 2807 | * 5) The imbalance is within the specified limit. | 2833 | * 5) The imbalance is within the specified limit. |
| 2834 | * | ||
| 2835 | * Note: when doing newidle balance, if the local group has excess | ||
| 2836 | * capacity (i.e. nr_running < group_capacity) and the busiest group | ||
| 2837 | * does not have any capacity, we force a load balance to pull tasks | ||
| 2838 | * to the local group. In this case, we skip past checks 3, 4 and 5. | ||
| 2808 | */ | 2839 | */ |
| 2809 | if (!(*balance)) | 2840 | if (!(*balance)) |
| 2810 | goto ret; | 2841 | goto ret; |
| @@ -2816,6 +2847,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2816 | if (!sds.busiest || sds.busiest_nr_running == 0) | 2847 | if (!sds.busiest || sds.busiest_nr_running == 0) |
| 2817 | goto out_balanced; | 2848 | goto out_balanced; |
| 2818 | 2849 | ||
| 2850 | /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ | ||
| 2851 | if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && | ||
| 2852 | !sds.busiest_has_capacity) | ||
| 2853 | goto force_balance; | ||
| 2854 | |||
| 2819 | if (sds.this_load >= sds.max_load) | 2855 | if (sds.this_load >= sds.max_load) |
| 2820 | goto out_balanced; | 2856 | goto out_balanced; |
| 2821 | 2857 | ||
| @@ -2827,6 +2863,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2827 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | 2863 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) |
| 2828 | goto out_balanced; | 2864 | goto out_balanced; |
| 2829 | 2865 | ||
| 2866 | force_balance: | ||
| 2830 | /* Looks like there is an imbalance. Compute it */ | 2867 | /* Looks like there is an imbalance. Compute it */ |
| 2831 | calculate_imbalance(&sds, this_cpu, imbalance); | 2868 | calculate_imbalance(&sds, this_cpu, imbalance); |
| 2832 | return sds.busiest; | 2869 | return sds.busiest; |
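The force_balance path added above skips the usual load checks only for newly-idle balancing. A sketch of that predicate, using simplified stand-ins for the sd_lb_stats fields; not code from the patch:

#include <stdio.h>

enum cpu_idle_type { CPU_IDLE, CPU_NOT_IDLE, CPU_NEWLY_IDLE };

/* SD_BALANCE_NEWIDLE trumps SMP nice when the local group is underutilized */
static int force_newidle_balance(enum cpu_idle_type idle,
				 int this_has_capacity, int busiest_has_capacity)
{
	return idle == CPU_NEWLY_IDLE &&
	       this_has_capacity && !busiest_has_capacity;
}

int main(void)
{
	printf("%d\n", force_newidle_balance(CPU_NEWLY_IDLE, 1, 0)); /* 1: pull tasks */
	printf("%d\n", force_newidle_balance(CPU_NOT_IDLE, 1, 0));   /* 0: normal checks */
	return 0;
}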
| @@ -3031,7 +3068,14 @@ redo: | |||
| 3031 | 3068 | ||
| 3032 | if (!ld_moved) { | 3069 | if (!ld_moved) { |
| 3033 | schedstat_inc(sd, lb_failed[idle]); | 3070 | schedstat_inc(sd, lb_failed[idle]); |
| 3034 | sd->nr_balance_failed++; | 3071 | /* |
| 3072 | * Increment the failure counter only on periodic balance. | ||
| 3073 | * We do not want newidle balance, which can be very | ||
| 3074 | * frequent, to pollute the failure counter, causing | ||
| 3075 | * excessive cache_hot migrations and active balances. | ||
| 3076 | */ | ||
| 3077 | if (idle != CPU_NEWLY_IDLE) | ||
| 3078 | sd->nr_balance_failed++; | ||
| 3035 | 3079 | ||
| 3036 | if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), | 3080 | if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), |
| 3037 | this_cpu)) { | 3081 | this_cpu)) { |
| @@ -3153,10 +3197,8 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 3153 | interval = msecs_to_jiffies(sd->balance_interval); | 3197 | interval = msecs_to_jiffies(sd->balance_interval); |
| 3154 | if (time_after(next_balance, sd->last_balance + interval)) | 3198 | if (time_after(next_balance, sd->last_balance + interval)) |
| 3155 | next_balance = sd->last_balance + interval; | 3199 | next_balance = sd->last_balance + interval; |
| 3156 | if (pulled_task) { | 3200 | if (pulled_task) |
| 3157 | this_rq->idle_stamp = 0; | ||
| 3158 | break; | 3201 | break; |
| 3159 | } | ||
| 3160 | } | 3202 | } |
| 3161 | 3203 | ||
| 3162 | raw_spin_lock(&this_rq->lock); | 3204 | raw_spin_lock(&this_rq->lock); |
| @@ -3751,8 +3793,11 @@ static void task_fork_fair(struct task_struct *p) | |||
| 3751 | 3793 | ||
| 3752 | update_rq_clock(rq); | 3794 | update_rq_clock(rq); |
| 3753 | 3795 | ||
| 3754 | if (unlikely(task_cpu(p) != this_cpu)) | 3796 | if (unlikely(task_cpu(p) != this_cpu)) { |
| 3797 | rcu_read_lock(); | ||
| 3755 | __set_task_cpu(p, this_cpu); | 3798 | __set_task_cpu(p, this_cpu); |
| 3799 | rcu_read_unlock(); | ||
| 3800 | } | ||
| 3756 | 3801 | ||
| 3757 | update_curr(cfs_rq); | 3802 | update_curr(cfs_rq); |
| 3758 | 3803 | ||
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 83c66e8ad3ee..185f920ec1a2 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
| @@ -61,3 +61,8 @@ SCHED_FEAT(ASYM_EFF_LOAD, 1) | |||
| 61 | * release the lock. Decreases scheduling overhead. | 61 | * release the lock. Decreases scheduling overhead. |
| 62 | */ | 62 | */ |
| 63 | SCHED_FEAT(OWNER_SPIN, 1) | 63 | SCHED_FEAT(OWNER_SPIN, 1) |
| 64 | |||
| 65 | /* | ||
| 66 | * Decrement CPU power based on irq activity | ||
| 67 | */ | ||
| 68 | SCHED_FEAT(NONIRQ_POWER, 1) | ||
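sched_features.h is consumed as an X-macro list, so a feature such as NONIRQ_POWER normally needs only this one line plus sched_feat() checks at its use sites. A generic sketch of that pattern; this is an assumption about the mechanism, not code from this patch:

#include <stdio.h>

#define FEATURE_LIST(F)		\
	F(OWNER_SPIN, 1)	\
	F(NONIRQ_POWER, 1)

/* one expansion builds the bit numbers ... */
enum {
#define F(name, on) FEAT_##name##_BIT,
	FEATURE_LIST(F)
#undef F
};

/* ... another builds the default enable mask */
static const unsigned int default_features =
#define F(name, on) ((on) << FEAT_##name##_BIT) |
	FEATURE_LIST(F)
#undef F
	0;

#define feat_enabled(name) (default_features & (1u << FEAT_##name##_BIT))

int main(void)
{
	printf("NONIRQ_POWER enabled: %d\n", feat_enabled(NONIRQ_POWER) ? 1 : 0);
	return 0;
}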
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d10c80ebb67a..bea7d79f7e9c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -609,7 +609,7 @@ static void update_curr_rt(struct rq *rq) | |||
| 609 | if (!task_has_rt_policy(curr)) | 609 | if (!task_has_rt_policy(curr)) |
| 610 | return; | 610 | return; |
| 611 | 611 | ||
| 612 | delta_exec = rq->clock - curr->se.exec_start; | 612 | delta_exec = rq->clock_task - curr->se.exec_start; |
| 613 | if (unlikely((s64)delta_exec < 0)) | 613 | if (unlikely((s64)delta_exec < 0)) |
| 614 | delta_exec = 0; | 614 | delta_exec = 0; |
| 615 | 615 | ||
| @@ -618,7 +618,7 @@ static void update_curr_rt(struct rq *rq) | |||
| 618 | curr->se.sum_exec_runtime += delta_exec; | 618 | curr->se.sum_exec_runtime += delta_exec; |
| 619 | account_group_exec_runtime(curr, delta_exec); | 619 | account_group_exec_runtime(curr, delta_exec); |
| 620 | 620 | ||
| 621 | curr->se.exec_start = rq->clock; | 621 | curr->se.exec_start = rq->clock_task; |
| 622 | cpuacct_charge(curr, delta_exec); | 622 | cpuacct_charge(curr, delta_exec); |
| 623 | 623 | ||
| 624 | sched_rt_avg_update(rq, delta_exec); | 624 | sched_rt_avg_update(rq, delta_exec); |
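update_curr_rt() now accounts runtime against rq->clock_task rather than rq->clock, keeping the same guard against a negative delta. A standalone sketch of that accounting step; the clock values are illustrative:

#include <stdint.h>
#include <stdio.h>

struct entity {
	uint64_t exec_start;
	uint64_t sum_exec_runtime;
};

static void account(struct entity *se, uint64_t clock_task)
{
	int64_t delta = (int64_t)(clock_task - se->exec_start);

	if (delta < 0)		/* clock appears to go backwards: ignore */
		delta = 0;

	se->sum_exec_runtime += delta;
	se->exec_start = clock_task;	/* restart the accounting window */
}

int main(void)
{
	struct entity se = { .exec_start = 1000 };

	account(&se, 1500);
	printf("runtime=%llu\n", (unsigned long long)se.sum_exec_runtime); /* 500 */
	return 0;
}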
| @@ -960,18 +960,19 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | |||
| 960 | * runqueue. Otherwise simply start this RT task | 960 | * runqueue. Otherwise simply start this RT task |
| 961 | * on its current runqueue. | 961 | * on its current runqueue. |
| 962 | * | 962 | * |
| 963 | * We want to avoid overloading runqueues. Even if | 963 | * We want to avoid overloading runqueues. If the woken |
| 964 | * the RT task is of higher priority than the current RT task. | 964 | task is of higher priority, then it will stay on this CPU |
| 965 | * RT tasks behave differently than other tasks. If | 965 | * and the lower prio task should be moved to another CPU. |
| 966 | * one gets preempted, we try to push it off to another queue. | 966 | * Even though this will probably make the lower prio task |
| 967 | * So trying to keep a preempting RT task on the same | 967 | * lose its cache, we do not want to bounce a higher task |
| 968 | * cache hot CPU will force the running RT task to | 968 | * around just because it gave up its CPU, perhaps for a |
| 969 | * a cold CPU. So we waste all the cache for the lower | 969 | * lock? |
| 970 | * RT task in hopes of saving some of a RT task | 970 | * |
| 971 | * that is just being woken and probably will have | 971 | * For equal prio tasks, we just let the scheduler sort it out. |
| 972 | * cold cache anyway. | ||
| 973 | */ | 972 | */ |
| 974 | if (unlikely(rt_task(rq->curr)) && | 973 | if (unlikely(rt_task(rq->curr)) && |
| 974 | (rq->curr->rt.nr_cpus_allowed < 2 || | ||
| 975 | rq->curr->prio < p->prio) && | ||
| 975 | (p->rt.nr_cpus_allowed > 1)) { | 976 | (p->rt.nr_cpus_allowed > 1)) { |
| 976 | int cpu = find_lowest_rq(p); | 977 | int cpu = find_lowest_rq(p); |
| 977 | 978 | ||
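select_task_rq_rt() now redirects a woken RT task only when the current task is itself RT and is either pinned or of higher priority. A sketch of that decision, with simplified stand-in fields; not code from the patch:

#include <stdio.h>

struct rt_info {
	int is_rt;		/* task runs an RT policy */
	int prio;		/* lower value means higher priority */
	int nr_cpus_allowed;
};

/* Push the woken task elsewhere only if the running task is RT and is
 * either pinned or higher priority, and the woken task may run elsewhere. */
static int place_woken_elsewhere(const struct rt_info *curr,
				 const struct rt_info *woken)
{
	return curr->is_rt &&
	       (curr->nr_cpus_allowed < 2 || curr->prio < woken->prio) &&
	       woken->nr_cpus_allowed > 1;
}

int main(void)
{
	struct rt_info curr  = { .is_rt = 1, .prio = 10, .nr_cpus_allowed = 4 };
	struct rt_info woken = { .is_rt = 1, .prio = 20, .nr_cpus_allowed = 4 };

	printf("%d\n", place_woken_elsewhere(&curr, &woken));	/* 1: woken task moves */

	woken.prio = 5;		/* woken task is now higher priority: it stays */
	printf("%d\n", place_woken_elsewhere(&curr, &woken));	/* 0 */
	return 0;
}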
| @@ -1074,7 +1075,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) | |||
| 1074 | } while (rt_rq); | 1075 | } while (rt_rq); |
| 1075 | 1076 | ||
| 1076 | p = rt_task_of(rt_se); | 1077 | p = rt_task_of(rt_se); |
| 1077 | p->se.exec_start = rq->clock; | 1078 | p->se.exec_start = rq->clock_task; |
| 1078 | 1079 | ||
| 1079 | return p; | 1080 | return p; |
| 1080 | } | 1081 | } |
| @@ -1139,7 +1140,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
| 1139 | for_each_leaf_rt_rq(rt_rq, rq) { | 1140 | for_each_leaf_rt_rq(rt_rq, rq) { |
| 1140 | array = &rt_rq->active; | 1141 | array = &rt_rq->active; |
| 1141 | idx = sched_find_first_bit(array->bitmap); | 1142 | idx = sched_find_first_bit(array->bitmap); |
| 1142 | next_idx: | 1143 | next_idx: |
| 1143 | if (idx >= MAX_RT_PRIO) | 1144 | if (idx >= MAX_RT_PRIO) |
| 1144 | continue; | 1145 | continue; |
| 1145 | if (next && next->prio < idx) | 1146 | if (next && next->prio < idx) |
| @@ -1315,7 +1316,7 @@ static int push_rt_task(struct rq *rq) | |||
| 1315 | if (!next_task) | 1316 | if (!next_task) |
| 1316 | return 0; | 1317 | return 0; |
| 1317 | 1318 | ||
| 1318 | retry: | 1319 | retry: |
| 1319 | if (unlikely(next_task == rq->curr)) { | 1320 | if (unlikely(next_task == rq->curr)) { |
| 1320 | WARN_ON(1); | 1321 | WARN_ON(1); |
| 1321 | return 0; | 1322 | return 0; |
| @@ -1463,7 +1464,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
| 1463 | * but possible) | 1464 | * but possible) |
| 1464 | */ | 1465 | */ |
| 1465 | } | 1466 | } |
| 1466 | skip: | 1467 | skip: |
| 1467 | double_unlock_balance(this_rq, src_rq); | 1468 | double_unlock_balance(this_rq, src_rq); |
| 1468 | } | 1469 | } |
| 1469 | 1470 | ||
| @@ -1491,7 +1492,10 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) | |||
| 1491 | if (!task_running(rq, p) && | 1492 | if (!task_running(rq, p) && |
| 1492 | !test_tsk_need_resched(rq->curr) && | 1493 | !test_tsk_need_resched(rq->curr) && |
| 1493 | has_pushable_tasks(rq) && | 1494 | has_pushable_tasks(rq) && |
| 1494 | p->rt.nr_cpus_allowed > 1) | 1495 | p->rt.nr_cpus_allowed > 1 && |
| 1496 | rt_task(rq->curr) && | ||
| 1497 | (rq->curr->rt.nr_cpus_allowed < 2 || | ||
| 1498 | rq->curr->prio < p->prio)) | ||
| 1495 | push_rt_tasks(rq); | 1499 | push_rt_tasks(rq); |
| 1496 | } | 1500 | } |
| 1497 | 1501 | ||
| @@ -1709,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq) | |||
| 1709 | { | 1713 | { |
| 1710 | struct task_struct *p = rq->curr; | 1714 | struct task_struct *p = rq->curr; |
| 1711 | 1715 | ||
| 1712 | p->se.exec_start = rq->clock; | 1716 | p->se.exec_start = rq->clock_task; |
| 1713 | 1717 | ||
| 1714 | /* The running task is never eligible for pushing */ | 1718 | /* The running task is never eligible for pushing */ |
| 1715 | dequeue_pushable_task(rq, p); | 1719 | dequeue_pushable_task(rq, p); |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c new file mode 100644 index 000000000000..45bddc0c1048 --- /dev/null +++ b/kernel/sched_stoptask.c | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | /* | ||
| 2 | * stop-task scheduling class. | ||
| 3 | * | ||
| 4 | * The stop task is the highest priority task in the system, it preempts | ||
| 5 | * everything and will be preempted by nothing. | ||
| 6 | * | ||
| 7 | * See kernel/stop_machine.c | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifdef CONFIG_SMP | ||
| 11 | static int | ||
| 12 | select_task_rq_stop(struct rq *rq, struct task_struct *p, | ||
| 13 | int sd_flag, int flags) | ||
| 14 | { | ||
| 15 | return task_cpu(p); /* stop tasks never migrate */ | ||
| 16 | } | ||
| 17 | #endif /* CONFIG_SMP */ | ||
| 18 | |||
| 19 | static void | ||
| 20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) | ||
| 21 | { | ||
| 22 | resched_task(rq->curr); /* we preempt everything */ | ||
| 23 | } | ||
| 24 | |||
| 25 | static struct task_struct *pick_next_task_stop(struct rq *rq) | ||
| 26 | { | ||
| 27 | struct task_struct *stop = rq->stop; | ||
| 28 | |||
| 29 | if (stop && stop->state == TASK_RUNNING) | ||
| 30 | return stop; | ||
| 31 | |||
| 32 | return NULL; | ||
| 33 | } | ||
| 34 | |||
| 35 | static void | ||
| 36 | enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) | ||
| 37 | { | ||
| 38 | } | ||
| 39 | |||
| 40 | static void | ||
| 41 | dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) | ||
| 42 | { | ||
| 43 | } | ||
| 44 | |||
| 45 | static void yield_task_stop(struct rq *rq) | ||
| 46 | { | ||
| 47 | BUG(); /* the stop task should never yield, it's pointless. */ | ||
| 48 | } | ||
| 49 | |||
| 50 | static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) | ||
| 51 | { | ||
| 52 | } | ||
| 53 | |||
| 54 | static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) | ||
| 55 | { | ||
| 56 | } | ||
| 57 | |||
| 58 | static void set_curr_task_stop(struct rq *rq) | ||
| 59 | { | ||
| 60 | } | ||
| 61 | |||
| 62 | static void switched_to_stop(struct rq *rq, struct task_struct *p, | ||
| 63 | int running) | ||
| 64 | { | ||
| 65 | BUG(); /* it's impossible to change to this class */ | ||
| 66 | } | ||
| 67 | |||
| 68 | static void prio_changed_stop(struct rq *rq, struct task_struct *p, | ||
| 69 | int oldprio, int running) | ||
| 70 | { | ||
| 71 | BUG(); /* how!? what priority? */ | ||
| 72 | } | ||
| 73 | |||
| 74 | static unsigned int | ||
| 75 | get_rr_interval_stop(struct rq *rq, struct task_struct *task) | ||
| 76 | { | ||
| 77 | return 0; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Simple, special scheduling class for the per-CPU stop tasks: | ||
| 82 | */ | ||
| 83 | static const struct sched_class stop_sched_class = { | ||
| 84 | .next = &rt_sched_class, | ||
| 85 | |||
| 86 | .enqueue_task = enqueue_task_stop, | ||
| 87 | .dequeue_task = dequeue_task_stop, | ||
| 88 | .yield_task = yield_task_stop, | ||
| 89 | |||
| 90 | .check_preempt_curr = check_preempt_curr_stop, | ||
| 91 | |||
| 92 | .pick_next_task = pick_next_task_stop, | ||
| 93 | .put_prev_task = put_prev_task_stop, | ||
| 94 | |||
| 95 | #ifdef CONFIG_SMP | ||
| 96 | .select_task_rq = select_task_rq_stop, | ||
| 97 | #endif | ||
| 98 | |||
| 99 | .set_curr_task = set_curr_task_stop, | ||
| 100 | .task_tick = task_tick_stop, | ||
| 101 | |||
| 102 | .get_rr_interval = get_rr_interval_stop, | ||
| 103 | |||
| 104 | .prio_changed = prio_changed_stop, | ||
| 105 | .switched_to = switched_to_stop, | ||
| 106 | |||
| 107 | /* no .task_new for stop tasks */ | ||
| 108 | }; | ||
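The new stop class is wired in ahead of the RT class via .next. A hedged sketch of the pick loop that makes the head of the list win; this is an assumption about how the class chain is consumed, not patch code:

#include <stddef.h>

struct task_struct;
struct rq;

struct sched_class_sketch {
	const struct sched_class_sketch *next;
	struct task_struct *(*pick_next_task)(struct rq *rq);
};

/* Walk the class list from the highest-priority class down and take the
 * first task offered; with the stop class at the head (its .next being
 * the RT class), a runnable stop task always wins. */
static struct task_struct *
pick_next_task_sketch(struct rq *rq, const struct sched_class_sketch *head)
{
	const struct sched_class_sketch *class;

	for (class = head; class; class = class->next) {
		struct task_struct *p = class->pick_next_task(rq);
		if (p)
			return p;
	}
	return NULL;	/* in the kernel the idle class always returns a task */
}

static struct task_struct *pick_none(struct rq *rq) { (void)rq; return NULL; }
static struct task_struct *pick_dummy(struct rq *rq)
{
	(void)rq;
	return (struct task_struct *)1;	/* stand-in pointer, never dereferenced */
}

int main(void)
{
	const struct sched_class_sketch fair = { .next = NULL,  .pick_next_task = pick_dummy };
	const struct sched_class_sketch rt   = { .next = &fair, .pick_next_task = pick_none };
	const struct sched_class_sketch stop = { .next = &rt,   .pick_next_task = pick_none };

	return pick_next_task_sketch(NULL, &stop) ? 0 : 1;	/* falls through to "fair" */
}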
diff --git a/kernel/softirq.c b/kernel/softirq.c index 07b4f1b1a73a..fc978889b194 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -77,11 +77,21 @@ void wakeup_softirqd(void) | |||
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | /* | 79 | /* |
| 80 | * preempt_count and SOFTIRQ_OFFSET usage: | ||
| 81 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | ||
| 82 | * softirq processing. | ||
| 83 | * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) | ||
| 84 | * on local_bh_disable or local_bh_enable. | ||
| 85 | * This lets us distinguish between whether we are currently processing | ||
| 86 | * softirq and whether we just have bh disabled. | ||
| 87 | */ | ||
| 88 | |||
| 89 | /* | ||
| 80 | * This one is for softirq.c-internal use, | 90 | * This one is for softirq.c-internal use, |
| 81 | * where hardirqs are disabled legitimately: | 91 | * where hardirqs are disabled legitimately: |
| 82 | */ | 92 | */ |
| 83 | #ifdef CONFIG_TRACE_IRQFLAGS | 93 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 84 | static void __local_bh_disable(unsigned long ip) | 94 | static void __local_bh_disable(unsigned long ip, unsigned int cnt) |
| 85 | { | 95 | { |
| 86 | unsigned long flags; | 96 | unsigned long flags; |
| 87 | 97 | ||
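The comment added at the top of this hunk is the key to the rest of the file: softirq service and local_bh_disable() now move preempt_count by different amounts. A standalone sketch of why that distinction works; the bit layout below is illustrative only:

#include <stdio.h>

#define SOFTIRQ_SHIFT		8
#define SOFTIRQ_OFFSET		(1U << SOFTIRQ_SHIFT)
#define SOFTIRQ_DISABLE_OFFSET	(2U * SOFTIRQ_OFFSET)
#define SOFTIRQ_MASK		(0xffU << SOFTIRQ_SHIFT)

static unsigned int preempt_count;

#define softirq_count()		(preempt_count & SOFTIRQ_MASK)
#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)

int main(void)
{
	preempt_count += SOFTIRQ_DISABLE_OFFSET;	/* local_bh_disable() */
	printf("bh disabled: %d, serving softirq: %d\n",
	       softirq_count() != 0, in_serving_softirq() != 0);	/* 1, 0 */

	preempt_count += SOFTIRQ_OFFSET;		/* __do_softirq() entry */
	printf("bh disabled: %d, serving softirq: %d\n",
	       softirq_count() != 0, in_serving_softirq() != 0);	/* 1, 1 */
	return 0;
}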
| @@ -95,32 +105,43 @@ static void __local_bh_disable(unsigned long ip) | |||
| 95 | * We must manually increment preempt_count here and manually | 105 | * We must manually increment preempt_count here and manually |
| 96 | * call the trace_preempt_off later. | 106 | * call the trace_preempt_off later. |
| 97 | */ | 107 | */ |
| 98 | preempt_count() += SOFTIRQ_OFFSET; | 108 | preempt_count() += cnt; |
| 99 | /* | 109 | /* |
| 100 | * Were softirqs turned off above: | 110 | * Were softirqs turned off above: |
| 101 | */ | 111 | */ |
| 102 | if (softirq_count() == SOFTIRQ_OFFSET) | 112 | if (softirq_count() == cnt) |
| 103 | trace_softirqs_off(ip); | 113 | trace_softirqs_off(ip); |
| 104 | raw_local_irq_restore(flags); | 114 | raw_local_irq_restore(flags); |
| 105 | 115 | ||
| 106 | if (preempt_count() == SOFTIRQ_OFFSET) | 116 | if (preempt_count() == cnt) |
| 107 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | 117 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); |
| 108 | } | 118 | } |
| 109 | #else /* !CONFIG_TRACE_IRQFLAGS */ | 119 | #else /* !CONFIG_TRACE_IRQFLAGS */ |
| 110 | static inline void __local_bh_disable(unsigned long ip) | 120 | static inline void __local_bh_disable(unsigned long ip, unsigned int cnt) |
| 111 | { | 121 | { |
| 112 | add_preempt_count(SOFTIRQ_OFFSET); | 122 | add_preempt_count(cnt); |
| 113 | barrier(); | 123 | barrier(); |
| 114 | } | 124 | } |
| 115 | #endif /* CONFIG_TRACE_IRQFLAGS */ | 125 | #endif /* CONFIG_TRACE_IRQFLAGS */ |
| 116 | 126 | ||
| 117 | void local_bh_disable(void) | 127 | void local_bh_disable(void) |
| 118 | { | 128 | { |
| 119 | __local_bh_disable((unsigned long)__builtin_return_address(0)); | 129 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
| 130 | SOFTIRQ_DISABLE_OFFSET); | ||
| 120 | } | 131 | } |
| 121 | 132 | ||
| 122 | EXPORT_SYMBOL(local_bh_disable); | 133 | EXPORT_SYMBOL(local_bh_disable); |
| 123 | 134 | ||
| 135 | static void __local_bh_enable(unsigned int cnt) | ||
| 136 | { | ||
| 137 | WARN_ON_ONCE(in_irq()); | ||
| 138 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 139 | |||
| 140 | if (softirq_count() == cnt) | ||
| 141 | trace_softirqs_on((unsigned long)__builtin_return_address(0)); | ||
| 142 | sub_preempt_count(cnt); | ||
| 143 | } | ||
| 144 | |||
| 124 | /* | 145 | /* |
| 125 | * Special-case - softirqs can safely be enabled in | 146 | * Special-case - softirqs can safely be enabled in |
| 126 | * cond_resched_softirq(), or by __do_softirq(), | 147 | * cond_resched_softirq(), or by __do_softirq(), |
| @@ -128,12 +149,7 @@ EXPORT_SYMBOL(local_bh_disable); | |||
| 128 | */ | 149 | */ |
| 129 | void _local_bh_enable(void) | 150 | void _local_bh_enable(void) |
| 130 | { | 151 | { |
| 131 | WARN_ON_ONCE(in_irq()); | 152 | __local_bh_enable(SOFTIRQ_DISABLE_OFFSET); |
| 132 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 133 | |||
| 134 | if (softirq_count() == SOFTIRQ_OFFSET) | ||
| 135 | trace_softirqs_on((unsigned long)__builtin_return_address(0)); | ||
| 136 | sub_preempt_count(SOFTIRQ_OFFSET); | ||
| 137 | } | 153 | } |
| 138 | 154 | ||
| 139 | EXPORT_SYMBOL(_local_bh_enable); | 155 | EXPORT_SYMBOL(_local_bh_enable); |
| @@ -147,13 +163,13 @@ static inline void _local_bh_enable_ip(unsigned long ip) | |||
| 147 | /* | 163 | /* |
| 148 | * Are softirqs going to be turned on now: | 164 | * Are softirqs going to be turned on now: |
| 149 | */ | 165 | */ |
| 150 | if (softirq_count() == SOFTIRQ_OFFSET) | 166 | if (softirq_count() == SOFTIRQ_DISABLE_OFFSET) |
| 151 | trace_softirqs_on(ip); | 167 | trace_softirqs_on(ip); |
| 152 | /* | 168 | /* |
| 153 | * Keep preemption disabled until we are done with | 169 | * Keep preemption disabled until we are done with |
| 154 | * softirq processing: | 170 | * softirq processing: |
| 155 | */ | 171 | */ |
| 156 | sub_preempt_count(SOFTIRQ_OFFSET - 1); | 172 | sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1); |
| 157 | 173 | ||
| 158 | if (unlikely(!in_interrupt() && local_softirq_pending())) | 174 | if (unlikely(!in_interrupt() && local_softirq_pending())) |
| 159 | do_softirq(); | 175 | do_softirq(); |
| @@ -198,7 +214,8 @@ asmlinkage void __do_softirq(void) | |||
| 198 | pending = local_softirq_pending(); | 214 | pending = local_softirq_pending(); |
| 199 | account_system_vtime(current); | 215 | account_system_vtime(current); |
| 200 | 216 | ||
| 201 | __local_bh_disable((unsigned long)__builtin_return_address(0)); | 217 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
| 218 | SOFTIRQ_OFFSET); | ||
| 202 | lockdep_softirq_enter(); | 219 | lockdep_softirq_enter(); |
| 203 | 220 | ||
| 204 | cpu = smp_processor_id(); | 221 | cpu = smp_processor_id(); |
| @@ -245,7 +262,7 @@ restart: | |||
| 245 | lockdep_softirq_exit(); | 262 | lockdep_softirq_exit(); |
| 246 | 263 | ||
| 247 | account_system_vtime(current); | 264 | account_system_vtime(current); |
| 248 | _local_bh_enable(); | 265 | __local_bh_enable(SOFTIRQ_OFFSET); |
| 249 | } | 266 | } |
| 250 | 267 | ||
| 251 | #ifndef __ARCH_HAS_DO_SOFTIRQ | 268 | #ifndef __ARCH_HAS_DO_SOFTIRQ |
| @@ -279,10 +296,16 @@ void irq_enter(void) | |||
| 279 | 296 | ||
| 280 | rcu_irq_enter(); | 297 | rcu_irq_enter(); |
| 281 | if (idle_cpu(cpu) && !in_interrupt()) { | 298 | if (idle_cpu(cpu) && !in_interrupt()) { |
| 282 | __irq_enter(); | 299 | /* |
| 300 | * Prevent raise_softirq from needlessly waking up ksoftirqd | ||
| 301 | * here, as softirq will be serviced on return from interrupt. | ||
| 302 | */ | ||
| 303 | local_bh_disable(); | ||
| 283 | tick_check_idle(cpu); | 304 | tick_check_idle(cpu); |
| 284 | } else | 305 | _local_bh_enable(); |
| 285 | __irq_enter(); | 306 | } |
| 307 | |||
| 308 | __irq_enter(); | ||
| 286 | } | 309 | } |
| 287 | 310 | ||
| 288 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED | 311 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED |
| @@ -696,6 +719,7 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
| 696 | { | 719 | { |
| 697 | set_current_state(TASK_INTERRUPTIBLE); | 720 | set_current_state(TASK_INTERRUPTIBLE); |
| 698 | 721 | ||
| 722 | current->flags |= PF_KSOFTIRQD; | ||
| 699 | while (!kthread_should_stop()) { | 723 | while (!kthread_should_stop()) { |
| 700 | preempt_disable(); | 724 | preempt_disable(); |
| 701 | if (!local_softirq_pending()) { | 725 | if (!local_softirq_pending()) { |
| @@ -886,17 +910,14 @@ int __init __weak early_irq_init(void) | |||
| 886 | return 0; | 910 | return 0; |
| 887 | } | 911 | } |
| 888 | 912 | ||
| 913 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
| 889 | int __init __weak arch_probe_nr_irqs(void) | 914 | int __init __weak arch_probe_nr_irqs(void) |
| 890 | { | 915 | { |
| 891 | return 0; | 916 | return NR_IRQS_LEGACY; |
| 892 | } | 917 | } |
| 893 | 918 | ||
| 894 | int __init __weak arch_early_irq_init(void) | 919 | int __init __weak arch_early_irq_init(void) |
| 895 | { | 920 | { |
| 896 | return 0; | 921 | return 0; |
| 897 | } | 922 | } |
| 898 | 923 | #endif | |
| 899 | int __weak arch_init_chip_data(struct irq_desc *desc, int node) | ||
| 900 | { | ||
| 901 | return 0; | ||
| 902 | } | ||
diff --git a/kernel/srcu.c b/kernel/srcu.c index 2980da3fd509..c71e07500536 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
| @@ -46,11 +46,9 @@ static int init_srcu_struct_fields(struct srcu_struct *sp) | |||
| 46 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, | 46 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, |
| 47 | struct lock_class_key *key) | 47 | struct lock_class_key *key) |
| 48 | { | 48 | { |
| 49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 50 | /* Don't re-initialize a lock while it is held. */ | 49 | /* Don't re-initialize a lock while it is held. */ |
| 51 | debug_check_no_locks_freed((void *)sp, sizeof(*sp)); | 50 | debug_check_no_locks_freed((void *)sp, sizeof(*sp)); |
| 52 | lockdep_init_map(&sp->dep_map, name, key, 0); | 51 | lockdep_init_map(&sp->dep_map, name, key, 0); |
| 53 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
| 54 | return init_srcu_struct_fields(sp); | 52 | return init_srcu_struct_fields(sp); |
| 55 | } | 53 | } |
| 56 | EXPORT_SYMBOL_GPL(__init_srcu_struct); | 54 | EXPORT_SYMBOL_GPL(__init_srcu_struct); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 4372ccb25127..090c28812ce1 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -287,11 +287,12 @@ repeat: | |||
| 287 | goto repeat; | 287 | goto repeat; |
| 288 | } | 288 | } |
| 289 | 289 | ||
| 290 | extern void sched_set_stop_task(int cpu, struct task_struct *stop); | ||
| 291 | |||
| 290 | /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ | 292 | /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ |
| 291 | static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | 293 | static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, |
| 292 | unsigned long action, void *hcpu) | 294 | unsigned long action, void *hcpu) |
| 293 | { | 295 | { |
| 294 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | ||
| 295 | unsigned int cpu = (unsigned long)hcpu; | 296 | unsigned int cpu = (unsigned long)hcpu; |
| 296 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | 297 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); |
| 297 | struct task_struct *p; | 298 | struct task_struct *p; |
| @@ -304,13 +305,13 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
| 304 | cpu); | 305 | cpu); |
| 305 | if (IS_ERR(p)) | 306 | if (IS_ERR(p)) |
| 306 | return NOTIFY_BAD; | 307 | return NOTIFY_BAD; |
| 307 | sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); | ||
| 308 | get_task_struct(p); | 308 | get_task_struct(p); |
| 309 | kthread_bind(p, cpu); | ||
| 310 | sched_set_stop_task(cpu, p); | ||
| 309 | stopper->thread = p; | 311 | stopper->thread = p; |
| 310 | break; | 312 | break; |
| 311 | 313 | ||
| 312 | case CPU_ONLINE: | 314 | case CPU_ONLINE: |
| 313 | kthread_bind(stopper->thread, cpu); | ||
| 314 | /* strictly unnecessary, as first user will wake it */ | 315 | /* strictly unnecessary, as first user will wake it */ |
| 315 | wake_up_process(stopper->thread); | 316 | wake_up_process(stopper->thread); |
| 316 | /* mark enabled */ | 317 | /* mark enabled */ |
| @@ -325,6 +326,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
| 325 | { | 326 | { |
| 326 | struct cpu_stop_work *work; | 327 | struct cpu_stop_work *work; |
| 327 | 328 | ||
| 329 | sched_set_stop_task(cpu, NULL); | ||
| 328 | /* kill the stopper */ | 330 | /* kill the stopper */ |
| 329 | kthread_stop(stopper->thread); | 331 | kthread_stop(stopper->thread); |
| 330 | /* drain remaining works */ | 332 | /* drain remaining works */ |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index c63116863a80..d2321891538f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -149,10 +149,18 @@ static void ntp_update_offset(long offset) | |||
| 149 | time_reftime = get_seconds(); | 149 | time_reftime = get_seconds(); |
| 150 | 150 | ||
| 151 | offset64 = offset; | 151 | offset64 = offset; |
| 152 | freq_adj = (offset64 * secs) << | 152 | freq_adj = ntp_update_offset_fll(offset64, secs); |
| 153 | (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); | ||
| 154 | 153 | ||
| 155 | freq_adj += ntp_update_offset_fll(offset64, secs); | 154 | /* |
| 155 | * Clamp update interval to reduce PLL gain with low | ||
| 156 | * sampling rate (e.g. intermittent network connection) | ||
| 157 | * to avoid instability. | ||
| 158 | */ | ||
| 159 | if (unlikely(secs > 1 << (SHIFT_PLL + 1 + time_constant))) | ||
| 160 | secs = 1 << (SHIFT_PLL + 1 + time_constant); | ||
| 161 | |||
| 162 | freq_adj += (offset64 * secs) << | ||
| 163 | (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); | ||
| 156 | 164 | ||
| 157 | freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); | 165 | freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); |
| 158 | 166 | ||
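The clamp added above bounds the PLL term when samples arrive rarely (the FLL term, computed first, handles long intervals). A small sketch of the clamp itself; the SHIFT_PLL and time_constant values are illustrative:

#include <stdint.h>
#include <stdio.h>

#define SHIFT_PLL 4

/* Cap the sampling interval so a huge gap between updates cannot blow
 * up the PLL gain. */
static int64_t clamp_interval(int64_t secs, int time_constant)
{
	int64_t limit = 1LL << (SHIFT_PLL + 1 + time_constant);

	return secs > limit ? limit : secs;
}

int main(void)
{
	/* e.g. a machine syncing after days offline: 200000s -> 128s */
	printf("%lld\n", (long long)clamp_interval(200000, 2));
	printf("%lld\n", (long long)clamp_interval(60, 2));	/* unchanged */
	return 0;
}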
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 65fb077ea79c..ebd80d50c474 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -1638,8 +1638,8 @@ ftrace_failures_open(struct inode *inode, struct file *file) | |||
| 1638 | 1638 | ||
| 1639 | ret = ftrace_avail_open(inode, file); | 1639 | ret = ftrace_avail_open(inode, file); |
| 1640 | if (!ret) { | 1640 | if (!ret) { |
| 1641 | m = (struct seq_file *)file->private_data; | 1641 | m = file->private_data; |
| 1642 | iter = (struct ftrace_iterator *)m->private; | 1642 | iter = m->private; |
| 1643 | iter->flags = FTRACE_ITER_FAILURES; | 1643 | iter->flags = FTRACE_ITER_FAILURES; |
| 1644 | } | 1644 | } |
| 1645 | 1645 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ec59f541156..001bcd2ccf4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -2196,7 +2196,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp) | |||
| 2196 | 2196 | ||
| 2197 | static int tracing_release(struct inode *inode, struct file *file) | 2197 | static int tracing_release(struct inode *inode, struct file *file) |
| 2198 | { | 2198 | { |
| 2199 | struct seq_file *m = (struct seq_file *)file->private_data; | 2199 | struct seq_file *m = file->private_data; |
| 2200 | struct trace_iterator *iter; | 2200 | struct trace_iterator *iter; |
| 2201 | int cpu; | 2201 | int cpu; |
| 2202 | 2202 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d39b3c5454a5..9021f8c0c0c3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr, | |||
| 343 | unsigned long ip, | 343 | unsigned long ip, |
| 344 | unsigned long parent_ip, | 344 | unsigned long parent_ip, |
| 345 | unsigned long flags, int pc); | 345 | unsigned long flags, int pc); |
| 346 | void trace_graph_function(struct trace_array *tr, | ||
| 347 | unsigned long ip, | ||
| 348 | unsigned long parent_ip, | ||
| 349 | unsigned long flags, int pc); | ||
| 346 | void trace_default_header(struct seq_file *m); | 350 | void trace_default_header(struct seq_file *m); |
| 347 | void print_trace_header(struct seq_file *m, struct trace_iterator *iter); | 351 | void print_trace_header(struct seq_file *m, struct trace_iterator *iter); |
| 348 | int trace_empty(struct trace_iterator *iter); | 352 | int trace_empty(struct trace_iterator *iter); |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ef49e9370b25..76b05980225c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
| @@ -262,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) | |||
| 262 | return trace_graph_entry(trace); | 262 | return trace_graph_entry(trace); |
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | static void | ||
| 266 | __trace_graph_function(struct trace_array *tr, | ||
| 267 | unsigned long ip, unsigned long flags, int pc) | ||
| 268 | { | ||
| 269 | u64 time = trace_clock_local(); | ||
| 270 | struct ftrace_graph_ent ent = { | ||
| 271 | .func = ip, | ||
| 272 | .depth = 0, | ||
| 273 | }; | ||
| 274 | struct ftrace_graph_ret ret = { | ||
| 275 | .func = ip, | ||
| 276 | .depth = 0, | ||
| 277 | .calltime = time, | ||
| 278 | .rettime = time, | ||
| 279 | }; | ||
| 280 | |||
| 281 | __trace_graph_entry(tr, &ent, flags, pc); | ||
| 282 | __trace_graph_return(tr, &ret, flags, pc); | ||
| 283 | } | ||
| 284 | |||
| 285 | void | ||
| 286 | trace_graph_function(struct trace_array *tr, | ||
| 287 | unsigned long ip, unsigned long parent_ip, | ||
| 288 | unsigned long flags, int pc) | ||
| 289 | { | ||
| 290 | __trace_graph_function(tr, ip, flags, pc); | ||
| 291 | } | ||
| 292 | |||
| 265 | void __trace_graph_return(struct trace_array *tr, | 293 | void __trace_graph_return(struct trace_array *tr, |
| 266 | struct ftrace_graph_ret *trace, | 294 | struct ftrace_graph_ret *trace, |
| 267 | unsigned long flags, | 295 | unsigned long flags, |
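trace_graph_function() records a plain function hit as a zero-duration call: one entry and one return event sharing the same timestamp at depth 0. A standalone sketch of that representation; the struct layouts are simplified, not the tracer's real record formats:

#include <stdint.h>
#include <stdio.h>

struct graph_ent { unsigned long func; int depth; };
struct graph_ret { unsigned long func; int depth; uint64_t calltime, rettime; };

static void emit_function_as_graph(unsigned long ip, uint64_t now)
{
	struct graph_ent ent = { .func = ip, .depth = 0 };
	struct graph_ret ret = { .func = ip, .depth = 0,
				 .calltime = now, .rettime = now };

	printf("entry  func=%#lx depth=%d\n", ent.func, ent.depth);
	printf("return func=%#lx duration=%llu\n", ret.func,
	       (unsigned long long)(ret.rettime - ret.calltime));	/* 0 */
}

int main(void)
{
	emit_function_as_graph(0xc0ffee, 123456789ULL);
	return 0;
}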
| @@ -888,12 +916,20 @@ check_irq_entry(struct trace_iterator *iter, u32 flags, | |||
| 888 | unsigned long addr, int depth) | 916 | unsigned long addr, int depth) |
| 889 | { | 917 | { |
| 890 | int cpu = iter->cpu; | 918 | int cpu = iter->cpu; |
| 919 | int *depth_irq; | ||
| 891 | struct fgraph_data *data = iter->private; | 920 | struct fgraph_data *data = iter->private; |
| 892 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 893 | 921 | ||
| 894 | if (flags & TRACE_GRAPH_PRINT_IRQS) | 922 | /* |
| 923 | * If we are either displaying irqs, or we got called as | ||
| 924 | * a graph event and private data does not exist, | ||
| 925 | * then we bypass the irq check. | ||
| 926 | */ | ||
| 927 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
| 928 | (!data)) | ||
| 895 | return 0; | 929 | return 0; |
| 896 | 930 | ||
| 931 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 932 | |||
| 897 | /* | 933 | /* |
| 898 | * We are inside the irq code | 934 | * We are inside the irq code |
| 899 | */ | 935 | */ |
| @@ -926,12 +962,20 @@ static int | |||
| 926 | check_irq_return(struct trace_iterator *iter, u32 flags, int depth) | 962 | check_irq_return(struct trace_iterator *iter, u32 flags, int depth) |
| 927 | { | 963 | { |
| 928 | int cpu = iter->cpu; | 964 | int cpu = iter->cpu; |
| 965 | int *depth_irq; | ||
| 929 | struct fgraph_data *data = iter->private; | 966 | struct fgraph_data *data = iter->private; |
| 930 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 931 | 967 | ||
| 932 | if (flags & TRACE_GRAPH_PRINT_IRQS) | 968 | /* |
| 969 | * If we are either displaying irqs, or we got called as | ||
| 970 | * a graph event and private data does not exist, | ||
| 971 | * then we bypass the irq check. | ||
| 972 | */ | ||
| 973 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
| 974 | (!data)) | ||
| 933 | return 0; | 975 | return 0; |
| 934 | 976 | ||
| 977 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 978 | |||
| 935 | /* | 979 | /* |
| 936 | * We are not inside the irq code. | 980 | * We are not inside the irq code. |
| 937 | */ | 981 | */ |
| @@ -1163,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 1163 | 1207 | ||
| 1164 | 1208 | ||
| 1165 | enum print_line_t | 1209 | enum print_line_t |
| 1166 | print_graph_function_flags(struct trace_iterator *iter, u32 flags) | 1210 | __print_graph_function_flags(struct trace_iterator *iter, u32 flags) |
| 1167 | { | 1211 | { |
| 1168 | struct ftrace_graph_ent_entry *field; | 1212 | struct ftrace_graph_ent_entry *field; |
| 1169 | struct fgraph_data *data = iter->private; | 1213 | struct fgraph_data *data = iter->private; |
| @@ -1226,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) | |||
| 1226 | static enum print_line_t | 1270 | static enum print_line_t |
| 1227 | print_graph_function(struct trace_iterator *iter) | 1271 | print_graph_function(struct trace_iterator *iter) |
| 1228 | { | 1272 | { |
| 1229 | return print_graph_function_flags(iter, tracer_flags.val); | 1273 | return __print_graph_function_flags(iter, tracer_flags.val); |
| 1274 | } | ||
| 1275 | |||
| 1276 | enum print_line_t print_graph_function_flags(struct trace_iterator *iter, | ||
| 1277 | u32 flags) | ||
| 1278 | { | ||
| 1279 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
| 1280 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 1281 | else | ||
| 1282 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 1283 | |||
| 1284 | return __print_graph_function_flags(iter, flags); | ||
| 1230 | } | 1285 | } |
| 1231 | 1286 | ||
| 1232 | static enum print_line_t | 1287 | static enum print_line_t |
| @@ -1258,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags) | |||
| 1258 | seq_printf(s, "#%.*s|||| / \n", size, spaces); | 1313 | seq_printf(s, "#%.*s|||| / \n", size, spaces); |
| 1259 | } | 1314 | } |
| 1260 | 1315 | ||
| 1261 | void print_graph_headers_flags(struct seq_file *s, u32 flags) | 1316 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) |
| 1262 | { | 1317 | { |
| 1263 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; | 1318 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; |
| 1264 | 1319 | ||
| @@ -1299,6 +1354,23 @@ void print_graph_headers(struct seq_file *s) | |||
| 1299 | print_graph_headers_flags(s, tracer_flags.val); | 1354 | print_graph_headers_flags(s, tracer_flags.val); |
| 1300 | } | 1355 | } |
| 1301 | 1356 | ||
| 1357 | void print_graph_headers_flags(struct seq_file *s, u32 flags) | ||
| 1358 | { | ||
| 1359 | struct trace_iterator *iter = s->private; | ||
| 1360 | |||
| 1361 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
| 1362 | /* print nothing if the buffers are empty */ | ||
| 1363 | if (trace_empty(iter)) | ||
| 1364 | return; | ||
| 1365 | |||
| 1366 | print_trace_header(s, iter); | ||
| 1367 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 1368 | } else | ||
| 1369 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 1370 | |||
| 1371 | __print_graph_headers_flags(s, flags); | ||
| 1372 | } | ||
| 1373 | |||
| 1302 | void graph_trace_open(struct trace_iterator *iter) | 1374 | void graph_trace_open(struct trace_iterator *iter) |
| 1303 | { | 1375 | { |
| 1304 | /* pid and depth on the last trace processed */ | 1376 | /* pid and depth on the last trace processed */ |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 73a6b0601f2e..5cf8c602b880 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
| @@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence; | |||
| 87 | 87 | ||
| 88 | #ifdef CONFIG_FUNCTION_TRACER | 88 | #ifdef CONFIG_FUNCTION_TRACER |
| 89 | /* | 89 | /* |
| 90 | * irqsoff uses its own tracer function to keep the overhead down: | 90 | * Prologue for the preempt and irqs off function tracers. |
| 91 | * | ||
| 92 | * Returns 1 if it is OK to continue, and data->disabled is | ||
| 93 | * incremented. | ||
| 94 | * 0 if the trace is to be ignored, and data->disabled | ||
| 95 | * is kept the same. | ||
| 96 | * | ||
| 97 | * Note, this function is also used outside this ifdef but | ||
| 98 | * inside the #ifdef of the function graph tracer below. | ||
| 99 | * This is OK, since the function graph tracer is | ||
| 100 | * dependent on the function tracer. | ||
| 91 | */ | 101 | */ |
| 92 | static void | 102 | static int func_prolog_dec(struct trace_array *tr, |
| 93 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | 103 | struct trace_array_cpu **data, |
| 104 | unsigned long *flags) | ||
| 94 | { | 105 | { |
| 95 | struct trace_array *tr = irqsoff_trace; | ||
| 96 | struct trace_array_cpu *data; | ||
| 97 | unsigned long flags; | ||
| 98 | long disabled; | 106 | long disabled; |
| 99 | int cpu; | 107 | int cpu; |
| 100 | 108 | ||
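func_prolog_dec() factors out the common prologue of the irqsoff tracer callbacks: bail out unless this CPU is being traced with IRQs off, and proceed only when the per-cpu recursion counter was zero. A user-space sketch of that pattern; names and types are simplified stand-ins, not the tracer's API:

#include <stdatomic.h>
#include <stdbool.h>

struct cpu_data { atomic_int disabled; };

/* Returns true and leaves "disabled" incremented if tracing may proceed;
 * the caller must decrement it again in its epilogue. */
static bool func_prolog(struct cpu_data *data, bool tracing_this_cpu, bool irqs_off)
{
	if (!tracing_this_cpu || !irqs_off)
		return false;

	if (atomic_fetch_add(&data->disabled, 1) + 1 == 1)
		return true;	/* first entry on this CPU: trace it */

	atomic_fetch_sub(&data->disabled, 1);	/* recursion: back off */
	return false;
}

int main(void)
{
	struct cpu_data data = { 0 };

	if (func_prolog(&data, true, true)) {
		/* ... record the trace event here ... */
		atomic_fetch_sub(&data.disabled, 1);	/* epilogue */
	}
	return 0;
}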
| @@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | |||
| 106 | */ | 114 | */ |
| 107 | cpu = raw_smp_processor_id(); | 115 | cpu = raw_smp_processor_id(); |
| 108 | if (likely(!per_cpu(tracing_cpu, cpu))) | 116 | if (likely(!per_cpu(tracing_cpu, cpu))) |
| 109 | return; | 117 | return 0; |
| 110 | 118 | ||
| 111 | local_save_flags(flags); | 119 | local_save_flags(*flags); |
| 112 | /* slight chance to get a false positive on tracing_cpu */ | 120 | /* slight chance to get a false positive on tracing_cpu */ |
| 113 | if (!irqs_disabled_flags(flags)) | 121 | if (!irqs_disabled_flags(*flags)) |
| 114 | return; | 122 | return 0; |
| 115 | 123 | ||
| 116 | data = tr->data[cpu]; | 124 | *data = tr->data[cpu]; |
| 117 | disabled = atomic_inc_return(&data->disabled); | 125 | disabled = atomic_inc_return(&(*data)->disabled); |
| 118 | 126 | ||
| 119 | if (likely(disabled == 1)) | 127 | if (likely(disabled == 1)) |
| 120 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | 128 | return 1; |
| 129 | |||
| 130 | atomic_dec(&(*data)->disabled); | ||
| 131 | |||
| 132 | return 0; | ||
| 133 | } | ||
| 134 | |||
| 135 | /* | ||
| 136 | * irqsoff uses its own tracer function to keep the overhead down: | ||
| 137 | */ | ||
| 138 | static void | ||
| 139 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
| 140 | { | ||
| 141 | struct trace_array *tr = irqsoff_trace; | ||
| 142 | struct trace_array_cpu *data; | ||
| 143 | unsigned long flags; | ||
| 144 | |||
| 145 | if (!func_prolog_dec(tr, &data, &flags)) | ||
| 146 | return; | ||
| 147 | |||
| 148 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | ||
| 121 | 149 | ||
| 122 | atomic_dec(&data->disabled); | 150 | atomic_dec(&data->disabled); |
| 123 | } | 151 | } |
| @@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) | |||
| 155 | struct trace_array *tr = irqsoff_trace; | 183 | struct trace_array *tr = irqsoff_trace; |
| 156 | struct trace_array_cpu *data; | 184 | struct trace_array_cpu *data; |
| 157 | unsigned long flags; | 185 | unsigned long flags; |
| 158 | long disabled; | ||
| 159 | int ret; | 186 | int ret; |
| 160 | int cpu; | ||
| 161 | int pc; | 187 | int pc; |
| 162 | 188 | ||
| 163 | cpu = raw_smp_processor_id(); | 189 | if (!func_prolog_dec(tr, &data, &flags)) |
| 164 | if (likely(!per_cpu(tracing_cpu, cpu))) | ||
| 165 | return 0; | 190 | return 0; |
| 166 | 191 | ||
| 167 | local_save_flags(flags); | 192 | pc = preempt_count(); |
| 168 | /* slight chance to get a false positive on tracing_cpu */ | 193 | ret = __trace_graph_entry(tr, trace, flags, pc); |
| 169 | if (!irqs_disabled_flags(flags)) | ||
| 170 | return 0; | ||
| 171 | |||
| 172 | data = tr->data[cpu]; | ||
| 173 | disabled = atomic_inc_return(&data->disabled); | ||
| 174 | |||
| 175 | if (likely(disabled == 1)) { | ||
| 176 | pc = preempt_count(); | ||
| 177 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
| 178 | } else | ||
| 179 | ret = 0; | ||
| 180 | |||
| 181 | atomic_dec(&data->disabled); | 194 | atomic_dec(&data->disabled); |
| 195 | |||
| 182 | return ret; | 196 | return ret; |
| 183 | } | 197 | } |
| 184 | 198 | ||
| @@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) | |||
| 187 | struct trace_array *tr = irqsoff_trace; | 201 | struct trace_array *tr = irqsoff_trace; |
| 188 | struct trace_array_cpu *data; | 202 | struct trace_array_cpu *data; |
| 189 | unsigned long flags; | 203 | unsigned long flags; |
| 190 | long disabled; | ||
| 191 | int cpu; | ||
| 192 | int pc; | 204 | int pc; |
| 193 | 205 | ||
| 194 | cpu = raw_smp_processor_id(); | 206 | if (!func_prolog_dec(tr, &data, &flags)) |
| 195 | if (likely(!per_cpu(tracing_cpu, cpu))) | ||
| 196 | return; | 207 | return; |
| 197 | 208 | ||
| 198 | local_save_flags(flags); | 209 | pc = preempt_count(); |
| 199 | /* slight chance to get a false positive on tracing_cpu */ | 210 | __trace_graph_return(tr, trace, flags, pc); |
| 200 | if (!irqs_disabled_flags(flags)) | ||
| 201 | return; | ||
| 202 | |||
| 203 | data = tr->data[cpu]; | ||
| 204 | disabled = atomic_inc_return(&data->disabled); | ||
| 205 | |||
| 206 | if (likely(disabled == 1)) { | ||
| 207 | pc = preempt_count(); | ||
| 208 | __trace_graph_return(tr, trace, flags, pc); | ||
| 209 | } | ||
| 210 | |||
| 211 | atomic_dec(&data->disabled); | 211 | atomic_dec(&data->disabled); |
| 212 | } | 212 | } |
| 213 | 213 | ||
| @@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter) | |||
| 229 | 229 | ||
| 230 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) | 230 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) |
| 231 | { | 231 | { |
| 232 | u32 flags = GRAPH_TRACER_FLAGS; | ||
| 233 | |||
| 234 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
| 235 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 236 | else | ||
| 237 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 238 | |||
| 239 | /* | 232 | /* |
| 240 | * In graph mode call the graph tracer output function, | 233 | * In graph mode call the graph tracer output function, |
| 241 | * otherwise go with the TRACE_FN event handler | 234 | * otherwise go with the TRACE_FN event handler |
| 242 | */ | 235 | */ |
| 243 | if (is_graph()) | 236 | if (is_graph()) |
| 244 | return print_graph_function_flags(iter, flags); | 237 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); |
| 245 | 238 | ||
| 246 | return TRACE_TYPE_UNHANDLED; | 239 | return TRACE_TYPE_UNHANDLED; |
| 247 | } | 240 | } |
| 248 | 241 | ||
| 249 | static void irqsoff_print_header(struct seq_file *s) | 242 | static void irqsoff_print_header(struct seq_file *s) |
| 250 | { | 243 | { |
| 251 | if (is_graph()) { | 244 | if (is_graph()) |
| 252 | struct trace_iterator *iter = s->private; | 245 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); |
| 253 | u32 flags = GRAPH_TRACER_FLAGS; | 246 | else |
| 254 | |||
| 255 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
| 256 | /* print nothing if the buffers are empty */ | ||
| 257 | if (trace_empty(iter)) | ||
| 258 | return; | ||
| 259 | |||
| 260 | print_trace_header(s, iter); | ||
| 261 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 262 | } else | ||
| 263 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 264 | |||
| 265 | print_graph_headers_flags(s, flags); | ||
| 266 | } else | ||
| 267 | trace_default_header(s); | 247 | trace_default_header(s); |
| 268 | } | 248 | } |
| 269 | 249 | ||
| 270 | static void | 250 | static void |
| 271 | trace_graph_function(struct trace_array *tr, | ||
| 272 | unsigned long ip, unsigned long flags, int pc) | ||
| 273 | { | ||
| 274 | u64 time = trace_clock_local(); | ||
| 275 | struct ftrace_graph_ent ent = { | ||
| 276 | .func = ip, | ||
| 277 | .depth = 0, | ||
| 278 | }; | ||
| 279 | struct ftrace_graph_ret ret = { | ||
| 280 | .func = ip, | ||
| 281 | .depth = 0, | ||
| 282 | .calltime = time, | ||
| 283 | .rettime = time, | ||
| 284 | }; | ||
| 285 | |||
| 286 | __trace_graph_entry(tr, &ent, flags, pc); | ||
| 287 | __trace_graph_return(tr, &ret, flags, pc); | ||
| 288 | } | ||
| 289 | |||
| 290 | static void | ||
| 291 | __trace_function(struct trace_array *tr, | 251 | __trace_function(struct trace_array *tr, |
| 292 | unsigned long ip, unsigned long parent_ip, | 252 | unsigned long ip, unsigned long parent_ip, |
| 293 | unsigned long flags, int pc) | 253 | unsigned long flags, int pc) |
| 294 | { | 254 | { |
| 295 | if (!is_graph()) | 255 | if (is_graph()) |
| 256 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
| 257 | else | ||
| 296 | trace_function(tr, ip, parent_ip, flags, pc); | 258 | trace_function(tr, ip, parent_ip, flags, pc); |
| 297 | else { | ||
| 298 | trace_graph_function(tr, parent_ip, flags, pc); | ||
| 299 | trace_graph_function(tr, ip, flags, pc); | ||
| 300 | } | ||
| 301 | } | 259 | } |
| 302 | 260 | ||
| 303 | #else | 261 | #else |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 4086eae6e81b..7319559ed59f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
| @@ -31,48 +31,98 @@ static int wakeup_rt; | |||
| 31 | static arch_spinlock_t wakeup_lock = | 31 | static arch_spinlock_t wakeup_lock = |
| 32 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 32 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
| 33 | 33 | ||
| 34 | static void wakeup_reset(struct trace_array *tr); | ||
| 34 | static void __wakeup_reset(struct trace_array *tr); | 35 | static void __wakeup_reset(struct trace_array *tr); |
| 36 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace); | ||
| 37 | static void wakeup_graph_return(struct ftrace_graph_ret *trace); | ||
| 35 | 38 | ||
| 36 | static int save_lat_flag; | 39 | static int save_lat_flag; |
| 37 | 40 | ||
| 41 | #define TRACE_DISPLAY_GRAPH 1 | ||
| 42 | |||
| 43 | static struct tracer_opt trace_opts[] = { | ||
| 44 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 45 | /* display latency trace as call graph */ | ||
| 46 | { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, | ||
| 47 | #endif | ||
| 48 | { } /* Empty entry */ | ||
| 49 | }; | ||
| 50 | |||
| 51 | static struct tracer_flags tracer_flags = { | ||
| 52 | .val = 0, | ||
| 53 | .opts = trace_opts, | ||
| 54 | }; | ||
| 55 | |||
| 56 | #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) | ||
| 57 | |||
| 38 | #ifdef CONFIG_FUNCTION_TRACER | 58 | #ifdef CONFIG_FUNCTION_TRACER |
| 59 | |||
| 39 | /* | 60 | /* |
| 40 | * irqsoff uses its own tracer function to keep the overhead down: | 61 | * Prologue for the wakeup function tracers. |
| 62 | * | ||
| 63 | * Returns 1 if it is OK to continue, and preemption | ||
| 64 | * is disabled and data->disabled is incremented. | ||
| 65 | * 0 if the trace is to be ignored, and preemption | ||
| 66 | * is not disabled and data->disabled is | ||
| 67 | * kept the same. | ||
| 68 | * | ||
| 69 | * Note, this function is also used outside this ifdef but | ||
| 70 | * inside the #ifdef of the function graph tracer below. | ||
| 71 | * This is OK, since the function graph tracer is | ||
| 72 | * dependent on the function tracer. | ||
| 41 | */ | 73 | */ |
| 42 | static void | 74 | static int |
| 43 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | 75 | func_prolog_preempt_disable(struct trace_array *tr, |
| 76 | struct trace_array_cpu **data, | ||
| 77 | int *pc) | ||
| 44 | { | 78 | { |
| 45 | struct trace_array *tr = wakeup_trace; | ||
| 46 | struct trace_array_cpu *data; | ||
| 47 | unsigned long flags; | ||
| 48 | long disabled; | 79 | long disabled; |
| 49 | int cpu; | 80 | int cpu; |
| 50 | int pc; | ||
| 51 | 81 | ||
| 52 | if (likely(!wakeup_task)) | 82 | if (likely(!wakeup_task)) |
| 53 | return; | 83 | return 0; |
| 54 | 84 | ||
| 55 | pc = preempt_count(); | 85 | *pc = preempt_count(); |
| 56 | preempt_disable_notrace(); | 86 | preempt_disable_notrace(); |
| 57 | 87 | ||
| 58 | cpu = raw_smp_processor_id(); | 88 | cpu = raw_smp_processor_id(); |
| 59 | if (cpu != wakeup_current_cpu) | 89 | if (cpu != wakeup_current_cpu) |
| 60 | goto out_enable; | 90 | goto out_enable; |
| 61 | 91 | ||
| 62 | data = tr->data[cpu]; | 92 | *data = tr->data[cpu]; |
| 63 | disabled = atomic_inc_return(&data->disabled); | 93 | disabled = atomic_inc_return(&(*data)->disabled); |
| 64 | if (unlikely(disabled != 1)) | 94 | if (unlikely(disabled != 1)) |
| 65 | goto out; | 95 | goto out; |
| 66 | 96 | ||
| 67 | local_irq_save(flags); | 97 | return 1; |
| 68 | 98 | ||
| 69 | trace_function(tr, ip, parent_ip, flags, pc); | 99 | out: |
| 100 | atomic_dec(&(*data)->disabled); | ||
| 101 | |||
| 102 | out_enable: | ||
| 103 | preempt_enable_notrace(); | ||
| 104 | return 0; | ||
| 105 | } | ||
| 70 | 106 | ||
| 107 | /* | ||
| 108 | * wakeup uses its own tracer function to keep the overhead down: | ||
| 109 | */ | ||
| 110 | static void | ||
| 111 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
| 112 | { | ||
| 113 | struct trace_array *tr = wakeup_trace; | ||
| 114 | struct trace_array_cpu *data; | ||
| 115 | unsigned long flags; | ||
| 116 | int pc; | ||
| 117 | |||
| 118 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
| 119 | return; | ||
| 120 | |||
| 121 | local_irq_save(flags); | ||
| 122 | trace_function(tr, ip, parent_ip, flags, pc); | ||
| 71 | local_irq_restore(flags); | 123 | local_irq_restore(flags); |
| 72 | 124 | ||
| 73 | out: | ||
| 74 | atomic_dec(&data->disabled); | 125 | atomic_dec(&data->disabled); |
| 75 | out_enable: | ||
| 76 | preempt_enable_notrace(); | 126 | preempt_enable_notrace(); |
| 77 | } | 127 | } |
| 78 | 128 | ||
| @@ -82,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly = | |||
| 82 | }; | 132 | }; |
| 83 | #endif /* CONFIG_FUNCTION_TRACER */ | 133 | #endif /* CONFIG_FUNCTION_TRACER */ |
| 84 | 134 | ||
| 135 | static int start_func_tracer(int graph) | ||
| 136 | { | ||
| 137 | int ret; | ||
| 138 | |||
| 139 | if (!graph) | ||
| 140 | ret = register_ftrace_function(&trace_ops); | ||
| 141 | else | ||
| 142 | ret = register_ftrace_graph(&wakeup_graph_return, | ||
| 143 | &wakeup_graph_entry); | ||
| 144 | |||
| 145 | if (!ret && tracing_is_enabled()) | ||
| 146 | tracer_enabled = 1; | ||
| 147 | else | ||
| 148 | tracer_enabled = 0; | ||
| 149 | |||
| 150 | return ret; | ||
| 151 | } | ||
| 152 | |||
| 153 | static void stop_func_tracer(int graph) | ||
| 154 | { | ||
| 155 | tracer_enabled = 0; | ||
| 156 | |||
| 157 | if (!graph) | ||
| 158 | unregister_ftrace_function(&trace_ops); | ||
| 159 | else | ||
| 160 | unregister_ftrace_graph(); | ||
| 161 | } | ||
| 162 | |||
| 163 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 164 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
| 165 | { | ||
| 166 | |||
| 167 | if (!(bit & TRACE_DISPLAY_GRAPH)) | ||
| 168 | return -EINVAL; | ||
| 169 | |||
| 170 | if (!(is_graph() ^ set)) | ||
| 171 | return 0; | ||
| 172 | |||
| 173 | stop_func_tracer(!set); | ||
| 174 | |||
| 175 | wakeup_reset(wakeup_trace); | ||
| 176 | tracing_max_latency = 0; | ||
| 177 | |||
| 178 | return start_func_tracer(set); | ||
| 179 | } | ||
| 180 | |||
| 181 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
| 182 | { | ||
| 183 | struct trace_array *tr = wakeup_trace; | ||
| 184 | struct trace_array_cpu *data; | ||
| 185 | unsigned long flags; | ||
| 186 | int pc, ret = 0; | ||
| 187 | |||
| 188 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
| 189 | return 0; | ||
| 190 | |||
| 191 | local_save_flags(flags); | ||
| 192 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
| 193 | atomic_dec(&data->disabled); | ||
| 194 | preempt_enable_notrace(); | ||
| 195 | |||
| 196 | return ret; | ||
| 197 | } | ||
| 198 | |||
| 199 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) | ||
| 200 | { | ||
| 201 | struct trace_array *tr = wakeup_trace; | ||
| 202 | struct trace_array_cpu *data; | ||
| 203 | unsigned long flags; | ||
| 204 | int pc; | ||
| 205 | |||
| 206 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
| 207 | return; | ||
| 208 | |||
| 209 | local_save_flags(flags); | ||
| 210 | __trace_graph_return(tr, trace, flags, pc); | ||
| 211 | atomic_dec(&data->disabled); | ||
| 212 | |||
| 213 | preempt_enable_notrace(); | ||
| 214 | return; | ||
| 215 | } | ||
| 216 | |||
| 217 | static void wakeup_trace_open(struct trace_iterator *iter) | ||
| 218 | { | ||
| 219 | if (is_graph()) | ||
| 220 | graph_trace_open(iter); | ||
| 221 | } | ||
| 222 | |||
| 223 | static void wakeup_trace_close(struct trace_iterator *iter) | ||
| 224 | { | ||
| 225 | if (iter->private) | ||
| 226 | graph_trace_close(iter); | ||
| 227 | } | ||
| 228 | |||
| 229 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) | ||
| 230 | |||
| 231 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
| 232 | { | ||
| 233 | /* | ||
| 234 | * In graph mode call the graph tracer output function, | ||
| 235 | * otherwise go with the TRACE_FN event handler | ||
| 236 | */ | ||
| 237 | if (is_graph()) | ||
| 238 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); | ||
| 239 | |||
| 240 | return TRACE_TYPE_UNHANDLED; | ||
| 241 | } | ||
| 242 | |||
| 243 | static void wakeup_print_header(struct seq_file *s) | ||
| 244 | { | ||
| 245 | if (is_graph()) | ||
| 246 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); | ||
| 247 | else | ||
| 248 | trace_default_header(s); | ||
| 249 | } | ||
| 250 | |||
| 251 | static void | ||
| 252 | __trace_function(struct trace_array *tr, | ||
| 253 | unsigned long ip, unsigned long parent_ip, | ||
| 254 | unsigned long flags, int pc) | ||
| 255 | { | ||
| 256 | if (is_graph()) | ||
| 257 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
| 258 | else | ||
| 259 | trace_function(tr, ip, parent_ip, flags, pc); | ||
| 260 | } | ||
| 261 | #else | ||
| 262 | #define __trace_function trace_function | ||
| 263 | |||
| 264 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
| 265 | { | ||
| 266 | return -EINVAL; | ||
| 267 | } | ||
| 268 | |||
| 269 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
| 270 | { | ||
| 271 | return -1; | ||
| 272 | } | ||
| 273 | |||
| 274 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
| 275 | { | ||
| 276 | return TRACE_TYPE_UNHANDLED; | ||
| 277 | } | ||
| 278 | |||
| 279 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } | ||
| 280 | static void wakeup_print_header(struct seq_file *s) { } | ||
| 281 | static void wakeup_trace_open(struct trace_iterator *iter) { } | ||
| 282 | static void wakeup_trace_close(struct trace_iterator *iter) { } | ||
| 283 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
| 284 | |||
| 85 | /* | 285 | /* |
| 86 | * Should this new latency be reported/recorded? | 286 | * Should this new latency be reported/recorded? |
| 87 | */ | 287 | */ |
| @@ -152,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore, | |||
| 152 | /* The task we are waiting for is waking up */ | 352 | /* The task we are waiting for is waking up */ |
| 153 | data = wakeup_trace->data[wakeup_cpu]; | 353 | data = wakeup_trace->data[wakeup_cpu]; |
| 154 | 354 | ||
| 155 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); | 355 | __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
| 156 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 356 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
| 157 | 357 | ||
| 158 | T0 = data->preempt_timestamp; | 358 | T0 = data->preempt_timestamp; |
| @@ -252,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) | |||
| 252 | * is not called by an assembly function (whereas schedule is) | 452 | * is not called by an assembly function (whereas schedule is) |
| 253 | * it should be safe to use it here. | 453 | * it should be safe to use it here. |
| 254 | */ | 454 | */ |
| 255 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 455 | __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); |
| 256 | 456 | ||
| 257 | out_locked: | 457 | out_locked: |
| 258 | arch_spin_unlock(&wakeup_lock); | 458 | arch_spin_unlock(&wakeup_lock); |
| @@ -303,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr) | |||
| 303 | */ | 503 | */ |
| 304 | smp_wmb(); | 504 | smp_wmb(); |
| 305 | 505 | ||
| 306 | register_ftrace_function(&trace_ops); | 506 | if (start_func_tracer(is_graph())) |
| 307 | 507 | printk(KERN_ERR "failed to start wakeup tracer\n"); | |
| 308 | if (tracing_is_enabled()) | ||
| 309 | tracer_enabled = 1; | ||
| 310 | else | ||
| 311 | tracer_enabled = 0; | ||
| 312 | 508 | ||
| 313 | return; | 509 | return; |
| 314 | fail_deprobe_wake_new: | 510 | fail_deprobe_wake_new: |
| @@ -320,7 +516,7 @@ fail_deprobe: | |||
| 320 | static void stop_wakeup_tracer(struct trace_array *tr) | 516 | static void stop_wakeup_tracer(struct trace_array *tr) |
| 321 | { | 517 | { |
| 322 | tracer_enabled = 0; | 518 | tracer_enabled = 0; |
| 323 | unregister_ftrace_function(&trace_ops); | 519 | stop_func_tracer(is_graph()); |
| 324 | unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); | 520 | unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); |
| 325 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); | 521 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); |
| 326 | unregister_trace_sched_wakeup(probe_wakeup, NULL); | 522 | unregister_trace_sched_wakeup(probe_wakeup, NULL); |
| @@ -379,9 +575,15 @@ static struct tracer wakeup_tracer __read_mostly = | |||
| 379 | .start = wakeup_tracer_start, | 575 | .start = wakeup_tracer_start, |
| 380 | .stop = wakeup_tracer_stop, | 576 | .stop = wakeup_tracer_stop, |
| 381 | .print_max = 1, | 577 | .print_max = 1, |
| 578 | .print_header = wakeup_print_header, | ||
| 579 | .print_line = wakeup_print_line, | ||
| 580 | .flags = &tracer_flags, | ||
| 581 | .set_flag = wakeup_set_flag, | ||
| 382 | #ifdef CONFIG_FTRACE_SELFTEST | 582 | #ifdef CONFIG_FTRACE_SELFTEST |
| 383 | .selftest = trace_selftest_startup_wakeup, | 583 | .selftest = trace_selftest_startup_wakeup, |
| 384 | #endif | 584 | #endif |
| 585 | .open = wakeup_trace_open, | ||
| 586 | .close = wakeup_trace_close, | ||
| 385 | .use_max_tr = 1, | 587 | .use_max_tr = 1, |
| 386 | }; | 588 | }; |
| 387 | 589 | ||
| @@ -394,9 +596,15 @@ static struct tracer wakeup_rt_tracer __read_mostly = | |||
| 394 | .stop = wakeup_tracer_stop, | 596 | .stop = wakeup_tracer_stop, |
| 395 | .wait_pipe = poll_wait_pipe, | 597 | .wait_pipe = poll_wait_pipe, |
| 396 | .print_max = 1, | 598 | .print_max = 1, |
| 599 | .print_header = wakeup_print_header, | ||
| 600 | .print_line = wakeup_print_line, | ||
| 601 | .flags = &tracer_flags, | ||
| 602 | .set_flag = wakeup_set_flag, | ||
| 397 | #ifdef CONFIG_FTRACE_SELFTEST | 603 | #ifdef CONFIG_FTRACE_SELFTEST |
| 398 | .selftest = trace_selftest_startup_wakeup, | 604 | .selftest = trace_selftest_startup_wakeup, |
| 399 | #endif | 605 | #endif |
| 606 | .open = wakeup_trace_open, | ||
| 607 | .close = wakeup_trace_close, | ||
| 400 | .use_max_tr = 1, | 608 | .use_max_tr = 1, |
| 401 | }; | 609 | }; |
| 402 | 610 | ||
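The trace_sched_wakeup.c hunks above factor the common entry checks into func_prolog_preempt_disable(): it either returns 1 with preemption disabled and the per-cpu data pointer plus preempt count handed back to the caller, or returns 0 having already undone its own work. The plain userspace sketch below illustrates that caller contract as read from the diff; every type and helper in it (the mock trace_array structs, wakeup_task_mock, the no-op preempt helpers, example_callback) is a stand-in for illustration only, not kernel API.

/*
 * Minimal userspace sketch of the prolog/epilog contract introduced above.
 * All types and helpers here are mock stand-ins, not kernel code.
 */
#include <stdatomic.h>
#include <stdio.h>

struct trace_array_cpu { atomic_int disabled; };
struct trace_array     { struct trace_array_cpu cpu; };

static struct trace_array wakeup_trace_mock;
static int wakeup_task_mock = 1;            /* pretend a task is being traced */

static void preempt_disable_notrace(void) { /* no-op in the mock */ }
static void preempt_enable_notrace(void)  { /* no-op in the mock */ }

/* Returns 1 with "preemption" disabled and *data set; 0 with it re-enabled. */
static int func_prolog_preempt_disable(struct trace_array *tr,
                                       struct trace_array_cpu **data,
                                       int *pc)
{
        long disabled;

        if (!wakeup_task_mock)
                return 0;

        *pc = 0;                            /* stands in for preempt_count() */
        preempt_disable_notrace();

        *data = &tr->cpu;
        disabled = atomic_fetch_add(&(*data)->disabled, 1) + 1;
        if (disabled != 1) {
                /* recursion/reentry guard tripped: back out everything */
                atomic_fetch_sub(&(*data)->disabled, 1);
                preempt_enable_notrace();
                return 0;
        }
        return 1;
}

/* A caller mirrors wakeup_tracer_call(): trace, then undo what the prolog did. */
static void example_callback(void)
{
        struct trace_array_cpu *data;
        int pc;

        if (!func_prolog_preempt_disable(&wakeup_trace_mock, &data, &pc))
                return;

        printf("would record a trace entry at pc=%d\n", pc);

        atomic_fetch_sub(&data->disabled, 1);  /* undo the prolog's increment */
        preempt_enable_notrace();
}

int main(void)
{
        example_callback();
        return 0;
}

On this reading, wakeup_tracer_call(), wakeup_graph_entry() and wakeup_graph_return() in the diff all follow the same shape: bail out cheaply when the prolog returns 0, otherwise record their event and release the disabled counter and preemption themselves.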
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index dc8e16824b51..bafba687a6d8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -196,7 +196,7 @@ static struct perf_event_attr wd_hw_attr = { | |||
| 196 | }; | 196 | }; |
| 197 | 197 | ||
| 198 | /* Callback function for perf event subsystem */ | 198 | /* Callback function for perf event subsystem */ |
| 199 | void watchdog_overflow_callback(struct perf_event *event, int nmi, | 199 | static void watchdog_overflow_callback(struct perf_event *event, int nmi, |
| 200 | struct perf_sample_data *data, | 200 | struct perf_sample_data *data, |
| 201 | struct pt_regs *regs) | 201 | struct pt_regs *regs) |
| 202 | { | 202 | { |
