Diffstat (limited to 'kernel')
-rw-r--r--	kernel/hrtimer.c	331
-rw-r--r--	kernel/posix-timers.c	40
-rw-r--r--	kernel/sched.c	2
-rw-r--r--	kernel/time/ntp.c	4
-rw-r--r--	kernel/time/tick-sched.c	44
-rw-r--r--	kernel/trace/trace_sysprof.c	1
6 files changed, 107 insertions, 315 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 47e63349d1b2..bda9cb924276 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -442,22 +442,6 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 #endif
 
-/*
- * Check, whether the timer is on the callback pending list
- */
-static inline int hrtimer_cb_pending(const struct hrtimer *timer)
-{
-	return timer->state & HRTIMER_STATE_PENDING;
-}
-
-/*
- * Remove a timer from the callback pending list
- */
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
-{
-	list_del_init(&timer->cb_entry);
-}
-
 /* High resolution timer related functions */
 #ifdef CONFIG_HIGH_RES_TIMERS
 
@@ -651,6 +635,8 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
 {
 }
 
+static void __run_hrtimer(struct hrtimer *timer);
+
 /*
  * When High resolution timers are active, try to reprogram. Note, that in case
  * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
@@ -661,31 +647,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 					struct hrtimer_clock_base *base)
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
-
-		/* Timer is expired, act upon the callback mode */
-		switch(timer->cb_mode) {
-		case HRTIMER_CB_IRQSAFE_PERCPU:
-		case HRTIMER_CB_IRQSAFE_UNLOCKED:
-			/*
-			 * This is solely for the sched tick emulation with
-			 * dynamic tick support to ensure that we do not
-			 * restart the tick right on the edge and end up with
-			 * the tick timer in the softirq ! The calling site
-			 * takes care of this. Also used for hrtimer sleeper !
-			 */
-			debug_hrtimer_deactivate(timer);
-			return 1;
-		case HRTIMER_CB_SOFTIRQ:
-			/*
-			 * Move everything else into the softirq pending list !
-			 */
-			list_add_tail(&timer->cb_entry,
-				      &base->cpu_base->cb_pending);
-			timer->state = HRTIMER_STATE_PENDING;
-			return 1;
-		default:
-			BUG();
-		}
+		/*
+		 * XXX: recursion check?
+		 * hrtimer_forward() should round up with timer granularity
+		 * so that we never get into inf recursion here,
+		 * it doesn't do that though
+		 */
+		__run_hrtimer(timer);
+		return 1;
 	}
 	return 0;
 }
@@ -724,11 +693,6 @@ static int hrtimer_switch_to_hres(void)
 	return 1;
 }
 
-static inline void hrtimer_raise_softirq(void)
-{
-	raise_softirq(HRTIMER_SOFTIRQ);
-}
-
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
@@ -747,7 +711,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer,
 {
 	return 0;
 }
-static inline void hrtimer_raise_softirq(void) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
@@ -890,10 +853,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base,
 			     unsigned long newstate, int reprogram)
 {
-	/* High res. callback list. NOP for !HIGHRES */
-	if (hrtimer_cb_pending(timer))
-		hrtimer_remove_cb_pending(timer);
-	else {
+	if (timer->state & HRTIMER_STATE_ENQUEUED) {
 		/*
 		 * Remove the timer from the rbtree and replace the
 		 * first entry pointer if necessary.
@@ -953,7 +913,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
-	int ret, raise;
+	int ret;
 
 	base = lock_hrtimer_base(timer, &flags);
 
@@ -988,26 +948,8 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 	enqueue_hrtimer(timer, new_base,
 			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
-	/*
-	 * The timer may be expired and moved to the cb_pending
-	 * list. We can not raise the softirq with base lock held due
-	 * to a possible deadlock with runqueue lock.
-	 */
-	raise = timer->state == HRTIMER_STATE_PENDING;
-
-	/*
-	 * We use preempt_disable to prevent this task from migrating after
-	 * setting up the softirq and raising it. Otherwise, if me migrate
-	 * we will raise the softirq on the wrong CPU.
-	 */
-	preempt_disable();
-
 	unlock_hrtimer_base(timer, &flags);
 
-	if (raise)
-		hrtimer_raise_softirq();
-	preempt_enable();
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
@@ -1192,75 +1134,6 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
-static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
-{
-	spin_lock_irq(&cpu_base->lock);
-
-	while (!list_empty(&cpu_base->cb_pending)) {
-		enum hrtimer_restart (*fn)(struct hrtimer *);
-		struct hrtimer *timer;
-		int restart;
-		int emulate_hardirq_ctx = 0;
-
-		timer = list_entry(cpu_base->cb_pending.next,
-				   struct hrtimer, cb_entry);
-
-		debug_hrtimer_deactivate(timer);
-		timer_stats_account_hrtimer(timer);
-
-		fn = timer->function;
-		/*
-		 * A timer might have been added to the cb_pending list
-		 * when it was migrated during a cpu-offline operation.
-		 * Emulate hardirq context for such timers.
-		 */
-		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
-		    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
-			emulate_hardirq_ctx = 1;
-
-		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
-		spin_unlock_irq(&cpu_base->lock);
-
-		if (unlikely(emulate_hardirq_ctx)) {
-			local_irq_disable();
-			restart = fn(timer);
-			local_irq_enable();
-		} else
-			restart = fn(timer);
-
-		spin_lock_irq(&cpu_base->lock);
-
-		timer->state &= ~HRTIMER_STATE_CALLBACK;
-		if (restart == HRTIMER_RESTART) {
-			BUG_ON(hrtimer_active(timer));
-			/*
-			 * Enqueue the timer, allow reprogramming of the event
-			 * device
-			 */
-			enqueue_hrtimer(timer, timer->base, 1);
-		} else if (hrtimer_active(timer)) {
-			/*
-			 * If the timer was rearmed on another CPU, reprogram
-			 * the event device.
-			 */
-			struct hrtimer_clock_base *base = timer->base;
-
-			if (base->first == &timer->node &&
-			    hrtimer_reprogram(timer, base)) {
-				/*
-				 * Timer is expired. Thus move it from tree to
-				 * pending list again.
-				 */
-				__remove_hrtimer(timer, base,
-						 HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					      &base->cpu_base->cb_pending);
-			}
-		}
-	}
-	spin_unlock_irq(&cpu_base->lock);
-}
-
 static void __run_hrtimer(struct hrtimer *timer)
 {
 	struct hrtimer_clock_base *base = timer->base;
@@ -1268,25 +1141,21 @@ static void __run_hrtimer(struct hrtimer *timer)
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
+	WARN_ON(!irqs_disabled());
+
 	debug_hrtimer_deactivate(timer);
 	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
 	timer_stats_account_hrtimer(timer);
-
 	fn = timer->function;
-	if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
-	    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
-		/*
-		 * Used for scheduler timers, avoid lock inversion with
-		 * rq->lock and tasklist_lock.
-		 *
-		 * These timers are required to deal with enqueue expiry
-		 * themselves and are not allowed to migrate.
-		 */
-		spin_unlock(&cpu_base->lock);
-		restart = fn(timer);
-		spin_lock(&cpu_base->lock);
-	} else
-		restart = fn(timer);
+
+	/*
+	 * Because we run timers from hardirq context, there is no chance
+	 * they get migrated to another cpu, therefore its safe to unlock
+	 * the timer base.
+	 */
+	spin_unlock(&cpu_base->lock);
+	restart = fn(timer);
+	spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
@@ -1311,7 +1180,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
 	ktime_t expires_next, now;
-	int i, raise = 0;
+	int i;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
@@ -1360,16 +1229,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 				break;
 			}
 
-			/* Move softirq callbacks to the pending list */
-			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
-				__remove_hrtimer(timer, base,
-						 HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					      &base->cpu_base->cb_pending);
-				raise = 1;
-				continue;
-			}
-
 			__run_hrtimer(timer);
 		}
 		spin_unlock(&cpu_base->lock);
@@ -1383,10 +1242,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 		if (tick_program_event(expires_next, 0))
 			goto retry;
 	}
-
-	/* Raise softirq ? */
-	if (raise)
-		raise_softirq(HRTIMER_SOFTIRQ);
 }
 
 /**
@@ -1413,11 +1268,6 @@ void hrtimer_peek_ahead_timers(void)
 	local_irq_restore(flags);
 }
 
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
-	run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
-}
-
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
 /*
@@ -1429,8 +1279,6 @@ static void run_hrtimer_softirq(struct softirq_action *h)
  */
 void hrtimer_run_pending(void)
 {
-	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
 	if (hrtimer_hres_active())
 		return;
 
@@ -1444,8 +1292,6 @@ void hrtimer_run_pending(void)
 	 */
 	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
 		hrtimer_switch_to_hres();
-
-	run_hrtimer_pending(cpu_base);
 }
 
 /*
@@ -1482,14 +1328,6 @@ void hrtimer_run_queues(void)
 					hrtimer_get_expires_tv64(timer))
 				break;
 
-			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
-				__remove_hrtimer(timer, base,
-						 HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					      &base->cpu_base->cb_pending);
-				continue;
-			}
-
 			__run_hrtimer(timer);
 		}
 		spin_unlock(&cpu_base->lock);
@@ -1516,9 +1354,6 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 {
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = task;
-#ifdef CONFIG_HIGH_RES_TIMERS
-	sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
-#endif
 }
 
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
@@ -1655,18 +1490,16 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 
-	INIT_LIST_HEAD(&cpu_base->cb_pending);
 	hrtimer_init_hres(cpu_base);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-				struct hrtimer_clock_base *new_base, int dcpu)
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+				 struct hrtimer_clock_base *new_base)
 {
 	struct hrtimer *timer;
 	struct rb_node *node;
-	int raise = 0;
 
 	while ((node = rb_first(&old_base->active))) {
 		timer = rb_entry(node, struct hrtimer, node);
@@ -1674,18 +1507,6 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		debug_hrtimer_deactivate(timer);
 
 		/*
-		 * Should not happen. Per CPU timers should be
-		 * canceled _before_ the migration code is called
-		 */
-		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
-			__remove_hrtimer(timer, old_base,
-					 HRTIMER_STATE_INACTIVE, 0);
-			WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
-			     timer, timer->function, dcpu);
-			continue;
-		}
-
-		/*
 		 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
 		 * timer could be seen as !active and just vanish away
 		 * under us on another CPU
@@ -1693,69 +1514,34 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		__remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
 		timer->base = new_base;
 		/*
-		 * Enqueue the timer. Allow reprogramming of the event device
+		 * Enqueue the timers on the new cpu, but do not reprogram
+		 * the timer as that would enable a deadlock between
+		 * hrtimer_enqueue_reprogramm() running the timer and us still
+		 * holding a nested base lock.
+		 *
+		 * Instead we tickle the hrtimer interrupt after the migration
+		 * is done, which will run all expired timers and re-programm
+		 * the timer device.
 		 */
-		enqueue_hrtimer(timer, new_base, 1);
+		enqueue_hrtimer(timer, new_base, 0);
 
-#ifdef CONFIG_HIGH_RES_TIMERS
-		/*
-		 * Happens with high res enabled when the timer was
-		 * already expired and the callback mode is
-		 * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
-		 * enqueue code does not move them to the soft irq
-		 * pending list for performance/latency reasons, but
-		 * in the migration state, we need to do that
-		 * otherwise we end up with a stale timer.
-		 */
-		if (timer->state == HRTIMER_STATE_MIGRATE) {
-			timer->state = HRTIMER_STATE_PENDING;
-			list_add_tail(&timer->cb_entry,
-				      &new_base->cpu_base->cb_pending);
-			raise = 1;
-		}
-#endif
 		/* Clear the migration state bit */
 		timer->state &= ~HRTIMER_STATE_MIGRATE;
 	}
-	return raise;
-}
-
-#ifdef CONFIG_HIGH_RES_TIMERS
-static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
-				   struct hrtimer_cpu_base *new_base)
-{
-	struct hrtimer *timer;
-	int raise = 0;
-
-	while (!list_empty(&old_base->cb_pending)) {
-		timer = list_entry(old_base->cb_pending.next,
-				   struct hrtimer, cb_entry);
-
-		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
-		timer->base = &new_base->clock_base[timer->base->index];
-		list_add_tail(&timer->cb_entry, &new_base->cb_pending);
-		raise = 1;
-	}
-	return raise;
-}
-#else
-static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
-				   struct hrtimer_cpu_base *new_base)
-{
-	return 0;
 }
-#endif
 
-static void migrate_hrtimers(int cpu)
+static int migrate_hrtimers(int scpu)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
-	int i, raise = 0;
+	int dcpu, i;
 
-	BUG_ON(cpu_online(cpu));
-	old_base = &per_cpu(hrtimer_bases, cpu);
+	BUG_ON(cpu_online(scpu));
+	old_base = &per_cpu(hrtimer_bases, scpu);
 	new_base = &get_cpu_var(hrtimer_bases);
 
-	tick_cancel_sched_timer(cpu);
+	dcpu = smp_processor_id();
+
+	tick_cancel_sched_timer(scpu);
 	/*
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
@@ -1764,41 +1550,47 @@ static void migrate_hrtimers(int cpu)
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		if (migrate_hrtimer_list(&old_base->clock_base[i],
-					 &new_base->clock_base[i], cpu))
-			raise = 1;
+		migrate_hrtimer_list(&old_base->clock_base[i],
+				     &new_base->clock_base[i]);
 	}
 
-	if (migrate_hrtimer_pending(old_base, new_base))
-		raise = 1;
-
 	spin_unlock(&old_base->lock);
 	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(hrtimer_bases);
 
-	if (raise)
-		hrtimer_raise_softirq();
+	return dcpu;
+}
+
+static void tickle_timers(void *arg)
+{
+	hrtimer_peek_ahead_timers();
 }
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
-	unsigned int cpu = (long)hcpu;
+	int scpu = (long)hcpu;
 
 	switch (action) {
 
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		init_hrtimers_cpu(cpu);
+		init_hrtimers_cpu(scpu);
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu);
-		migrate_hrtimers(cpu);
+	{
+		int dcpu;
+
+		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
+		dcpu = migrate_hrtimers(scpu);
+		smp_call_function_single(dcpu, tickle_timers, NULL, 0);
 		break;
+	}
 #endif
 
 	default:
@@ -1817,9 +1609,6 @@ void __init hrtimers_init(void)
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
-	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
 }
 
 /**
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index a140e44eebba..887c63787de6 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -116,7 +116,7 @@ static DEFINE_SPINLOCK(idr_lock);
  * must supply functions here, even if the function just returns
  * ENOSYS. The standard POSIX timer management code assumes the
  * following: 1.) The k_itimer struct (sched.h) is used for the
- * timer. 2.) The list, it_lock, it_clock, it_id and it_process
+ * timer. 2.) The list, it_lock, it_clock, it_id and it_pid
  * fields are not modified by timer code.
  *
  * At this time all functions EXCEPT clock_nanosleep can be
@@ -319,7 +319,8 @@ void do_schedule_next_timer(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
-	int shared, ret;
+	struct task_struct *task;
+	int shared, ret = -1;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->do_schedule_next_timer().
@@ -333,8 +334,13 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
-	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
+	rcu_read_lock();
+	task = pid_task(timr->it_pid, PIDTYPE_PID);
+	if (task) {
+		shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+		ret = send_sigqueue(timr->sigq, task, shared);
+	}
+	rcu_read_unlock();
 	/* If we failed to send the signal the timer stops. */
 	return ret > 0;
 }
@@ -411,7 +417,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 	return ret;
 }
 
-static struct task_struct * good_sigevent(sigevent_t * event)
+static struct pid *good_sigevent(sigevent_t * event)
 {
 	struct task_struct *rtn = current->group_leader;
 
@@ -425,7 +431,7 @@ static struct task_struct * good_sigevent(sigevent_t * event)
 	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
 		return NULL;
 
-	return rtn;
+	return task_pid(rtn);
 }
 
 void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
@@ -464,6 +470,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 		idr_remove(&posix_timers_id, tmr->it_id);
 		spin_unlock_irqrestore(&idr_lock, flags);
 	}
+	put_pid(tmr->it_pid);
 	sigqueue_free(tmr->sigq);
 	kmem_cache_free(posix_timers_cache, tmr);
 }
@@ -477,7 +484,6 @@ sys_timer_create(const clockid_t which_clock,
 {
 	struct k_itimer *new_timer;
 	int error, new_timer_id;
-	struct task_struct *process;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
 
@@ -531,11 +537,9 @@ sys_timer_create(const clockid_t which_clock,
 		goto out;
 	}
 	rcu_read_lock();
-	process = good_sigevent(&event);
-	if (process)
-		get_task_struct(process);
+	new_timer->it_pid = get_pid(good_sigevent(&event));
 	rcu_read_unlock();
-	if (!process) {
+	if (!new_timer->it_pid) {
 		error = -EINVAL;
 		goto out;
 	}
@@ -543,8 +547,7 @@ sys_timer_create(const clockid_t which_clock,
 		event.sigev_notify = SIGEV_SIGNAL;
 		event.sigev_signo = SIGALRM;
 		event.sigev_value.sival_int = new_timer->it_id;
-		process = current->group_leader;
-		get_task_struct(process);
+		new_timer->it_pid = get_pid(task_tgid(current));
 	}
 
 	new_timer->it_sigev_notify = event.sigev_notify;
@@ -554,7 +557,7 @@ sys_timer_create(const clockid_t which_clock,
 	new_timer->sigq->info.si_code = SI_TIMER;
 
 	spin_lock_irq(&current->sighand->siglock);
-	new_timer->it_process = process;
+	new_timer->it_signal = current->signal;
 	list_add(&new_timer->list, &current->signal->posix_timers);
 	spin_unlock_irq(&current->sighand->siglock);
 
@@ -589,8 +592,7 @@ static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
 	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-		if (timr->it_process &&
-		    same_thread_group(timr->it_process, current)) {
+		if (timr->it_signal == current->signal) {
 			spin_unlock(&idr_lock);
 			return timr;
 		}
@@ -837,8 +839,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	put_task_struct(timer->it_process);
-	timer->it_process = NULL;
+	timer->it_signal = NULL;
 
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
@@ -864,8 +865,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	put_task_struct(timer->it_process);
-	timer->it_process = NULL;
+	timer->it_signal = NULL;
 
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
diff --git a/kernel/sched.c b/kernel/sched.c
index e4bb1dd7b308..22c532a6f82c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -203,7 +203,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
-	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
 static inline int rt_bandwidth_enabled(void)
@@ -1139,7 +1138,6 @@ static void init_rq_hrtick(struct rq *rq)
 
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else /* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8ff15e5d486b..f5f793d92415 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -131,7 +131,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 {
 	enum hrtimer_restart res = HRTIMER_NORESTART;
 
-	write_seqlock_irq(&xtime_lock);
+	write_seqlock(&xtime_lock);
 
 	switch (time_state) {
 	case TIME_OK:
@@ -164,7 +164,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 	}
 	update_vsyscall(&xtime, clock);
 
-	write_sequnlock_irq(&xtime_lock);
+	write_sequnlock(&xtime_lock);
 
 	return res;
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 342fc9ccab46..8f3fc2582d38 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -247,7 +247,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	if (need_resched())
 		goto end;
 
-	if (unlikely(local_softirq_pending())) {
+	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
 		static int ratelimit;
 
 		if (ratelimit < 10) {
@@ -282,8 +282,31 @@ void tick_nohz_stop_sched_tick(int inidle)
 	/* Schedule the tick, if we are at least one jiffie off */
 	if ((long)delta_jiffies >= 1) {
 
+		/*
+		 * calculate the expiry time for the next timer wheel
+		 * timer
+		 */
+		expires = ktime_add_ns(last_update, tick_period.tv64 *
+				   delta_jiffies);
+
+		/*
+		 * If this cpu is the one which updates jiffies, then
+		 * give up the assignment and let it be taken by the
+		 * cpu which runs the tick timer next, which might be
+		 * this cpu as well. If we don't drop this here the
+		 * jiffies might be stale and do_timer() never
+		 * invoked.
+		 */
+		if (cpu == tick_do_timer_cpu)
+			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+
 		if (delta_jiffies > 1)
 			cpu_set(cpu, nohz_cpu_mask);
+
+		/* Skip reprogram of event if its not changed */
+		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
+			goto out;
+
 		/*
 		 * nohz_stop_sched_tick can be called several times before
 		 * the nohz_restart_sched_tick is called. This happens when
@@ -306,17 +329,6 @@ void tick_nohz_stop_sched_tick(int inidle)
 			rcu_enter_nohz();
 		}
 
-		/*
-		 * If this cpu is the one which updates jiffies, then
-		 * give up the assignment and let it be taken by the
-		 * cpu which runs the tick timer next, which might be
-		 * this cpu as well. If we don't drop this here the
-		 * jiffies might be stale and do_timer() never
-		 * invoked.
-		 */
-		if (cpu == tick_do_timer_cpu)
-			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
-
 		ts->idle_sleeps++;
 
 		/*
@@ -332,12 +344,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 			goto out;
 		}
 
-		/*
-		 * calculate the expiry time for the next timer wheel
-		 * timer
-		 */
-		expires = ktime_add_ns(last_update, tick_period.tv64 *
-				   delta_jiffies);
+		/* Mark expiries */
 		ts->idle_expires = expires;
 
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
@@ -681,7 +688,6 @@ void tick_setup_sched_timer(void)
 	 */
 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	ts->sched_timer.function = tick_sched_timer;
-	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	/* Get the next period (per cpu) */
 	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..ae542e2e38d5 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,6 @@ static void start_stack_timer(int cpu)
 
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
-	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
 }