diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-02-16 04:27:46 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-16 11:13:58 -0500 |
commit | 1cfd68496e53f7be09a3c1358d1d389004217541 (patch) | |
tree | d854323b9b7821ab45ddea10d97766edce91be99 | |
parent | dde4b2b5f4ed275250488dabdaf282d9c6e7e2b8 (diff) |
[PATCH] Fix cascade lookup of next_timer_interrupt
When searching for the next pending timer in the timer wheel we need to take
the cascade into account. The current code has several problems:
1. it looks into the previous cascade
2. it ignores a pending cascade
3. it ignores multiple cascades
Change the cascade lookup, so it calculates the array index from the point of
the next cascade and always look at the cascade buckets, when the cascade is
pending, i.e. gets executed in the next timer softirq. When multiple
cascades are pending, then lookup the next buckets too.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | kernel/timer.c | 151 |
1 files changed, 81 insertions, 70 deletions
diff --git a/kernel/timer.c b/kernel/timer.c index b68a21a82e17..201bee07b8e0 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -597,99 +597,110 @@ static inline void __run_timers(tvec_base_t *base) | |||
597 | * is used on S/390 to stop all activity when a cpus is idle. | 597 | * is used on S/390 to stop all activity when a cpus is idle. |
598 | * This functions needs to be called disabled. | 598 | * This functions needs to be called disabled. |
599 | */ | 599 | */ |
600 | unsigned long next_timer_interrupt(void) | 600 | static unsigned long __next_timer_interrupt(tvec_base_t *base) |
601 | { | 601 | { |
602 | tvec_base_t *base; | 602 | unsigned long timer_jiffies = base->timer_jiffies; |
603 | struct list_head *list; | 603 | unsigned long expires = timer_jiffies + (LONG_MAX >> 1); |
604 | int index, slot, array, found = 0; | ||
604 | struct timer_list *nte; | 605 | struct timer_list *nte; |
605 | unsigned long expires; | ||
606 | unsigned long hr_expires = MAX_JIFFY_OFFSET; | ||
607 | ktime_t hr_delta; | ||
608 | tvec_t *varray[4]; | 606 | tvec_t *varray[4]; |
609 | int i, j; | ||
610 | |||
611 | hr_delta = hrtimer_get_next_event(); | ||
612 | if (hr_delta.tv64 != KTIME_MAX) { | ||
613 | struct timespec tsdelta; | ||
614 | tsdelta = ktime_to_timespec(hr_delta); | ||
615 | hr_expires = timespec_to_jiffies(&tsdelta); | ||
616 | if (hr_expires < 3) | ||
617 | return hr_expires + jiffies; | ||
618 | } | ||
619 | hr_expires += jiffies; | ||
620 | |||
621 | base = __get_cpu_var(tvec_bases); | ||
622 | spin_lock(&base->lock); | ||
623 | expires = base->timer_jiffies + (LONG_MAX >> 1); | ||
624 | list = NULL; | ||
625 | 607 | ||
626 | /* Look for timer events in tv1. */ | 608 | /* Look for timer events in tv1. */ |
627 | j = base->timer_jiffies & TVR_MASK; | 609 | index = slot = timer_jiffies & TVR_MASK; |
628 | do { | 610 | do { |
629 | list_for_each_entry(nte, base->tv1.vec + j, entry) { | 611 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { |
612 | found = 1; | ||
630 | expires = nte->expires; | 613 | expires = nte->expires; |
631 | if (j < (base->timer_jiffies & TVR_MASK)) | 614 | /* Look at the cascade bucket(s)? */ |
632 | list = base->tv2.vec + (INDEX(0)); | 615 | if (!index || slot < index) |
633 | goto found; | 616 | goto cascade; |
617 | return expires; | ||
634 | } | 618 | } |
635 | j = (j + 1) & TVR_MASK; | 619 | slot = (slot + 1) & TVR_MASK; |
636 | } while (j != (base->timer_jiffies & TVR_MASK)); | 620 | } while (slot != index); |
621 | |||
622 | cascade: | ||
623 | /* Calculate the next cascade event */ | ||
624 | if (index) | ||
625 | timer_jiffies += TVR_SIZE - index; | ||
626 | timer_jiffies >>= TVR_BITS; | ||
637 | 627 | ||
638 | /* Check tv2-tv5. */ | 628 | /* Check tv2-tv5. */ |
639 | varray[0] = &base->tv2; | 629 | varray[0] = &base->tv2; |
640 | varray[1] = &base->tv3; | 630 | varray[1] = &base->tv3; |
641 | varray[2] = &base->tv4; | 631 | varray[2] = &base->tv4; |
642 | varray[3] = &base->tv5; | 632 | varray[3] = &base->tv5; |
643 | for (i = 0; i < 4; i++) { | 633 | |
644 | j = INDEX(i); | 634 | for (array = 0; array < 4; array++) { |
635 | tvec_t *varp = varray[array]; | ||
636 | |||
637 | index = slot = timer_jiffies & TVN_MASK; | ||
645 | do { | 638 | do { |
646 | if (list_empty(varray[i]->vec + j)) { | 639 | list_for_each_entry(nte, varp->vec + slot, entry) { |
647 | j = (j + 1) & TVN_MASK; | 640 | found = 1; |
648 | continue; | ||
649 | } | ||
650 | list_for_each_entry(nte, varray[i]->vec + j, entry) | ||
651 | if (time_before(nte->expires, expires)) | 641 | if (time_before(nte->expires, expires)) |
652 | expires = nte->expires; | 642 | expires = nte->expires; |
653 | if (j < (INDEX(i)) && i < 3) | 643 | } |
654 | list = varray[i + 1]->vec + (INDEX(i + 1)); | 644 | /* |
655 | goto found; | 645 | * Do we still search for the first timer or are |
656 | } while (j != (INDEX(i))); | 646 | * we looking up the cascade buckets ? |
657 | } | 647 | */ |
658 | found: | 648 | if (found) { |
659 | if (list) { | 649 | /* Look at the cascade bucket(s)? */ |
660 | /* | 650 | if (!index || slot < index) |
661 | * The search wrapped. We need to look at the next list | 651 | break; |
662 | * from next tv element that would cascade into tv element | 652 | return expires; |
663 | * where we found the timer element. | 653 | } |
664 | */ | 654 | slot = (slot + 1) & TVN_MASK; |
665 | list_for_each_entry(nte, list, entry) { | 655 | } while (slot != index); |
666 | if (time_before(nte->expires, expires)) | 656 | |
667 | expires = nte->expires; | 657 | if (index) |
668 | } | 658 | timer_jiffies += TVN_SIZE - index; |
659 | timer_jiffies >>= TVN_BITS; | ||
669 | } | 660 | } |
670 | spin_unlock(&base->lock); | 661 | return expires; |
662 | } | ||
671 | 663 | ||
672 | /* | 664 | /* |
673 | * It can happen that other CPUs service timer IRQs and increment | 665 | * Check, if the next hrtimer event is before the next timer wheel |
674 | * jiffies, but we have not yet got a local timer tick to process | 666 | * event: |
675 | * the timer wheels. In that case, the expiry time can be before | 667 | */ |
676 | * jiffies, but since the high-resolution timer here is relative to | 668 | static unsigned long cmp_next_hrtimer_event(unsigned long now, |
677 | * jiffies, the default expression when high-resolution timers are | 669 | unsigned long expires) |
678 | * not active, | 670 | { |
679 | * | 671 | ktime_t hr_delta = hrtimer_get_next_event(); |
680 | * time_before(MAX_JIFFY_OFFSET + jiffies, expires) | 672 | struct timespec tsdelta; |
681 | * | ||
682 | * would falsely evaluate to true. If that is the case, just | ||
683 | * return jiffies so that we can immediately fire the local timer | ||
684 | */ | ||
685 | if (time_before(expires, jiffies)) | ||
686 | return jiffies; | ||
687 | 673 | ||
688 | if (time_before(hr_expires, expires)) | 674 | if (hr_delta.tv64 == KTIME_MAX) |
689 | return hr_expires; | 675 | return expires; |
690 | 676 | ||
677 | if (hr_delta.tv64 <= TICK_NSEC) | ||
678 | return now; | ||
679 | |||
680 | tsdelta = ktime_to_timespec(hr_delta); | ||
681 | now += timespec_to_jiffies(&tsdelta); | ||
682 | if (time_before(now, expires)) | ||
683 | return now; | ||
691 | return expires; | 684 | return expires; |
692 | } | 685 | } |
686 | |||
687 | /** | ||
688 | * next_timer_interrupt - return the jiffy of the next pending timer | ||
689 | */ | ||
690 | unsigned long next_timer_interrupt(void) | ||
691 | { | ||
692 | tvec_base_t *base = __get_cpu_var(tvec_bases); | ||
693 | unsigned long expires, now = jiffies; | ||
694 | |||
695 | spin_lock(&base->lock); | ||
696 | expires = __next_timer_interrupt(base); | ||
697 | spin_unlock(&base->lock); | ||
698 | |||
699 | if (time_before_eq(expires, now)) | ||
700 | return now; | ||
701 | |||
702 | return cmp_next_hrtimer_event(now, expires); | ||
703 | } | ||
693 | #endif | 704 | #endif |
694 | 705 | ||
695 | /******************************************************************/ | 706 | /******************************************************************/ |