author		Martin Schwidefsky <schwidefsky@de.ibm.com>	2008-12-31 09:11:41 -0500
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2008-12-31 09:11:48 -0500
commit		9cfb9b3c3a7361c793c031e9c3583b177ac5debd
tree		b735c71e6fee3fd1464c21ce53f93d98ceddf90d
parent		6f43092441bda528dd38f2dc6c1e2522c5079fb7
[PATCH] improve idle cputime accounting
Distinguish the cputime of the idle process where idle is actually using
cpu cycles from the cputime where idle is sleeping on an enabled wait
psw. The former is accounted as system time, the latter as idle time.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
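[Editor's note: for readers who want the accounting rule without wading through
the diff, below is a toy user-space model of the split, illustrative only and
not kernel code; the field names loosely mirror vtime.c. The TOD clock keeps
running while the cpu sleeps in the enabled wait psw, whereas the cpu timer
only advances while the (virtual) cpu actually executes instructions, so the
two deltas separate idle time from system time.]

/* Toy model, not kernel code: the clock counts up, the cpu timer counts down. */
#include <stdio.h>

typedef unsigned long long u64;

struct idle_period {
	u64 idle_enter;		/* stck taken just before loading the wait psw */
	u64 int_clock;		/* stck taken on the interrupt ending the wait */
	u64 timer_enter;	/* stpt taken just before loading the wait psw */
	u64 timer_exit;		/* stpt taken on interrupt entry */
};

static void account(const struct idle_period *p, u64 *system, u64 *idle)
{
	/* Wall-clock time spent sleeping on the wait psw -> idle time. */
	*idle += p->int_clock - p->idle_enter;
	/* Cpu-timer decrement around the wait, i.e. cycles the idle task
	 * really executed on the entry/exit path -> system time. */
	*system += p->timer_enter - p->timer_exit;
}

int main(void)
{
	struct idle_period p = {
		.idle_enter = 1000, .int_clock = 6000,	/* 5000 units asleep */
		.timer_enter = 900, .timer_exit = 880,	/* 20 units executed */
	};
	u64 system = 0, idle = 0;

	account(&p, &system, &idle);
	printf("system=%llu idle=%llu\n", system, idle);
	return 0;
}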
-rw-r--r--	arch/s390/include/asm/cpu.h	|   4
-rw-r--r--	arch/s390/include/asm/timer.h	|  16
-rw-r--r--	arch/s390/kernel/entry.S	|   5
-rw-r--r--	arch/s390/kernel/entry64.S	|   5
-rw-r--r--	arch/s390/kernel/process.c	|  64
-rw-r--r--	arch/s390/kernel/s390_ext.c	|   2
-rw-r--r--	arch/s390/kernel/vtime.c	| 412
-rw-r--r--	drivers/s390/cio/cio.c	|   2
-rw-r--r--	drivers/s390/s390mach.c	|   3
9 files changed, 248 insertions(+), 265 deletions(-)
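[Editor's note: the vtimer interface is unchanged for callers; the sketch below
is a hedged usage example built only from the declarations visible in the hunks
that follow. The callback, its data value and the expiry constant are invented
for the example; expiry is measured in cpu-timer units, i.e. consumed cpu time,
not wall time.]

#include <asm/timer.h>

static void my_cb(unsigned long data)
{
	/* Runs from the cpu timer interrupt on the cpu that armed the
	 * timer, after that cpu consumed the requested cpu time. */
}

static struct vtimer_list my_timer;

static void my_arm(void)
{
	init_virt_timer(&my_timer);
	my_timer.function = my_cb;
	my_timer.data = 0;
	/* Per the comment in vtime.c below, mod_virt_timer() also adds a
	 * timer that is not yet pending, as a oneshot on the current cpu. */
	mod_virt_timer(&my_timer, 0x100000ULL);
}

static void my_disarm(void)
{
	del_virt_timer(&my_timer);	/* 1 if a pending timer was removed */
}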
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index 89456df43c4a..d60a2eefb17b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -21,12 +21,12 @@ struct s390_idle_data {
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void s390_idle_leave(void);
+void vtime_start_cpu(void);
 
 static inline void s390_idle_check(void)
 {
 	if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
-		s390_idle_leave();
+		vtime_start_cpu();
 }
 
 #endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 61705d60f995..e4bcab739c19 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -23,20 +23,18 @@ struct vtimer_list {
 	__u64 expires;
 	__u64 interval;
 
-	spinlock_t lock;
-	unsigned long magic;
-
 	void (*function)(unsigned long);
 	unsigned long data;
 };
 
-/* the offset value will wrap after ca. 71 years */
+/* the vtimer value will wrap after ca. 71 years */
 struct vtimer_queue {
 	struct list_head list;
 	spinlock_t lock;
-	__u64 to_expire;	/* current event expire time */
-	__u64 offset;		/* list offset to zero */
+	__u64 timer;		/* last programmed timer */
+	__u64 elapsed;		/* elapsed time of timer expire values */
 	__u64 idle;		/* temp var for idle */
+	int do_spt;		/* =1: reprogram cpu timer in idle */
 };
 
 extern void init_virt_timer(struct vtimer_list *timer);
@@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer);
 extern void init_cpu_vtimer(void);
 extern void vtime_init(void);
 
-extern void vtime_start_cpu_timer(void);
-extern void vtime_stop_cpu_timer(void);
+extern void vtime_stop_cpu(void);
+extern void vtime_start_leave(void);
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55de521aef77..1268aa2991bf 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -583,8 +583,8 @@ kernel_per:
 
 	.globl io_int_handler
 io_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+16
 	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
 	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
@@ -723,8 +723,8 @@ io_notify_resume:
 
 	.globl ext_int_handler
 ext_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+16
 	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
 	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
@@ -750,6 +750,7 @@ __critical_end:
 
 	.globl mcck_int_handler
 mcck_int_handler:
+	stck	__LC_INT_CLOCK
 	spt	__LC_CPU_TIMER_SAVE_AREA	# revalidate cpu timer
 	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 16bb4fd1a403..ae83c195171c 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -559,8 +559,8 @@ kernel_per:
  */
 	.globl io_int_handler
 io_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
 	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
@@ -721,8 +721,8 @@ io_notify_resume:
  */
 	.globl ext_int_handler
 ext_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
 	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
@@ -746,6 +746,7 @@ __critical_end:
  */
 	.globl mcck_int_handler
 mcck_int_handler:
+	stck	__LC_INT_CLOCK
 	la	%r1,4095		# revalidate r1
 	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 1e06436f07c2..b6110bdf8dc2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -46,7 +46,6 @@
 #include <asm/processor.h>
 #include <asm/irq.h>
 #include <asm/timer.h>
-#include <asm/cpu.h>
 #include "entry.h"
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -76,35 +75,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return sf->gprs[8];
 }
 
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
-	.lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
-};
-
-void s390_idle_leave(void)
-{
-	struct s390_idle_data *idle;
-	unsigned long long idle_time;
-
-	idle = &__get_cpu_var(s390_idle);
-	idle_time = S390_lowcore.int_clock - idle->idle_enter;
-	spin_lock(&idle->lock);
-	idle->idle_time += idle_time;
-	idle->idle_enter = 0ULL;
-	idle->idle_count++;
-	spin_unlock(&idle->lock);
-	vtime_start_cpu_timer();
-}
-
 extern void s390_handle_mcck(void);
 /*
  * The idle loop on a S390...
  */
 static void default_idle(void)
 {
-	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
-	unsigned long addr;
-	psw_t psw;
-
 	/* CPU is going idle. */
 	local_irq_disable();
 	if (need_resched()) {
@@ -120,7 +96,6 @@ static void default_idle(void)
 	local_mcck_disable();
 	if (test_thread_flag(TIF_MCCK_PENDING)) {
 		local_mcck_enable();
-		s390_idle_leave();
 		local_irq_enable();
 		s390_handle_mcck();
 		return;
@@ -128,42 +103,9 @@ static void default_idle(void)
 	trace_hardirqs_on();
 	/* Don't trace preempt off for idle. */
 	stop_critical_timings();
-	vtime_stop_cpu_timer();
-
-	/*
-	 * The inline assembly is equivalent to
-	 *	idle->idle_enter = get_clock();
-	 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-	 *			PSW_MASK_IO | PSW_MASK_EXT);
-	 * The difference is that the inline assembly makes sure that
-	 * the stck instruction is right before the lpsw instruction.
-	 * This is done to increase the precision.
-	 */
-
-	/* Wait for external, I/O or machine check interrupt. */
-	psw.mask = psw_kernel_bits|PSW_MASK_WAIT|PSW_MASK_IO|PSW_MASK_EXT;
-#ifndef __s390x__
-	asm volatile(
-		"	basr	%0,0\n"
-		"0:	ahi	%0,1f-0b\n"
-		"	st	%0,4(%2)\n"
-		"	stck	0(%3)\n"
-		"	lpsw	0(%2)\n"
-		"1:"
-		: "=&d" (addr), "=m" (idle->idle_enter)
-		: "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
-		: "memory", "cc");
-#else /* __s390x__ */
-	asm volatile(
-		"	larl	%0,1f\n"
-		"	stg	%0,8(%2)\n"
-		"	stck	0(%3)\n"
-		"	lpswe	0(%2)\n"
-		"1:"
-		: "=&d" (addr), "=m" (idle->idle_enter)
-		: "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
-		: "memory", "cc");
-#endif /* __s390x__ */
+	/* Stop virtual timer and halt the cpu. */
+	vtime_stop_cpu();
+	/* Reenable preemption tracer. */
 	start_critical_timings();
 }
 
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index e019b419efc6..a0d2d55d7fb3 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code)
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	irq_enter();
 	s390_idle_check();
+	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 25d21fef76ba..2fb36e462194 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,10 +23,35 @@
 #include <asm/s390_ext.h>
 #include <asm/timer.h>
 #include <asm/irq_regs.h>
+#include <asm/cpu.h>
 
 static ext_int_info_t ext_int_info_timer;
+
 static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
 
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
+	.lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
+};
+
+static inline __u64 get_vtimer(void)
+{
+	__u64 timer;
+
+	asm volatile("STPT %0" : "=m" (timer));
+	return timer;
+}
+
+static inline void set_vtimer(__u64 expires)
+{
+	__u64 timer;
+
+	asm volatile ("  STPT %0\n"  /* Store current cpu timer value */
+		      "  SPT %1"     /* Set new value immediatly afterwards */
+		      : "=m" (timer) : "m" (expires) );
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+	S390_lowcore.last_update_timer = expires;
+}
+
 /*
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
@@ -53,18 +78,12 @@ static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	system = S390_lowcore.system_timer - ti->system_timer;
 	S390_lowcore.steal_timer -= system;
 	ti->system_timer = S390_lowcore.system_timer;
-	if (idle_task(smp_processor_id()) != current)
-		account_system_time(tsk, hardirq_offset, system, system);
-	else
-		account_idle_time(system);
+	account_system_time(tsk, hardirq_offset, system, system);
 
 	steal = S390_lowcore.steal_timer;
 	if ((s64) steal > 0) {
 		S390_lowcore.steal_timer = 0;
-		if (idle_task(smp_processor_id()) != current)
-			account_steal_time(steal);
-		else
-			account_idle_time(steal);
+		account_steal_time(steal);
 	}
 }
 
@@ -96,80 +115,127 @@ void account_system_vtime(struct task_struct *tsk)
 	__u64 timer, system;
 
 	timer = S390_lowcore.last_update_timer;
-	asm volatile ("  STPT %0"	/* Store current cpu timer value */
-		      : "=m" (S390_lowcore.last_update_timer) );
+	S390_lowcore.last_update_timer = get_vtimer();
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
 
 	system = S390_lowcore.system_timer - ti->system_timer;
 	S390_lowcore.steal_timer -= system;
 	ti->system_timer = S390_lowcore.system_timer;
-	if (in_irq() || idle_task(smp_processor_id()) != current)
-		account_system_time(tsk, 0, system, system);
-	else
-		account_idle_time(system);
+	account_system_time(tsk, 0, system, system);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-static inline void set_vtimer(__u64 expires)
+void vtime_start_cpu(void)
 {
+	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
 	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
-	__u64 timer;
+	__u64 idle_time, expires;
 
-	asm volatile ("  STPT %0\n"  /* Store current cpu timer value */
-		      "  SPT %1"     /* Set new value immediatly afterwards */
-		      : "=m" (timer) : "m" (expires) );
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
-	S390_lowcore.last_update_timer = expires;
-
-	/* store expire time for this CPU timer */
-	vq->to_expire = expires;
-}
-
-void vtime_start_cpu_timer(void)
-{
-	struct vtimer_queue *vt_list;
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
-
-	/* CPU timer interrupt is pending, don't reprogramm it */
-	if (vt_list->idle & 1LL<<63)
-		return;
-
-	if (!list_empty(&vt_list->list))
-		set_vtimer(vt_list->idle);
+	/* Account time spent with enabled wait psw loaded as idle time. */
+	idle_time = S390_lowcore.int_clock - idle->idle_enter;
+	account_idle_time(idle_time);
+	S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+
+	/* Account system time spent going idle. */
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
+	S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
+
+	/* Restart vtime CPU timer */
+	if (vq->do_spt) {
+		/* Program old expire value but first save progress. */
+		expires = vq->idle - S390_lowcore.async_enter_timer;
+		expires += get_vtimer();
+		set_vtimer(expires);
+	} else {
+		/* Don't account the CPU timer delta while the cpu was idle. */
+		vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
+	}
+
+	spin_lock(&idle->lock);
+	idle->idle_time += idle_time;
+	idle->idle_enter = 0ULL;
+	idle->idle_count++;
+	spin_unlock(&idle->lock);
 }
 
-void vtime_stop_cpu_timer(void)
+void vtime_stop_cpu(void)
 {
-	struct vtimer_queue *vt_list;
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
-
-	/* nothing to do */
-	if (list_empty(&vt_list->list)) {
-		vt_list->idle = VTIMER_MAX_SLICE;
-		goto fire;
+	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
+	psw_t psw;
+
+	/* Wait for external, I/O or machine check interrupt. */
+	psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
+
+	/* Check if the CPU timer needs to be reprogrammed. */
+	if (vq->do_spt) {
+		__u64 vmax = VTIMER_MAX_SLICE;
+		/*
+		 * The inline assembly is equivalent to
+		 *	vq->idle = get_cpu_timer();
+		 *	set_cpu_timer(VTIMER_MAX_SLICE);
+		 *	idle->idle_enter = get_clock();
+		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+		 *			   PSW_MASK_IO | PSW_MASK_EXT);
		 * The difference is that the inline assembly makes sure that
+		 * the last three instruction are stpt, stck and lpsw in that
+		 * order. This is done to increase the precision.
+		 */
+		asm volatile(
+#ifndef CONFIG_64BIT
+			"	basr	1,0\n"
+			"0:	ahi	1,1f-0b\n"
+			"	st	1,4(%2)\n"
+#else /* CONFIG_64BIT */
+			"	larl	1,1f\n"
+			"	stg	1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+			"	stpt	0(%4)\n"
+			"	spt	0(%5)\n"
+			"	stck	0(%3)\n"
+#ifndef CONFIG_64BIT
+			"	lpsw	0(%2)\n"
+#else /* CONFIG_64BIT */
+			"	lpswe	0(%2)\n"
+#endif /* CONFIG_64BIT */
+			"1:"
+			: "=m" (idle->idle_enter), "=m" (vq->idle)
+			: "a" (&psw), "a" (&idle->idle_enter),
+			  "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
+			: "memory", "cc", "1");
+	} else {
+		/*
+		 * The inline assembly is equivalent to
+		 *	vq->idle = get_cpu_timer();
+		 *	idle->idle_enter = get_clock();
+		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+		 *			   PSW_MASK_IO | PSW_MASK_EXT);
+		 * The difference is that the inline assembly makes sure that
+		 * the last three instruction are stpt, stck and lpsw in that
+		 * order. This is done to increase the precision.
+		 */
+		asm volatile(
+#ifndef CONFIG_64BIT
+			"	basr	1,0\n"
+			"0:	ahi	1,1f-0b\n"
+			"	st	1,4(%2)\n"
+#else /* CONFIG_64BIT */
+			"	larl	1,1f\n"
+			"	stg	1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+			"	stpt	0(%4)\n"
+			"	stck	0(%3)\n"
+#ifndef CONFIG_64BIT
+			"	lpsw	0(%2)\n"
+#else /* CONFIG_64BIT */
+			"	lpswe	0(%2)\n"
+#endif /* CONFIG_64BIT */
+			"1:"
+			: "=m" (idle->idle_enter), "=m" (vq->idle)
+			: "a" (&psw), "a" (&idle->idle_enter),
+			  "a" (&vq->idle), "m" (psw)
+			: "memory", "cc", "1");
 	}
-
-	/* store the actual expire value */
-	asm volatile ("STPT %0" : "=m" (vt_list->idle));
-
-	/*
-	 * If the CPU timer is negative we don't reprogramm
-	 * it because we will get instantly an interrupt.
-	 */
-	if (vt_list->idle & 1LL<<63)
-		return;
-
-	vt_list->offset += vt_list->to_expire - vt_list->idle;
-
-	/*
-	 * We cannot halt the CPU timer, we just write a value that
-	 * nearly never expires (only after 71 years) and re-write
-	 * the stored expire value if we continue the timer
-	 */
- fire:
-	set_vtimer(VTIMER_MAX_SLICE);
 }
 
 /*
@@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
  */
 static void do_callbacks(struct list_head *cb_list)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	struct vtimer_list *event, *tmp;
-	void (*fn)(unsigned long);
-	unsigned long data;
 
 	if (list_empty(cb_list))
 		return;
 
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	vq = &__get_cpu_var(virt_cpu_timer);
 
 	list_for_each_entry_safe(event, tmp, cb_list, entry) {
-		fn = event->function;
-		data = event->data;
-		fn(data);
-
-		if (!event->interval)
-			/* delete one shot timer */
-			list_del_init(&event->entry);
-		else {
-			/* move interval timer back to list */
-			spin_lock(&vt_list->lock);
-			list_del_init(&event->entry);
-			list_add_sorted(event, &vt_list->list);
-			spin_unlock(&vt_list->lock);
+		list_del_init(&event->entry);
+		(event->function)(event->data);
+		if (event->interval) {
+			/* Recharge interval timer */
+			event->expires = event->interval + vq->elapsed;
+			spin_lock(&vq->lock);
+			list_add_sorted(event, &vq->list);
+			spin_unlock(&vq->lock);
 		}
 	}
 }
@@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list)
 */
 static void do_cpu_timer_interrupt(__u16 error_code)
 {
-	__u64 next, delta;
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	struct vtimer_list *event, *tmp;
-	struct list_head *ptr;
-	/* the callback queue */
-	struct list_head cb_list;
+	struct list_head cb_list;	/* the callback queue */
+	__u64 elapsed, next;
 
 	INIT_LIST_HEAD(&cb_list);
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	vq = &__get_cpu_var(virt_cpu_timer);
 
 	/* walk timer list, fire all expired events */
-	spin_lock(&vt_list->lock);
-
-	if (vt_list->to_expire < VTIMER_MAX_SLICE)
-		vt_list->offset += vt_list->to_expire;
-
-	list_for_each_entry_safe(event, tmp, &vt_list->list, entry) {
-		if (event->expires > vt_list->offset)
-			/* found first unexpired event, leave */
-			break;
-
-		/* re-charge interval timer, we have to add the offset */
-		if (event->interval)
-			event->expires = event->interval + vt_list->offset;
-
-		/* move expired timer to the callback queue */
-		list_move_tail(&event->entry, &cb_list);
+	spin_lock(&vq->lock);
+
+	elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
+	BUG_ON((s64) elapsed < 0);
+	vq->elapsed = 0;
+	list_for_each_entry_safe(event, tmp, &vq->list, entry) {
+		if (event->expires < elapsed)
+			/* move expired timer to the callback queue */
+			list_move_tail(&event->entry, &cb_list);
+		else
+			event->expires -= elapsed;
 	}
-	spin_unlock(&vt_list->lock);
+	spin_unlock(&vq->lock);
+
+	vq->do_spt = list_empty(&cb_list);
 	do_callbacks(&cb_list);
 
 	/* next event is first in list */
-	spin_lock(&vt_list->lock);
-	if (!list_empty(&vt_list->list)) {
-		ptr = vt_list->list.next;
-		event = list_entry(ptr, struct vtimer_list, entry);
-		next = event->expires - vt_list->offset;
-
-		/* add the expired time from this interrupt handler
-		 * and the callback functions
-		 */
-		asm volatile ("STPT %0" : "=m" (delta));
-		delta = 0xffffffffffffffffLL - delta + 1;
-		vt_list->offset += delta;
-		next -= delta;
-	} else {
-		vt_list->offset = 0;
-		next = VTIMER_MAX_SLICE;
-	}
-	spin_unlock(&vt_list->lock);
-	set_vtimer(next);
+	next = VTIMER_MAX_SLICE;
+	spin_lock(&vq->lock);
+	if (!list_empty(&vq->list)) {
+		event = list_first_entry(&vq->list, struct vtimer_list, entry);
+		next = event->expires;
+	} else
+		vq->do_spt = 0;
+	spin_unlock(&vq->lock);
+	/*
+	 * To improve precision add the time spent by the
+	 * interrupt handler to the elapsed time.
+	 * Note: CPU timer counts down and we got an interrupt,
+	 * the current content is negative
+	 */
+	elapsed = S390_lowcore.async_enter_timer - get_vtimer();
+	set_vtimer(next - elapsed);
+	vq->timer = next - elapsed;
+	vq->elapsed = elapsed;
 }
 
 void init_virt_timer(struct vtimer_list *timer)
 {
 	timer->function = NULL;
 	INIT_LIST_HEAD(&timer->entry);
-	spin_lock_init(&timer->lock);
 }
 EXPORT_SYMBOL(init_virt_timer);
 
@@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer)
 */
 static void internal_add_vtimer(struct vtimer_list *timer)
 {
+	struct vtimer_queue *vq;
 	unsigned long flags;
-	__u64 done;
-	struct vtimer_list *event;
-	struct vtimer_queue *vt_list;
+	__u64 left, expires;
 
-	vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
-	spin_lock_irqsave(&vt_list->lock, flags);
+	vq = &per_cpu(virt_cpu_timer, timer->cpu);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	BUG_ON(timer->cpu != smp_processor_id());
 
-	/* if list is empty we only have to set the timer */
-	if (list_empty(&vt_list->list)) {
-		/* reset the offset, this may happen if the last timer was
-		 * just deleted by mod_virt_timer and the interrupt
-		 * didn't happen until here
-		 */
-		vt_list->offset = 0;
-		goto fire;
+	if (list_empty(&vq->list)) {
+		/* First timer on this cpu, just program it. */
+		list_add(&timer->entry, &vq->list);
+		set_vtimer(timer->expires);
+		vq->timer = timer->expires;
+		vq->elapsed = 0;
+	} else {
+		/* Check progress of old timers. */
+		expires = timer->expires;
+		left = get_vtimer();
+		if (likely((s64) expires < (s64) left)) {
+			/* The new timer expires before the current timer. */
+			set_vtimer(expires);
+			vq->elapsed += vq->timer - left;
+			vq->timer = expires;
+		} else {
+			vq->elapsed += vq->timer - left;
+			vq->timer = left;
+		}
+		/* Insert new timer into per cpu list. */
+		timer->expires += vq->elapsed;
+		list_add_sorted(timer, &vq->list);
 	}
 
-	/* save progress */
-	asm volatile ("STPT %0" : "=m" (done));
-
-	/* calculate completed work */
-	done = vt_list->to_expire - done + vt_list->offset;
-	vt_list->offset = 0;
-
-	list_for_each_entry(event, &vt_list->list, entry)
-		event->expires -= done;
-
- fire:
-	list_add_sorted(timer, &vt_list->list);
-
-	/* get first element, which is the next vtimer slice */
-	event = list_entry(vt_list->list.next, struct vtimer_list, entry);
-
-	set_vtimer(event->expires);
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	/* release CPU acquired in prepare_vtimer or mod_virt_timer() */
 	put_cpu();
 }
@@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic);
 * If we change a pending timer the function must be called on the CPU
 * where the timer is running on, e.g. by smp_call_function_single()
 *
- * The original mod_timer adds the timer if it is not pending. For compatibility
- * we do the same. The timer will be added on the current CPU as a oneshot timer.
+ * The original mod_timer adds the timer if it is not pending. For
+ * compatibility we do the same. The timer will be added on the current
+ * CPU as a oneshot timer.
 *
 * returns whether it has modified a pending timer (1) or not (0)
 */
 int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	unsigned long flags;
 	int cpu;
 
@@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 		return 1;
 
 	cpu = get_cpu();
-	vt_list = &per_cpu(virt_cpu_timer, cpu);
+	vq = &per_cpu(virt_cpu_timer, cpu);
 
 	/* check if we run on the right CPU */
 	BUG_ON(timer->cpu != cpu);
 
 	/* disable interrupts before test if timer is pending */
-	spin_lock_irqsave(&vt_list->lock, flags);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	/* if timer isn't pending add it on the current CPU */
 	if (!vtimer_pending(timer)) {
-		spin_unlock_irqrestore(&vt_list->lock, flags);
+		spin_unlock_irqrestore(&vq->lock, flags);
 		/* we do not activate an interval timer with mod_virt_timer */
 		timer->interval = 0;
 		timer->expires = expires;
@@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 	timer->interval = expires;
 
 	/* the timer can't expire anymore so we can release the lock */
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	internal_add_vtimer(timer);
 	return 1;
 }
@@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer);
 int del_virt_timer(struct vtimer_list *timer)
 {
 	unsigned long flags;
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 
 	/* check if timer is pending */
 	if (!vtimer_pending(timer))
 		return 0;
 
-	vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
-	spin_lock_irqsave(&vt_list->lock, flags);
+	vq = &per_cpu(virt_cpu_timer, timer->cpu);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	/* we don't interrupt a running timer, just let it expire! */
 	list_del_init(&timer->entry);
 
-	/* last timer removed */
-	if (list_empty(&vt_list->list)) {
-		vt_list->to_expire = 0;
-		vt_list->offset = 0;
-	}
-
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	return 1;
 }
 EXPORT_SYMBOL(del_virt_timer);
@@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer);
 */
 void init_cpu_vtimer(void)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 
 	/* kick the virtual timer */
-	S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
-	S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
 	asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
-	asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
+	asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
+
+	/* initialize per cpu vtimer structure */
+	vq = &__get_cpu_var(virt_cpu_timer);
+	INIT_LIST_HEAD(&vq->list);
+	spin_lock_init(&vq->lock);
 
 	/* enable cpu timer interrupts */
 	__ctl_set_bit(0,10);
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
-	INIT_LIST_HEAD(&vt_list->list);
-	spin_lock_init(&vt_list->lock);
-	vt_list->to_expire = 0;
-	vt_list->offset = 0;
-	vt_list->idle = 0;
-
 }
 
 void __init vtime_init(void)
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 8a8df7552969..06b71823f399 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -632,8 +632,8 @@ do_IRQ (struct pt_regs *regs)
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	irq_enter();
 	s390_idle_check();
+	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 834e9ee7e934..92b0417f8e12 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -18,6 +18,7 @@
 #include <asm/etr.h>
 #include <asm/lowcore.h>
 #include <asm/cio.h>
+#include <asm/cpu.h>
 #include "s390mach.h"
 
 static struct semaphore m_sem;
@@ -369,6 +370,8 @@ s390_do_machine_check(struct pt_regs *regs)
 
 	lockdep_off();
 
+	s390_idle_check();
+
 	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
 	mcck = &__get_cpu_var(cpu_mcck);
 	umode = user_mode(regs);