aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-01-03 14:56:24 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-01-03 14:56:24 -0500
commit 61420f59a589c0668f70cbe725785837c78ece90 (patch)
tree 79ae77d731cd2425677b9527d50079d8cf34c3b2
parent d97106ab53f812910a62d18afb9dbe882819c1ba (diff)
parent c742b31c03f37c5c499178f09f57381aa6c70131 (diff)
Merge branch 'cputime' of git://git390.osdl.marist.edu/pub/scm/linux-2.6
* 'cputime' of git://git390.osdl.marist.edu/pub/scm/linux-2.6:
  [PATCH] fast vdso implementation for CLOCK_THREAD_CPUTIME_ID
  [PATCH] improve idle cputime accounting
  [PATCH] improve precision of idle time detection.
  [PATCH] improve precision of process accounting.
  [PATCH] idle cputime accounting
  [PATCH] fix scaled & unscaled cputime accounting
-rw-r--r-- arch/ia64/kernel/time.c 18
-rw-r--r-- arch/powerpc/kernel/process.c 1
-rw-r--r-- arch/powerpc/kernel/time.c 18
-rw-r--r-- arch/s390/include/asm/cpu.h 7
-rw-r--r-- arch/s390/include/asm/cputime.h 42
-rw-r--r-- arch/s390/include/asm/lowcore.h 49
-rw-r--r-- arch/s390/include/asm/system.h 4
-rw-r--r-- arch/s390/include/asm/thread_info.h 2
-rw-r--r-- arch/s390/include/asm/timer.h 16
-rw-r--r-- arch/s390/include/asm/vdso.h 15
-rw-r--r-- arch/s390/kernel/asm-offsets.c 5
-rw-r--r-- arch/s390/kernel/entry.S 5
-rw-r--r-- arch/s390/kernel/entry64.S 50
-rw-r--r-- arch/s390/kernel/head64.S 2
-rw-r--r-- arch/s390/kernel/process.c 43
-rw-r--r-- arch/s390/kernel/s390_ext.c 2
-rw-r--r-- arch/s390/kernel/setup.c 2
-rw-r--r-- arch/s390/kernel/smp.c 34
-rw-r--r-- arch/s390/kernel/vdso.c 123
-rw-r--r-- arch/s390/kernel/vdso64/clock_getres.S 5
-rw-r--r-- arch/s390/kernel/vdso64/clock_gettime.S 39
-rw-r--r-- arch/s390/kernel/vtime.c 486
-rw-r--r-- arch/x86/xen/time.c 10
-rw-r--r-- drivers/s390/cio/cio.c 2
-rw-r--r-- drivers/s390/s390mach.c 3
-rw-r--r-- include/linux/kernel_stat.h 13
-rw-r--r-- include/linux/sched.h 1
-rw-r--r-- kernel/sched.c 115
-rw-r--r-- kernel/time/tick-sched.c 12
-rw-r--r-- kernel/timer.c 15
30 files changed, 683 insertions, 456 deletions
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a42c88f..f0ebb342409d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,13 +93,14 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
93 now = ia64_get_itc(); 93 now = ia64_get_itc();
94 94
95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); 95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
96 account_system_time(prev, 0, delta_stime); 96 if (idle_task(smp_processor_id()) != prev)
97 account_system_time_scaled(prev, delta_stime); 97 account_system_time(prev, 0, delta_stime, delta_stime);
98 else
99 account_idle_time(delta_stime);
98 100
99 if (pi->ac_utime) { 101 if (pi->ac_utime) {
100 delta_utime = cycle_to_cputime(pi->ac_utime); 102 delta_utime = cycle_to_cputime(pi->ac_utime);
101 account_user_time(prev, delta_utime); 103 account_user_time(prev, delta_utime, delta_utime);
102 account_user_time_scaled(prev, delta_utime);
103 } 104 }
104 105
105 pi->ac_stamp = ni->ac_stamp = now; 106 pi->ac_stamp = ni->ac_stamp = now;
@@ -122,8 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
122 now = ia64_get_itc(); 123 now = ia64_get_itc();
123 124
124 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); 125 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
125 account_system_time(tsk, 0, delta_stime); 126 if (irq_count() || idle_task(smp_processor_id()) != tsk)
126 account_system_time_scaled(tsk, delta_stime); 127 account_system_time(tsk, 0, delta_stime, delta_stime);
128 else
129 account_idle_time(delta_stime);
127 ti->ac_stime = 0; 130 ti->ac_stime = 0;
128 131
129 ti->ac_stamp = now; 132 ti->ac_stamp = now;
@@ -143,8 +146,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
143 146
144 if (ti->ac_utime) { 147 if (ti->ac_utime) {
145 delta_utime = cycle_to_cputime(ti->ac_utime); 148 delta_utime = cycle_to_cputime(ti->ac_utime);
146 account_user_time(p, delta_utime); 149 account_user_time(p, delta_utime, delta_utime);
147 account_user_time_scaled(p, delta_utime);
148 ti->ac_utime = 0; 150 ti->ac_utime = 0;
149 } 151 }
150} 152}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
33#include <linux/mqueue.h> 33#include <linux/mqueue.h>
34#include <linux/hardirq.h> 34#include <linux/hardirq.h>
35#include <linux/utsname.h> 35#include <linux/utsname.h>
36#include <linux/kernel_stat.h>
36 37
37#include <asm/pgtable.h> 38#include <asm/pgtable.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 99f1ddd68582..c9564031a2a9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
256 delta += sys_time; 256 delta += sys_time;
257 get_paca()->system_time = 0; 257 get_paca()->system_time = 0;
258 } 258 }
259 account_system_time(tsk, 0, delta); 259 if (in_irq() || idle_task(smp_processor_id()) != tsk)
260 account_system_time_scaled(tsk, deltascaled); 260 account_system_time(tsk, 0, delta, deltascaled);
261 else
262 account_idle_time(delta);
261 per_cpu(cputime_last_delta, smp_processor_id()) = delta; 263 per_cpu(cputime_last_delta, smp_processor_id()) = delta;
262 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; 264 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
263 local_irq_restore(flags); 265 local_irq_restore(flags);
@@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
275 277
276 utime = get_paca()->user_time; 278 utime = get_paca()->user_time;
277 get_paca()->user_time = 0; 279 get_paca()->user_time = 0;
278 account_user_time(tsk, utime);
279
280 utimescaled = cputime_to_scaled(utime); 280 utimescaled = cputime_to_scaled(utime);
281 account_user_time_scaled(tsk, utimescaled); 281 account_user_time(tsk, utime, utimescaled);
282} 282}
283 283
284/* 284/*
@@ -338,8 +338,12 @@ void calculate_steal_time(void)
338 tb = mftb(); 338 tb = mftb();
339 purr = mfspr(SPRN_PURR); 339 purr = mfspr(SPRN_PURR);
340 stolen = (tb - pme->tb) - (purr - pme->purr); 340 stolen = (tb - pme->tb) - (purr - pme->purr);
341 if (stolen > 0) 341 if (stolen > 0) {
342 account_steal_time(current, stolen); 342 if (idle_task(smp_processor_id()) != current)
343 account_steal_time(stolen);
344 else
345 account_idle_time(stolen);
346 }
343 pme->tb = tb; 347 pme->tb = tb;
344 pme->purr = purr; 348 pme->purr = purr;
345} 349}
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index e5a6a9ba3adf..d60a2eefb17b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -14,7 +14,6 @@
14 14
15struct s390_idle_data { 15struct s390_idle_data {
16 spinlock_t lock; 16 spinlock_t lock;
17 unsigned int in_idle;
18 unsigned long long idle_count; 17 unsigned long long idle_count;
19 unsigned long long idle_enter; 18 unsigned long long idle_enter;
20 unsigned long long idle_time; 19 unsigned long long idle_time;
@@ -22,12 +21,12 @@ struct s390_idle_data {
22 21
23DECLARE_PER_CPU(struct s390_idle_data, s390_idle); 22DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
24 23
25void s390_idle_leave(void); 24void vtime_start_cpu(void);
26 25
27static inline void s390_idle_check(void) 26static inline void s390_idle_check(void)
28{ 27{
29 if ((&__get_cpu_var(s390_idle))->in_idle) 28 if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
30 s390_idle_leave(); 29 vtime_start_cpu();
31} 30}
32 31
33#endif /* _ASM_S390_CPU_H_ */ 32#endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 133ce054fc89..521726430afa 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,7 +11,7 @@
11 11
12#include <asm/div64.h> 12#include <asm/div64.h>
13 13
14/* We want to use micro-second resolution. */ 14/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
15 15
16typedef unsigned long long cputime_t; 16typedef unsigned long long cputime_t;
17typedef unsigned long long cputime64_t; 17typedef unsigned long long cputime64_t;
@@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base)
53#define cputime_ge(__a, __b) ((__a) >= (__b)) 53#define cputime_ge(__a, __b) ((__a) >= (__b))
54#define cputime_lt(__a, __b) ((__a) < (__b)) 54#define cputime_lt(__a, __b) ((__a) < (__b))
55#define cputime_le(__a, __b) ((__a) <= (__b)) 55#define cputime_le(__a, __b) ((__a) <= (__b))
56#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ)) 56#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ))
57#define cputime_to_scaled(__ct) (__ct) 57#define cputime_to_scaled(__ct) (__ct)
58#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ)) 58#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ))
59 59
60#define cputime64_zero (0ULL) 60#define cputime64_zero (0ULL)
61#define cputime64_add(__a, __b) ((__a) + (__b)) 61#define cputime64_add(__a, __b) ((__a) + (__b))
@@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base)
64static inline u64 64static inline u64
65cputime64_to_jiffies64(cputime64_t cputime) 65cputime64_to_jiffies64(cputime64_t cputime)
66{ 66{
67 do_div(cputime, 1000000 / HZ); 67 do_div(cputime, 4096000000ULL / HZ);
68 return cputime; 68 return cputime;
69} 69}
70 70
@@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime)
74static inline unsigned int 74static inline unsigned int
75cputime_to_msecs(const cputime_t cputime) 75cputime_to_msecs(const cputime_t cputime)
76{ 76{
77 return __div(cputime, 1000); 77 return __div(cputime, 4096000);
78} 78}
79 79
80static inline cputime_t 80static inline cputime_t
81msecs_to_cputime(const unsigned int m) 81msecs_to_cputime(const unsigned int m)
82{ 82{
83 return (cputime_t) m * 1000; 83 return (cputime_t) m * 4096000;
84} 84}
85 85
86/* 86/*
@@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m)
89static inline unsigned int 89static inline unsigned int
90cputime_to_secs(const cputime_t cputime) 90cputime_to_secs(const cputime_t cputime)
91{ 91{
92 return __div(cputime, 1000000); 92 return __div(cputime, 2048000000) >> 1;
93} 93}
94 94
95static inline cputime_t 95static inline cputime_t
96secs_to_cputime(const unsigned int s) 96secs_to_cputime(const unsigned int s)
97{ 97{
98 return (cputime_t) s * 1000000; 98 return (cputime_t) s * 4096000000ULL;
99} 99}
100 100
101/* 101/*
@@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s)
104static inline cputime_t 104static inline cputime_t
105timespec_to_cputime(const struct timespec *value) 105timespec_to_cputime(const struct timespec *value)
106{ 106{
107 return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000; 107 return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
108} 108}
109 109
110static inline void 110static inline void
@@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
114 register_pair rp; 114 register_pair rp;
115 115
116 rp.pair = cputime >> 1; 116 rp.pair = cputime >> 1;
117 asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); 117 asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
118 value->tv_nsec = rp.subreg.even * 1000; 118 value->tv_nsec = rp.subreg.even * 1000 / 4096;
119 value->tv_sec = rp.subreg.odd; 119 value->tv_sec = rp.subreg.odd;
120#else 120#else
121 value->tv_nsec = (cputime % 1000000) * 1000; 121 value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
122 value->tv_sec = cputime / 1000000; 122 value->tv_sec = cputime / 4096000000ULL;
123#endif 123#endif
124} 124}
125 125
@@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
131static inline cputime_t 131static inline cputime_t
132timeval_to_cputime(const struct timeval *value) 132timeval_to_cputime(const struct timeval *value)
133{ 133{
134 return value->tv_usec + (u64) value->tv_sec * 1000000; 134 return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
135} 135}
136 136
137static inline void 137static inline void
@@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
141 register_pair rp; 141 register_pair rp;
142 142
143 rp.pair = cputime >> 1; 143 rp.pair = cputime >> 1;
144 asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); 144 asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
145 value->tv_usec = rp.subreg.even; 145 value->tv_usec = rp.subreg.even / 4096;
146 value->tv_sec = rp.subreg.odd; 146 value->tv_sec = rp.subreg.odd;
147#else 147#else
148 value->tv_usec = cputime % 1000000; 148 value->tv_usec = cputime % 4096000000ULL;
149 value->tv_sec = cputime / 1000000; 149 value->tv_sec = cputime / 4096000000ULL;
150#endif 150#endif
151} 151}
152 152
@@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
156static inline clock_t 156static inline clock_t
157cputime_to_clock_t(cputime_t cputime) 157cputime_to_clock_t(cputime_t cputime)
158{ 158{
159 return __div(cputime, 1000000 / USER_HZ); 159 return __div(cputime, 4096000000ULL / USER_HZ);
160} 160}
161 161
162static inline cputime_t 162static inline cputime_t
163clock_t_to_cputime(unsigned long x) 163clock_t_to_cputime(unsigned long x)
164{ 164{
165 return (cputime_t) x * (1000000 / USER_HZ); 165 return (cputime_t) x * (4096000000ULL / USER_HZ);
166} 166}
167 167
168/* 168/*
@@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x)
171static inline clock_t 171static inline clock_t
172cputime64_to_clock_t(cputime64_t cputime) 172cputime64_to_clock_t(cputime64_t cputime)
173{ 173{
174 return __div(cputime, 1000000 / USER_HZ); 174 return __div(cputime, 4096000000ULL / USER_HZ);
175} 175}
176 176
177#endif /* _S390_CPUTIME_H */ 177#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 0bc51d52a899..ffdef5fe8587 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,11 +67,11 @@
67#define __LC_SYNC_ENTER_TIMER 0x248 67#define __LC_SYNC_ENTER_TIMER 0x248
68#define __LC_ASYNC_ENTER_TIMER 0x250 68#define __LC_ASYNC_ENTER_TIMER 0x250
69#define __LC_EXIT_TIMER 0x258 69#define __LC_EXIT_TIMER 0x258
70#define __LC_LAST_UPDATE_TIMER 0x260 70#define __LC_USER_TIMER 0x260
71#define __LC_USER_TIMER 0x268 71#define __LC_SYSTEM_TIMER 0x268
72#define __LC_SYSTEM_TIMER 0x270 72#define __LC_STEAL_TIMER 0x270
73#define __LC_LAST_UPDATE_CLOCK 0x278 73#define __LC_LAST_UPDATE_TIMER 0x278
74#define __LC_STEAL_CLOCK 0x280 74#define __LC_LAST_UPDATE_CLOCK 0x280
75#define __LC_RETURN_MCCK_PSW 0x288 75#define __LC_RETURN_MCCK_PSW 0x288
76#define __LC_KERNEL_STACK 0xC40 76#define __LC_KERNEL_STACK 0xC40
77#define __LC_THREAD_INFO 0xC44 77#define __LC_THREAD_INFO 0xC44
@@ -89,11 +89,11 @@
89#define __LC_SYNC_ENTER_TIMER 0x250 89#define __LC_SYNC_ENTER_TIMER 0x250
90#define __LC_ASYNC_ENTER_TIMER 0x258 90#define __LC_ASYNC_ENTER_TIMER 0x258
91#define __LC_EXIT_TIMER 0x260 91#define __LC_EXIT_TIMER 0x260
92#define __LC_LAST_UPDATE_TIMER 0x268 92#define __LC_USER_TIMER 0x268
93#define __LC_USER_TIMER 0x270 93#define __LC_SYSTEM_TIMER 0x270
94#define __LC_SYSTEM_TIMER 0x278 94#define __LC_STEAL_TIMER 0x278
95#define __LC_LAST_UPDATE_CLOCK 0x280 95#define __LC_LAST_UPDATE_TIMER 0x280
96#define __LC_STEAL_CLOCK 0x288 96#define __LC_LAST_UPDATE_CLOCK 0x288
97#define __LC_RETURN_MCCK_PSW 0x290 97#define __LC_RETURN_MCCK_PSW 0x290
98#define __LC_KERNEL_STACK 0xD40 98#define __LC_KERNEL_STACK 0xD40
99#define __LC_THREAD_INFO 0xD48 99#define __LC_THREAD_INFO 0xD48
@@ -106,8 +106,10 @@
106#define __LC_IPLDEV 0xDB8 106#define __LC_IPLDEV 0xDB8
107#define __LC_CURRENT 0xDD8 107#define __LC_CURRENT 0xDD8
108#define __LC_INT_CLOCK 0xDE8 108#define __LC_INT_CLOCK 0xDE8
109#define __LC_VDSO_PER_CPU 0xE38
109#endif /* __s390x__ */ 110#endif /* __s390x__ */
110 111
112#define __LC_PASTE 0xE40
111 113
112#define __LC_PANIC_MAGIC 0xE00 114#define __LC_PANIC_MAGIC 0xE00
113#ifndef __s390x__ 115#ifndef __s390x__
@@ -252,11 +254,11 @@ struct _lowcore
252 __u64 sync_enter_timer; /* 0x248 */ 254 __u64 sync_enter_timer; /* 0x248 */
253 __u64 async_enter_timer; /* 0x250 */ 255 __u64 async_enter_timer; /* 0x250 */
254 __u64 exit_timer; /* 0x258 */ 256 __u64 exit_timer; /* 0x258 */
255 __u64 last_update_timer; /* 0x260 */ 257 __u64 user_timer; /* 0x260 */
256 __u64 user_timer; /* 0x268 */ 258 __u64 system_timer; /* 0x268 */
257 __u64 system_timer; /* 0x270 */ 259 __u64 steal_timer; /* 0x270 */
258 __u64 last_update_clock; /* 0x278 */ 260 __u64 last_update_timer; /* 0x278 */
259 __u64 steal_clock; /* 0x280 */ 261 __u64 last_update_clock; /* 0x280 */
260 psw_t return_mcck_psw; /* 0x288 */ 262 psw_t return_mcck_psw; /* 0x288 */
261 __u8 pad8[0xc00-0x290]; /* 0x290 */ 263 __u8 pad8[0xc00-0x290]; /* 0x290 */
262 264
@@ -343,11 +345,11 @@ struct _lowcore
343 __u64 sync_enter_timer; /* 0x250 */ 345 __u64 sync_enter_timer; /* 0x250 */
344 __u64 async_enter_timer; /* 0x258 */ 346 __u64 async_enter_timer; /* 0x258 */
345 __u64 exit_timer; /* 0x260 */ 347 __u64 exit_timer; /* 0x260 */
346 __u64 last_update_timer; /* 0x268 */ 348 __u64 user_timer; /* 0x268 */
347 __u64 user_timer; /* 0x270 */ 349 __u64 system_timer; /* 0x270 */
348 __u64 system_timer; /* 0x278 */ 350 __u64 steal_timer; /* 0x278 */
349 __u64 last_update_clock; /* 0x280 */ 351 __u64 last_update_timer; /* 0x280 */
350 __u64 steal_clock; /* 0x288 */ 352 __u64 last_update_clock; /* 0x288 */
351 psw_t return_mcck_psw; /* 0x290 */ 353 psw_t return_mcck_psw; /* 0x290 */
352 __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ 354 __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */
353 /* System info area */ 355 /* System info area */
@@ -381,7 +383,12 @@ struct _lowcore
381 /* whether the kernel died with panic() or not */ 383 /* whether the kernel died with panic() or not */
382 __u32 panic_magic; /* 0xe00 */ 384 __u32 panic_magic; /* 0xe00 */
383 385
384 __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ 386 /* Per cpu primary space access list */
387 __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */
388 __u32 vdso_per_cpu_data; /* 0xe3c */
389 __u32 paste[16]; /* 0xe40 */
390
391 __u8 pad13[0x11b8-0xe80]; /* 0xe80 */
385 392
386 /* 64 bit extparam used for pfault, diag 250 etc */ 393 /* 64 bit extparam used for pfault, diag 250 etc */
387 __u64 ext_params2; /* 0x11B8 */ 394 __u64 ext_params2; /* 0x11B8 */
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 024ef42ed6d7..3a8b26eb1f2e 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs)
99 prev = __switch_to(prev,next); \ 99 prev = __switch_to(prev,next); \
100} while (0) 100} while (0)
101 101
102extern void account_vtime(struct task_struct *); 102extern void account_vtime(struct task_struct *, struct task_struct *);
103extern void account_tick_vtime(struct task_struct *); 103extern void account_tick_vtime(struct task_struct *);
104extern void account_system_vtime(struct task_struct *); 104extern void account_system_vtime(struct task_struct *);
105 105
@@ -121,7 +121,7 @@ static inline void cmma_init(void) { }
121 121
122#define finish_arch_switch(prev) do { \ 122#define finish_arch_switch(prev) do { \
123 set_fs(current->thread.mm_segment); \ 123 set_fs(current->thread.mm_segment); \
124 account_vtime(prev); \ 124 account_vtime(prev, current); \
125} while (0) 125} while (0)
126 126
127#define nop() asm volatile("nop") 127#define nop() asm volatile("nop")
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c1eaf9604da7..c544aa524535 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -47,6 +47,8 @@ struct thread_info {
47 unsigned int cpu; /* current CPU */ 47 unsigned int cpu; /* current CPU */
48 int preempt_count; /* 0 => preemptable, <0 => BUG */ 48 int preempt_count; /* 0 => preemptable, <0 => BUG */
49 struct restart_block restart_block; 49 struct restart_block restart_block;
50 __u64 user_timer;
51 __u64 system_timer;
50}; 52};
51 53
52/* 54/*
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 61705d60f995..e4bcab739c19 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -23,20 +23,18 @@ struct vtimer_list {
23 __u64 expires; 23 __u64 expires;
24 __u64 interval; 24 __u64 interval;
25 25
26 spinlock_t lock;
27 unsigned long magic;
28
29 void (*function)(unsigned long); 26 void (*function)(unsigned long);
30 unsigned long data; 27 unsigned long data;
31}; 28};
32 29
33/* the offset value will wrap after ca. 71 years */ 30/* the vtimer value will wrap after ca. 71 years */
34struct vtimer_queue { 31struct vtimer_queue {
35 struct list_head list; 32 struct list_head list;
36 spinlock_t lock; 33 spinlock_t lock;
37 __u64 to_expire; /* current event expire time */ 34 __u64 timer; /* last programmed timer */
38 __u64 offset; /* list offset to zero */ 35 __u64 elapsed; /* elapsed time of timer expire values */
39 __u64 idle; /* temp var for idle */ 36 __u64 idle; /* temp var for idle */
37 int do_spt; /* =1: reprogram cpu timer in idle */
40}; 38};
41 39
42extern void init_virt_timer(struct vtimer_list *timer); 40extern void init_virt_timer(struct vtimer_list *timer);
@@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer);
48extern void init_cpu_vtimer(void); 46extern void init_cpu_vtimer(void);
49extern void vtime_init(void); 47extern void vtime_init(void);
50 48
51extern void vtime_start_cpu_timer(void); 49extern void vtime_stop_cpu(void);
52extern void vtime_stop_cpu_timer(void); 50extern void vtime_start_leave(void);
53 51
54#endif /* __KERNEL__ */ 52#endif /* __KERNEL__ */
55 53
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a44f4fe16a35..7bdd7c8ebc91 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -12,9 +12,9 @@
12#ifndef __ASSEMBLY__ 12#ifndef __ASSEMBLY__
13 13
14/* 14/*
15 * Note about this structure: 15 * Note about the vdso_data and vdso_per_cpu_data structures:
16 * 16 *
17 * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this 17 * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
18 * structure is supposed to be known only to the function in the vdso 18 * structure is supposed to be known only to the function in the vdso
19 * itself and may change without notice. 19 * itself and may change without notice.
20 */ 20 */
@@ -28,10 +28,21 @@ struct vdso_data {
28 __u64 wtom_clock_nsec; /* 0x28 */ 28 __u64 wtom_clock_nsec; /* 0x28 */
29 __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ 29 __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
30 __u32 tz_dsttime; /* Type of dst correction 0x34 */ 30 __u32 tz_dsttime; /* Type of dst correction 0x34 */
31 __u32 ectg_available;
32};
33
34struct vdso_per_cpu_data {
35 __u64 ectg_timer_base;
36 __u64 ectg_user_time;
31}; 37};
32 38
33extern struct vdso_data *vdso_data; 39extern struct vdso_data *vdso_data;
34 40
41#ifdef CONFIG_64BIT
42int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
43void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
44#endif
45
35#endif /* __ASSEMBLY__ */ 46#endif /* __ASSEMBLY__ */
36 47
37#endif /* __KERNEL__ */ 48#endif /* __KERNEL__ */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e641f60bac99..67a60016babb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
48 DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); 48 DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
49 DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); 49 DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
50 DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); 50 DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
51 DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
52 DEFINE(__VDSO_ECTG_BASE,
53 offsetof(struct vdso_per_cpu_data, ectg_timer_base));
54 DEFINE(__VDSO_ECTG_USER,
55 offsetof(struct vdso_per_cpu_data, ectg_user_time));
51 /* constants used by the vdso */ 56 /* constants used by the vdso */
52 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); 57 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
53 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); 58 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55de521aef77..1268aa2991bf 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -583,8 +583,8 @@ kernel_per:
583 583
584 .globl io_int_handler 584 .globl io_int_handler
585io_int_handler: 585io_int_handler:
586 stpt __LC_ASYNC_ENTER_TIMER
587 stck __LC_INT_CLOCK 586 stck __LC_INT_CLOCK
587 stpt __LC_ASYNC_ENTER_TIMER
588 SAVE_ALL_BASE __LC_SAVE_AREA+16 588 SAVE_ALL_BASE __LC_SAVE_AREA+16
589 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 589 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
590 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 590 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
@@ -723,8 +723,8 @@ io_notify_resume:
723 723
724 .globl ext_int_handler 724 .globl ext_int_handler
725ext_int_handler: 725ext_int_handler:
726 stpt __LC_ASYNC_ENTER_TIMER
727 stck __LC_INT_CLOCK 726 stck __LC_INT_CLOCK
727 stpt __LC_ASYNC_ENTER_TIMER
728 SAVE_ALL_BASE __LC_SAVE_AREA+16 728 SAVE_ALL_BASE __LC_SAVE_AREA+16
729 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 729 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
730 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 730 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
@@ -750,6 +750,7 @@ __critical_end:
750 750
751 .globl mcck_int_handler 751 .globl mcck_int_handler
752mcck_int_handler: 752mcck_int_handler:
753 stck __LC_INT_CLOCK
753 spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer 754 spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer
754 lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs 755 lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
755 SAVE_ALL_BASE __LC_SAVE_AREA+32 756 SAVE_ALL_BASE __LC_SAVE_AREA+32
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 16bb4fd1a403..c6fbde13971a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
177 .if !\sync 177 .if !\sync
178 ni \psworg+1,0xfd # clear wait state bit 178 ni \psworg+1,0xfd # clear wait state bit
179 .endif 179 .endif
180 lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user 180 lg %r14,__LC_VDSO_PER_CPU
181 lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user
181 stpt __LC_EXIT_TIMER 182 stpt __LC_EXIT_TIMER
183 mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
184 lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user
182 lpswe \psworg # back to caller 185 lpswe \psworg # back to caller
183 .endm 186 .endm
184 187
@@ -559,8 +562,8 @@ kernel_per:
559 */ 562 */
560 .globl io_int_handler 563 .globl io_int_handler
561io_int_handler: 564io_int_handler:
562 stpt __LC_ASYNC_ENTER_TIMER
563 stck __LC_INT_CLOCK 565 stck __LC_INT_CLOCK
566 stpt __LC_ASYNC_ENTER_TIMER
564 SAVE_ALL_BASE __LC_SAVE_AREA+32 567 SAVE_ALL_BASE __LC_SAVE_AREA+32
565 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 568 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
566 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 569 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
@@ -721,8 +724,8 @@ io_notify_resume:
721 */ 724 */
722 .globl ext_int_handler 725 .globl ext_int_handler
723ext_int_handler: 726ext_int_handler:
724 stpt __LC_ASYNC_ENTER_TIMER
725 stck __LC_INT_CLOCK 727 stck __LC_INT_CLOCK
728 stpt __LC_ASYNC_ENTER_TIMER
726 SAVE_ALL_BASE __LC_SAVE_AREA+32 729 SAVE_ALL_BASE __LC_SAVE_AREA+32
727 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 730 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
728 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 731 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
@@ -746,6 +749,7 @@ __critical_end:
746 */ 749 */
747 .globl mcck_int_handler 750 .globl mcck_int_handler
748mcck_int_handler: 751mcck_int_handler:
752 stck __LC_INT_CLOCK
749 la %r1,4095 # revalidate r1 753 la %r1,4095 # revalidate r1
750 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer 754 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
751 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs 755 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
@@ -979,23 +983,23 @@ cleanup_sysc_return:
979 983
980cleanup_sysc_leave: 984cleanup_sysc_leave:
981 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) 985 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
982 je 2f 986 je 3f
983 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
984 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) 987 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
985 je 2f 988 jhe 0f
986 mvc __LC_RETURN_PSW(16),SP_PSW(%r15) 989 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
9900: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
987 cghi %r12,__LC_MCK_OLD_PSW 991 cghi %r12,__LC_MCK_OLD_PSW
988 jne 0f 992 jne 1f
989 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) 993 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
990 j 1f 994 j 2f
9910: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) 9951: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
9921: lmg %r0,%r11,SP_R0(%r15) 9962: lmg %r0,%r11,SP_R0(%r15)
993 lg %r15,SP_R15(%r15) 997 lg %r15,SP_R15(%r15)
9942: la %r12,__LC_RETURN_PSW 9983: la %r12,__LC_RETURN_PSW
995 br %r14 999 br %r14
996cleanup_sysc_leave_insn: 1000cleanup_sysc_leave_insn:
997 .quad sysc_done - 4 1001 .quad sysc_done - 4
998 .quad sysc_done - 8 1002 .quad sysc_done - 16
999 1003
1000cleanup_io_return: 1004cleanup_io_return:
1001 mvc __LC_RETURN_PSW(8),0(%r12) 1005 mvc __LC_RETURN_PSW(8),0(%r12)
@@ -1005,23 +1009,23 @@ cleanup_io_return:
1005 1009
1006cleanup_io_leave: 1010cleanup_io_leave:
1007 clc 8(8,%r12),BASED(cleanup_io_leave_insn) 1011 clc 8(8,%r12),BASED(cleanup_io_leave_insn)
1008 je 2f 1012 je 3f
1009 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
1010 clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) 1013 clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
1011 je 2f 1014 jhe 0f
1012 mvc __LC_RETURN_PSW(16),SP_PSW(%r15) 1015 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
10160: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
1013 cghi %r12,__LC_MCK_OLD_PSW 1017 cghi %r12,__LC_MCK_OLD_PSW
1014 jne 0f 1018 jne 1f
1015 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) 1019 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
1016 j 1f 1020 j 2f
10170: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) 10211: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
10181: lmg %r0,%r11,SP_R0(%r15) 10222: lmg %r0,%r11,SP_R0(%r15)
1019 lg %r15,SP_R15(%r15) 1023 lg %r15,SP_R15(%r15)
10202: la %r12,__LC_RETURN_PSW 10243: la %r12,__LC_RETURN_PSW
1021 br %r14 1025 br %r14
1022cleanup_io_leave_insn: 1026cleanup_io_leave_insn:
1023 .quad io_done - 4 1027 .quad io_done - 4
1024 .quad io_done - 8 1028 .quad io_done - 16
1025 1029
1026/* 1030/*
1027 * Integer constants 1031 * Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3ccd36b24b8f..f9f70aa15244 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
87 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area 87 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
88 # move IPL device to lowcore 88 # move IPL device to lowcore
89 mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) 89 mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
90 lghi %r0,__LC_PASTE
91 stg %r0,__LC_VDSO_PER_CPU
90# 92#
91# Setup stack 93# Setup stack
92# 94#
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 04f8c67a6101..b6110bdf8dc2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -38,6 +38,7 @@
38#include <linux/utsname.h> 38#include <linux/utsname.h>
39#include <linux/tick.h> 39#include <linux/tick.h>
40#include <linux/elfcore.h> 40#include <linux/elfcore.h>
41#include <linux/kernel_stat.h>
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
43#include <asm/system.h> 44#include <asm/system.h>
@@ -45,7 +46,6 @@
45#include <asm/processor.h> 46#include <asm/processor.h>
46#include <asm/irq.h> 47#include <asm/irq.h>
47#include <asm/timer.h> 48#include <asm/timer.h>
48#include <asm/cpu.h>
49#include "entry.h" 49#include "entry.h"
50 50
51asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); 51asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
75 return sf->gprs[8]; 75 return sf->gprs[8];
76} 76}
77 77
78DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
79 .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
80};
81
82static int s390_idle_enter(void)
83{
84 struct s390_idle_data *idle;
85
86 idle = &__get_cpu_var(s390_idle);
87 spin_lock(&idle->lock);
88 idle->idle_count++;
89 idle->in_idle = 1;
90 idle->idle_enter = get_clock();
91 spin_unlock(&idle->lock);
92 vtime_stop_cpu_timer();
93 return NOTIFY_OK;
94}
95
96void s390_idle_leave(void)
97{
98 struct s390_idle_data *idle;
99
100 vtime_start_cpu_timer();
101 idle = &__get_cpu_var(s390_idle);
102 spin_lock(&idle->lock);
103 idle->idle_time += get_clock() - idle->idle_enter;
104 idle->in_idle = 0;
105 spin_unlock(&idle->lock);
106}
107
108extern void s390_handle_mcck(void); 78extern void s390_handle_mcck(void);
109/* 79/*
110 * The idle loop on a S390... 80 * The idle loop on a S390...
@@ -117,10 +87,6 @@ static void default_idle(void)
117 local_irq_enable(); 87 local_irq_enable();
118 return; 88 return;
119 } 89 }
120 if (s390_idle_enter() == NOTIFY_BAD) {
121 local_irq_enable();
122 return;
123 }
124#ifdef CONFIG_HOTPLUG_CPU 90#ifdef CONFIG_HOTPLUG_CPU
125 if (cpu_is_offline(smp_processor_id())) { 91 if (cpu_is_offline(smp_processor_id())) {
126 preempt_enable_no_resched(); 92 preempt_enable_no_resched();
@@ -130,7 +96,6 @@ static void default_idle(void)
130 local_mcck_disable(); 96 local_mcck_disable();
131 if (test_thread_flag(TIF_MCCK_PENDING)) { 97 if (test_thread_flag(TIF_MCCK_PENDING)) {
132 local_mcck_enable(); 98 local_mcck_enable();
133 s390_idle_leave();
134 local_irq_enable(); 99 local_irq_enable();
135 s390_handle_mcck(); 100 s390_handle_mcck();
136 return; 101 return;
@@ -138,9 +103,9 @@ static void default_idle(void)
138 trace_hardirqs_on(); 103 trace_hardirqs_on();
139 /* Don't trace preempt off for idle. */ 104 /* Don't trace preempt off for idle. */
140 stop_critical_timings(); 105 stop_critical_timings();
141 /* Wait for external, I/O or machine check interrupt. */ 106 /* Stop virtual timer and halt the cpu. */
142 __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | 107 vtime_stop_cpu();
143 PSW_MASK_IO | PSW_MASK_EXT); 108 /* Reenable preemption tracer. */
144 start_critical_timings(); 109 start_critical_timings();
145} 110}
146 111
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index e019b419efc6..a0d2d55d7fb3 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code)
119 struct pt_regs *old_regs; 119 struct pt_regs *old_regs;
120 120
121 old_regs = set_irq_regs(regs); 121 old_regs = set_irq_regs(regs);
122 irq_enter();
123 s390_idle_check(); 122 s390_idle_check();
123 irq_enter();
124 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) 124 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
125 /* Serve timer interrupts first. */ 125 /* Serve timer interrupts first. */
126 clock_comparator_work(); 126 clock_comparator_work();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b7a1efd5522c..d825f4950e4e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
427 /* enable extended save area */ 427 /* enable extended save area */
428 __ctl_set_bit(14, 29); 428 __ctl_set_bit(14, 29);
429 } 429 }
430#else
431 lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
430#endif 432#endif
431 set_prefix((u32)(unsigned long) lc); 433 set_prefix((u32)(unsigned long) lc);
432} 434}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3ed5c7a83c6c..9c0ccb532a45 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
47#include <asm/lowcore.h> 47#include <asm/lowcore.h>
48#include <asm/sclp.h> 48#include <asm/sclp.h>
49#include <asm/cpu.h> 49#include <asm/cpu.h>
50#include <asm/vdso.h>
50#include "entry.h" 51#include "entry.h"
51 52
52/* 53/*
@@ -500,6 +501,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
500 goto out; 501 goto out;
501 lowcore->extended_save_area_addr = (u32) save_area; 502 lowcore->extended_save_area_addr = (u32) save_area;
502 } 503 }
504#else
505 if (vdso_alloc_per_cpu(cpu, lowcore))
506 goto out;
503#endif 507#endif
504 lowcore_ptr[cpu] = lowcore; 508 lowcore_ptr[cpu] = lowcore;
505 return 0; 509 return 0;
@@ -522,6 +526,8 @@ static void smp_free_lowcore(int cpu)
522#ifndef CONFIG_64BIT 526#ifndef CONFIG_64BIT
523 if (MACHINE_HAS_IEEE) 527 if (MACHINE_HAS_IEEE)
524 free_page((unsigned long) lowcore->extended_save_area_addr); 528 free_page((unsigned long) lowcore->extended_save_area_addr);
529#else
530 vdso_free_per_cpu(cpu, lowcore);
525#endif 531#endif
526 free_page(lowcore->panic_stack - PAGE_SIZE); 532 free_page(lowcore->panic_stack - PAGE_SIZE);
527 free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); 533 free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -664,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
664 lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); 670 lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
665 panic_stack = __get_free_page(GFP_KERNEL); 671 panic_stack = __get_free_page(GFP_KERNEL);
666 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); 672 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
673 BUG_ON(!lowcore || !panic_stack || !async_stack);
667#ifndef CONFIG_64BIT 674#ifndef CONFIG_64BIT
668 if (MACHINE_HAS_IEEE) 675 if (MACHINE_HAS_IEEE)
669 save_area = get_zeroed_page(GFP_KERNEL); 676 save_area = get_zeroed_page(GFP_KERNEL);
@@ -677,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
677#ifndef CONFIG_64BIT 684#ifndef CONFIG_64BIT
678 if (MACHINE_HAS_IEEE) 685 if (MACHINE_HAS_IEEE)
679 lowcore->extended_save_area_addr = (u32) save_area; 686 lowcore->extended_save_area_addr = (u32) save_area;
687#else
688 BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
680#endif 689#endif
681 set_prefix((u32)(unsigned long) lowcore); 690 set_prefix((u32)(unsigned long) lowcore);
682 local_mcck_enable(); 691 local_mcck_enable();
@@ -845,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev,
845 unsigned long long idle_count; 854 unsigned long long idle_count;
846 855
847 idle = &per_cpu(s390_idle, dev->id); 856 idle = &per_cpu(s390_idle, dev->id);
848 spin_lock_irq(&idle->lock); 857 spin_lock(&idle->lock);
849 idle_count = idle->idle_count; 858 idle_count = idle->idle_count;
850 spin_unlock_irq(&idle->lock); 859 if (idle->idle_enter)
860 idle_count++;
861 spin_unlock(&idle->lock);
851 return sprintf(buf, "%llu\n", idle_count); 862 return sprintf(buf, "%llu\n", idle_count);
852} 863}
853static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); 864static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -856,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev,
856 struct sysdev_attribute *attr, char *buf) 867 struct sysdev_attribute *attr, char *buf)
857{ 868{
858 struct s390_idle_data *idle; 869 struct s390_idle_data *idle;
859 unsigned long long new_time; 870 unsigned long long now, idle_time, idle_enter;
860 871
861 idle = &per_cpu(s390_idle, dev->id); 872 idle = &per_cpu(s390_idle, dev->id);
862 spin_lock_irq(&idle->lock); 873 spin_lock(&idle->lock);
863 if (idle->in_idle) { 874 now = get_clock();
864 new_time = get_clock(); 875 idle_time = idle->idle_time;
865 idle->idle_time += new_time - idle->idle_enter; 876 idle_enter = idle->idle_enter;
866 idle->idle_enter = new_time; 877 if (idle_enter != 0ULL && idle_enter < now)
867 } 878 idle_time += now - idle_enter;
868 new_time = idle->idle_time; 879 spin_unlock(&idle->lock);
869 spin_unlock_irq(&idle->lock); 880 return sprintf(buf, "%llu\n", idle_time >> 12);
870 return sprintf(buf, "%llu\n", new_time >> 12);
871} 881}
872static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); 882static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
873 883
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10a6ccef4412..25a6a82f1c02 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
31#include <asm/sections.h> 31#include <asm/sections.h>
32#include <asm/vdso.h> 32#include <asm/vdso.h>
33 33
34/* Max supported size for symbol names */
35#define MAX_SYMNAME 64
36
37#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) 34#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
38extern char vdso32_start, vdso32_end; 35extern char vdso32_start, vdso32_end;
39static void *vdso32_kbase = &vdso32_start; 36static void *vdso32_kbase = &vdso32_start;
@@ -71,6 +68,119 @@ static union {
71struct vdso_data *vdso_data = &vdso_data_store.data; 68struct vdso_data *vdso_data = &vdso_data_store.data;
72 69
73/* 70/*
71 * Setup vdso data page.
72 */
73static void vdso_init_data(struct vdso_data *vd)
74{
75 unsigned int facility_list;
76
77 facility_list = stfl();
78 vd->ectg_available = switch_amode && (facility_list & 1);
79}
80
81#ifdef CONFIG_64BIT
82/*
83 * Setup per cpu vdso data page.
84 */
85static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
86{
87}
88
89/*
90 * Allocate/free per cpu vdso data.
91 */
92#ifdef CONFIG_64BIT
93#define SEGMENT_ORDER 2
94#else
95#define SEGMENT_ORDER 1
96#endif
97
98int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
99{
100 unsigned long segment_table, page_table, page_frame;
101 u32 *psal, *aste;
102 int i;
103
104 lowcore->vdso_per_cpu_data = __LC_PASTE;
105
106 if (!switch_amode || !vdso_enabled)
107 return 0;
108
109 segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
110 page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
111 page_frame = get_zeroed_page(GFP_KERNEL);
112 if (!segment_table || !page_table || !page_frame)
113 goto out;
114
115 clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
116 PAGE_SIZE << SEGMENT_ORDER);
117 clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
118 256*sizeof(unsigned long));
119
120 *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
121 *(unsigned long *) page_table = _PAGE_RO + page_frame;
122
123 psal = (u32 *) (page_table + 256*sizeof(unsigned long));
124 aste = psal + 32;
125
126 for (i = 4; i < 32; i += 4)
127 psal[i] = 0x80000000;
128
129 lowcore->paste[4] = (u32)(addr_t) psal;
130 psal[0] = 0x20000000;
131 psal[2] = (u32)(addr_t) aste;
132 *(unsigned long *) (aste + 2) = segment_table +
133 _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
134 aste[4] = (u32)(addr_t) psal;
135 lowcore->vdso_per_cpu_data = page_frame;
136
137 vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
138 return 0;
139
140out:
141 free_page(page_frame);
142 free_page(page_table);
143 free_pages(segment_table, SEGMENT_ORDER);
144 return -ENOMEM;
145}
146
147#ifdef CONFIG_HOTPLUG_CPU
148void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
149{
150 unsigned long segment_table, page_table, page_frame;
151 u32 *psal, *aste;
152
153 if (!switch_amode || !vdso_enabled)
154 return;
155
156 psal = (u32 *)(addr_t) lowcore->paste[4];
157 aste = (u32 *)(addr_t) psal[2];
158 segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
159 page_table = *(unsigned long *) segment_table;
160 page_frame = *(unsigned long *) page_table;
161
162 free_page(page_frame);
163 free_page(page_table);
164 free_pages(segment_table, SEGMENT_ORDER);
165}
166#endif /* CONFIG_HOTPLUG_CPU */
167
168static void __vdso_init_cr5(void *dummy)
169{
170 unsigned long cr5;
171
172 cr5 = offsetof(struct _lowcore, paste);
173 __ctl_load(cr5, 5, 5);
174}
175
176static void vdso_init_cr5(void)
177{
178 if (switch_amode && vdso_enabled)
179 on_each_cpu(__vdso_init_cr5, NULL, 1);
180}
181#endif /* CONFIG_64BIT */
182
183/*
74 * This is called from binfmt_elf, we create the special vma for the 184 * This is called from binfmt_elf, we create the special vma for the
75 * vDSO and insert it into the mm struct tree 185 * vDSO and insert it into the mm struct tree
76 */ 186 */
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
172{ 282{
173 int i; 283 int i;
174 284
285 if (!vdso_enabled)
286 return 0;
287 vdso_init_data(vdso_data);
175#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) 288#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
176 /* Calculate the size of the 32 bit vDSO */ 289 /* Calculate the size of the 32 bit vDSO */
177 vdso32_pages = ((&vdso32_end - &vdso32_start 290 vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
208 } 321 }
209 vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); 322 vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
210 vdso64_pagelist[vdso64_pages] = NULL; 323 vdso64_pagelist[vdso64_pages] = NULL;
324#ifndef CONFIG_SMP
325 BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore));
326#endif
327 vdso_init_cr5();
211#endif /* CONFIG_64BIT */ 328#endif /* CONFIG_64BIT */
212 329
213 get_page(virt_to_page(vdso_data)); 330 get_page(virt_to_page(vdso_data));
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 488e31a3c0e7..9ce8caafdb4e 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
22 cghi %r2,CLOCK_REALTIME 22 cghi %r2,CLOCK_REALTIME
23 je 0f 23 je 0f
24 cghi %r2,CLOCK_MONOTONIC 24 cghi %r2,CLOCK_MONOTONIC
25 je 0f
26 cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
25 jne 2f 27 jne 2f
28 larl %r5,_vdso_data
29 icm %r0,15,__LC_ECTG_OK(%r5)
30 jz 2f
260: ltgr %r3,%r3 310: ltgr %r3,%r3
27 jz 1f /* res == NULL */ 32 jz 1f /* res == NULL */
28 larl %r1,3f 33 larl %r1,3f
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 738a410b7eb2..79dbfee831ec 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,8 +22,10 @@ __kernel_clock_gettime:
22 larl %r5,_vdso_data 22 larl %r5,_vdso_data
23 cghi %r2,CLOCK_REALTIME 23 cghi %r2,CLOCK_REALTIME
24 je 4f 24 je 4f
25 cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
26 je 9f
25 cghi %r2,CLOCK_MONOTONIC 27 cghi %r2,CLOCK_MONOTONIC
26 jne 9f 28 jne 12f
27 29
28 /* CLOCK_MONOTONIC */ 30 /* CLOCK_MONOTONIC */
29 ltgr %r3,%r3 31 ltgr %r3,%r3
@@ -42,7 +44,7 @@ __kernel_clock_gettime:
42 alg %r0,__VDSO_WTOM_SEC(%r5) 44 alg %r0,__VDSO_WTOM_SEC(%r5)
43 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ 45 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
44 jne 0b 46 jne 0b
45 larl %r5,10f 47 larl %r5,13f
461: clg %r1,0(%r5) 481: clg %r1,0(%r5)
47 jl 2f 49 jl 2f
48 slg %r1,0(%r5) 50 slg %r1,0(%r5)
@@ -68,7 +70,7 @@ __kernel_clock_gettime:
68 lg %r0,__VDSO_XTIME_SEC(%r5) 70 lg %r0,__VDSO_XTIME_SEC(%r5)
69 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ 71 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
70 jne 5b 72 jne 5b
71 larl %r5,10f 73 larl %r5,13f
726: clg %r1,0(%r5) 746: clg %r1,0(%r5)
73 jl 7f 75 jl 7f
74 slg %r1,0(%r5) 76 slg %r1,0(%r5)
@@ -79,11 +81,38 @@ __kernel_clock_gettime:
798: lghi %r2,0 818: lghi %r2,0
80 br %r14 82 br %r14
81 83
84 /* CLOCK_THREAD_CPUTIME_ID for this thread */
859: icm %r0,15,__VDSO_ECTG_OK(%r5)
86 jz 12f
87 ear %r2,%a4
88 llilh %r4,0x0100
89 sar %a4,%r4
90 lghi %r4,0
91 sacf 512 /* Magic ectg instruction */
92 .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
93 sacf 0
94 sar %a4,%r2
95 algr %r1,%r0 /* r1 = cputime as TOD value */
96 mghi %r1,1000 /* convert to nanoseconds */
97 srlg %r1,%r1,12 /* r1 = cputime in nanosec */
98 lgr %r4,%r1
99 larl %r5,13f
100 srlg %r1,%r1,9 /* divide by 1000000000 */
101 mlg %r0,8(%r5)
102 srlg %r0,%r0,11 /* r0 = tv_sec */
103 stg %r0,0(%r3)
104 msg %r0,0(%r5) /* calculate tv_nsec */
105 slgr %r4,%r0 /* r4 = tv_nsec */
106 stg %r4,8(%r3)
107 lghi %r2,0
108 br %r14
109
82 /* Fallback to system call */ 110 /* Fallback to system call */
839: lghi %r1,__NR_clock_gettime 11112: lghi %r1,__NR_clock_gettime
84 svc 0 112 svc 0
85 br %r14 113 br %r14
86 114
8710: .quad 1000000000 11513: .quad 1000000000
11614: .quad 19342813113834067
88 .cfi_endproc 117 .cfi_endproc
89 .size __kernel_clock_gettime,.-__kernel_clock_gettime 118 .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 75a6e62ea973..2fb36e462194 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,19 +23,43 @@
23#include <asm/s390_ext.h> 23#include <asm/s390_ext.h>
24#include <asm/timer.h> 24#include <asm/timer.h>
25#include <asm/irq_regs.h> 25#include <asm/irq_regs.h>
26#include <asm/cpu.h>
26 27
27static ext_int_info_t ext_int_info_timer; 28static ext_int_info_t ext_int_info_timer;
29
28static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); 30static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
29 31
32DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
33 .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
34};
35
36static inline __u64 get_vtimer(void)
37{
38 __u64 timer;
39
40 asm volatile("STPT %0" : "=m" (timer));
41 return timer;
42}
43
44static inline void set_vtimer(__u64 expires)
45{
46 __u64 timer;
47
48 asm volatile (" STPT %0\n" /* Store current cpu timer value */
49 " SPT %1" /* Set new value immediatly afterwards */
50 : "=m" (timer) : "m" (expires) );
51 S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
52 S390_lowcore.last_update_timer = expires;
53}
54
30/* 55/*
31 * Update process times based on virtual cpu times stored by entry.S 56 * Update process times based on virtual cpu times stored by entry.S
32 * to the lowcore fields user_timer, system_timer & steal_clock. 57 * to the lowcore fields user_timer, system_timer & steal_clock.
33 */ 58 */
34void account_process_tick(struct task_struct *tsk, int user_tick) 59static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
35{ 60{
36 cputime_t cputime; 61 struct thread_info *ti = task_thread_info(tsk);
37 __u64 timer, clock; 62 __u64 timer, clock, user, system, steal;
38 int rcu_user_flag;
39 63
40 timer = S390_lowcore.last_update_timer; 64 timer = S390_lowcore.last_update_timer;
41 clock = S390_lowcore.last_update_clock; 65 clock = S390_lowcore.last_update_clock;
@@ -44,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
44 : "=m" (S390_lowcore.last_update_timer), 68 : "=m" (S390_lowcore.last_update_timer),
45 "=m" (S390_lowcore.last_update_clock) ); 69 "=m" (S390_lowcore.last_update_clock) );
46 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 70 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
47 S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; 71 S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
48 72
49 cputime = S390_lowcore.user_timer >> 12; 73 user = S390_lowcore.user_timer - ti->user_timer;
50 rcu_user_flag = cputime != 0; 74 S390_lowcore.steal_timer -= user;
51 S390_lowcore.user_timer -= cputime << 12; 75 ti->user_timer = S390_lowcore.user_timer;
52 S390_lowcore.steal_clock -= cputime << 12; 76 account_user_time(tsk, user, user);
53 account_user_time(tsk, cputime); 77
54 78 system = S390_lowcore.system_timer - ti->system_timer;
55 cputime = S390_lowcore.system_timer >> 12; 79 S390_lowcore.steal_timer -= system;
56 S390_lowcore.system_timer -= cputime << 12; 80 ti->system_timer = S390_lowcore.system_timer;
57 S390_lowcore.steal_clock -= cputime << 12; 81 account_system_time(tsk, hardirq_offset, system, system);
58 account_system_time(tsk, HARDIRQ_OFFSET, cputime); 82
59 83 steal = S390_lowcore.steal_timer;
60 cputime = S390_lowcore.steal_clock; 84 if ((s64) steal > 0) {
61 if ((__s64) cputime > 0) { 85 S390_lowcore.steal_timer = 0;
62 cputime >>= 12; 86 account_steal_time(steal);
63 S390_lowcore.steal_clock -= cputime << 12;
64 account_steal_time(tsk, cputime);
65 } 87 }
66} 88}
67 89
68/* 90void account_vtime(struct task_struct *prev, struct task_struct *next)
69 * Update process times based on virtual cpu times stored by entry.S
70 * to the lowcore fields user_timer, system_timer & steal_clock.
71 */
72void account_vtime(struct task_struct *tsk)
73{ 91{
74 cputime_t cputime; 92 struct thread_info *ti;
75 __u64 timer; 93
76 94 do_account_vtime(prev, 0);
77 timer = S390_lowcore.last_update_timer; 95 ti = task_thread_info(prev);
78 asm volatile (" STPT %0" /* Store current cpu timer value */ 96 ti->user_timer = S390_lowcore.user_timer;
79 : "=m" (S390_lowcore.last_update_timer) ); 97 ti->system_timer = S390_lowcore.system_timer;
80 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 98 ti = task_thread_info(next);
81 99 S390_lowcore.user_timer = ti->user_timer;
82 cputime = S390_lowcore.user_timer >> 12; 100 S390_lowcore.system_timer = ti->system_timer;
83 S390_lowcore.user_timer -= cputime << 12; 101}
84 S390_lowcore.steal_clock -= cputime << 12;
85 account_user_time(tsk, cputime);
86 102
87 cputime = S390_lowcore.system_timer >> 12; 103void account_process_tick(struct task_struct *tsk, int user_tick)
88 S390_lowcore.system_timer -= cputime << 12; 104{
89 S390_lowcore.steal_clock -= cputime << 12; 105 do_account_vtime(tsk, HARDIRQ_OFFSET);
90 account_system_time(tsk, 0, cputime);
91} 106}
92 107
93/* 108/*
@@ -96,80 +111,131 @@ void account_vtime(struct task_struct *tsk)
96 */ 111 */
97void account_system_vtime(struct task_struct *tsk) 112void account_system_vtime(struct task_struct *tsk)
98{ 113{
99 cputime_t cputime; 114 struct thread_info *ti = task_thread_info(tsk);
100 __u64 timer; 115 __u64 timer, system;
101 116
102 timer = S390_lowcore.last_update_timer; 117 timer = S390_lowcore.last_update_timer;
103 asm volatile (" STPT %0" /* Store current cpu timer value */ 118 S390_lowcore.last_update_timer = get_vtimer();
104 : "=m" (S390_lowcore.last_update_timer) );
105 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 119 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
106 120
107 cputime = S390_lowcore.system_timer >> 12; 121 system = S390_lowcore.system_timer - ti->system_timer;
108 S390_lowcore.system_timer -= cputime << 12; 122 S390_lowcore.steal_timer -= system;
109 S390_lowcore.steal_clock -= cputime << 12; 123 ti->system_timer = S390_lowcore.system_timer;
110 account_system_time(tsk, 0, cputime); 124 account_system_time(tsk, 0, system, system);
111} 125}
112EXPORT_SYMBOL_GPL(account_system_vtime); 126EXPORT_SYMBOL_GPL(account_system_vtime);
113 127
114static inline void set_vtimer(__u64 expires) 128void vtime_start_cpu(void)
115{
116 __u64 timer;
117
118 asm volatile (" STPT %0\n" /* Store current cpu timer value */
119 " SPT %1" /* Set new value immediatly afterwards */
120 : "=m" (timer) : "m" (expires) );
121 S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
122 S390_lowcore.last_update_timer = expires;
123
124 /* store expire time for this CPU timer */
125 __get_cpu_var(virt_cpu_timer).to_expire = expires;
126}
127
128void vtime_start_cpu_timer(void)
129{ 129{
130 struct vtimer_queue *vt_list; 130 struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
131 131 struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
132 vt_list = &__get_cpu_var(virt_cpu_timer); 132 __u64 idle_time, expires;
133 133
134 /* CPU timer interrupt is pending, don't reprogramm it */ 134 /* Account time spent with enabled wait psw loaded as idle time. */
135 if (vt_list->idle & 1LL<<63) 135 idle_time = S390_lowcore.int_clock - idle->idle_enter;
136 return; 136 account_idle_time(idle_time);
137 S390_lowcore.last_update_clock = S390_lowcore.int_clock;
138
139 /* Account system time spent going idle. */
140 S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
141 S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
142
143 /* Restart vtime CPU timer */
144 if (vq->do_spt) {
145 /* Program old expire value but first save progress. */
146 expires = vq->idle - S390_lowcore.async_enter_timer;
147 expires += get_vtimer();
148 set_vtimer(expires);
149 } else {
150 /* Don't account the CPU timer delta while the cpu was idle. */
151 vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
152 }
137 153
138 if (!list_empty(&vt_list->list)) 154 spin_lock(&idle->lock);
139 set_vtimer(vt_list->idle); 155 idle->idle_time += idle_time;
156 idle->idle_enter = 0ULL;
157 idle->idle_count++;
158 spin_unlock(&idle->lock);
140} 159}
141 160
142void vtime_stop_cpu_timer(void) 161void vtime_stop_cpu(void)
143{ 162{
144 struct vtimer_queue *vt_list; 163 struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
145 164 struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
146 vt_list = &__get_cpu_var(virt_cpu_timer); 165 psw_t psw;
147 166
148 /* nothing to do */ 167 /* Wait for external, I/O or machine check interrupt. */
149 if (list_empty(&vt_list->list)) { 168 psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
150 vt_list->idle = VTIMER_MAX_SLICE; 169
151 goto fire; 170 /* Check if the CPU timer needs to be reprogrammed. */
171 if (vq->do_spt) {
172 __u64 vmax = VTIMER_MAX_SLICE;
173 /*
174 * The inline assembly is equivalent to
175 * vq->idle = get_cpu_timer();
176 * set_cpu_timer(VTIMER_MAX_SLICE);
177 * idle->idle_enter = get_clock();
178 * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
179 * PSW_MASK_IO | PSW_MASK_EXT);
180 * The difference is that the inline assembly makes sure that
181 * the last three instruction are stpt, stck and lpsw in that
182 * order. This is done to increase the precision.
183 */
184 asm volatile(
185#ifndef CONFIG_64BIT
186 " basr 1,0\n"
187 "0: ahi 1,1f-0b\n"
188 " st 1,4(%2)\n"
189#else /* CONFIG_64BIT */
190 " larl 1,1f\n"
191 " stg 1,8(%2)\n"
192#endif /* CONFIG_64BIT */
193 " stpt 0(%4)\n"
194 " spt 0(%5)\n"
195 " stck 0(%3)\n"
196#ifndef CONFIG_64BIT
197 " lpsw 0(%2)\n"
198#else /* CONFIG_64BIT */
199 " lpswe 0(%2)\n"
200#endif /* CONFIG_64BIT */
201 "1:"
202 : "=m" (idle->idle_enter), "=m" (vq->idle)
203 : "a" (&psw), "a" (&idle->idle_enter),
204 "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
205 : "memory", "cc", "1");
206 } else {
207 /*
208 * The inline assembly is equivalent to
209 * vq->idle = get_cpu_timer();
210 * idle->idle_enter = get_clock();
211 * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
212 * PSW_MASK_IO | PSW_MASK_EXT);
213 * The difference is that the inline assembly makes sure that
214 * the last three instruction are stpt, stck and lpsw in that
215 * order. This is done to increase the precision.
216 */
217 asm volatile(
218#ifndef CONFIG_64BIT
219 " basr 1,0\n"
220 "0: ahi 1,1f-0b\n"
221 " st 1,4(%2)\n"
222#else /* CONFIG_64BIT */
223 " larl 1,1f\n"
224 " stg 1,8(%2)\n"
225#endif /* CONFIG_64BIT */
226 " stpt 0(%4)\n"
227 " stck 0(%3)\n"
228#ifndef CONFIG_64BIT
229 " lpsw 0(%2)\n"
230#else /* CONFIG_64BIT */
231 " lpswe 0(%2)\n"
232#endif /* CONFIG_64BIT */
233 "1:"
234 : "=m" (idle->idle_enter), "=m" (vq->idle)
235 : "a" (&psw), "a" (&idle->idle_enter),
236 "a" (&vq->idle), "m" (psw)
237 : "memory", "cc", "1");
152 } 238 }
153
154 /* store the actual expire value */
155 asm volatile ("STPT %0" : "=m" (vt_list->idle));
156
157 /*
158 * If the CPU timer is negative we don't reprogramm
159 * it because we will get instantly an interrupt.
160 */
161 if (vt_list->idle & 1LL<<63)
162 return;
163
164 vt_list->offset += vt_list->to_expire - vt_list->idle;
165
166 /*
167 * We cannot halt the CPU timer, we just write a value that
168 * nearly never expires (only after 71 years) and re-write
169 * the stored expire value if we continue the timer
170 */
171 fire:
172 set_vtimer(VTIMER_MAX_SLICE);
173} 239}
174 240
175/* 241/*
@@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
195 */ 261 */
196static void do_callbacks(struct list_head *cb_list) 262static void do_callbacks(struct list_head *cb_list)
197{ 263{
198 struct vtimer_queue *vt_list; 264 struct vtimer_queue *vq;
199 struct vtimer_list *event, *tmp; 265 struct vtimer_list *event, *tmp;
200 void (*fn)(unsigned long);
201 unsigned long data;
202 266
203 if (list_empty(cb_list)) 267 if (list_empty(cb_list))
204 return; 268 return;
205 269
206 vt_list = &__get_cpu_var(virt_cpu_timer); 270 vq = &__get_cpu_var(virt_cpu_timer);
207 271
208 list_for_each_entry_safe(event, tmp, cb_list, entry) { 272 list_for_each_entry_safe(event, tmp, cb_list, entry) {
209 fn = event->function; 273 list_del_init(&event->entry);
210 data = event->data; 274 (event->function)(event->data);
211 fn(data); 275 if (event->interval) {
212 276 /* Recharge interval timer */
213 if (!event->interval) 277 event->expires = event->interval + vq->elapsed;
214 /* delete one shot timer */ 278 spin_lock(&vq->lock);
215 list_del_init(&event->entry); 279 list_add_sorted(event, &vq->list);
216 else { 280 spin_unlock(&vq->lock);
217 /* move interval timer back to list */
218 spin_lock(&vt_list->lock);
219 list_del_init(&event->entry);
220 list_add_sorted(event, &vt_list->list);
221 spin_unlock(&vt_list->lock);
222 } 281 }
223 } 282 }
224} 283}
@@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list)
228 */ 287 */
229static void do_cpu_timer_interrupt(__u16 error_code) 288static void do_cpu_timer_interrupt(__u16 error_code)
230{ 289{
231 __u64 next, delta; 290 struct vtimer_queue *vq;
232 struct vtimer_queue *vt_list;
233 struct vtimer_list *event, *tmp; 291 struct vtimer_list *event, *tmp;
234 struct list_head *ptr; 292 struct list_head cb_list; /* the callback queue */
235 /* the callback queue */ 293 __u64 elapsed, next;
236 struct list_head cb_list;
237 294
238 INIT_LIST_HEAD(&cb_list); 295 INIT_LIST_HEAD(&cb_list);
239 vt_list = &__get_cpu_var(virt_cpu_timer); 296 vq = &__get_cpu_var(virt_cpu_timer);
240 297
241 /* walk timer list, fire all expired events */ 298 /* walk timer list, fire all expired events */
242 spin_lock(&vt_list->lock); 299 spin_lock(&vq->lock);
243 300
244 if (vt_list->to_expire < VTIMER_MAX_SLICE) 301 elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
245 vt_list->offset += vt_list->to_expire; 302 BUG_ON((s64) elapsed < 0);
246 303 vq->elapsed = 0;
247 list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { 304 list_for_each_entry_safe(event, tmp, &vq->list, entry) {
248 if (event->expires > vt_list->offset) 305 if (event->expires < elapsed)
249 /* found first unexpired event, leave */ 306 /* move expired timer to the callback queue */
250 break; 307 list_move_tail(&event->entry, &cb_list);
251 308 else
252 /* re-charge interval timer, we have to add the offset */ 309 event->expires -= elapsed;
253 if (event->interval)
254 event->expires = event->interval + vt_list->offset;
255
256 /* move expired timer to the callback queue */
257 list_move_tail(&event->entry, &cb_list);
258 } 310 }
259 spin_unlock(&vt_list->lock); 311 spin_unlock(&vq->lock);
312
313 vq->do_spt = list_empty(&cb_list);
260 do_callbacks(&cb_list); 314 do_callbacks(&cb_list);
261 315
262 /* next event is first in list */ 316 /* next event is first in list */
263 spin_lock(&vt_list->lock); 317 next = VTIMER_MAX_SLICE;
264 if (!list_empty(&vt_list->list)) { 318 spin_lock(&vq->lock);
265 ptr = vt_list->list.next; 319 if (!list_empty(&vq->list)) {
266 event = list_entry(ptr, struct vtimer_list, entry); 320 event = list_first_entry(&vq->list, struct vtimer_list, entry);
267 next = event->expires - vt_list->offset; 321 next = event->expires;
268 322 } else
269 /* add the expired time from this interrupt handler 323 vq->do_spt = 0;
270 * and the callback functions 324 spin_unlock(&vq->lock);
271 */ 325 /*
272 asm volatile ("STPT %0" : "=m" (delta)); 326 * To improve precision add the time spent by the
273 delta = 0xffffffffffffffffLL - delta + 1; 327 * interrupt handler to the elapsed time.
274 vt_list->offset += delta; 328 * Note: CPU timer counts down and we got an interrupt,
275 next -= delta; 329 * the current content is negative
276 } else { 330 */
277 vt_list->offset = 0; 331 elapsed = S390_lowcore.async_enter_timer - get_vtimer();
278 next = VTIMER_MAX_SLICE; 332 set_vtimer(next - elapsed);
279 } 333 vq->timer = next - elapsed;
280 spin_unlock(&vt_list->lock); 334 vq->elapsed = elapsed;
281 set_vtimer(next);
282} 335}
283 336
284void init_virt_timer(struct vtimer_list *timer) 337void init_virt_timer(struct vtimer_list *timer)
285{ 338{
286 timer->function = NULL; 339 timer->function = NULL;
287 INIT_LIST_HEAD(&timer->entry); 340 INIT_LIST_HEAD(&timer->entry);
288 spin_lock_init(&timer->lock);
289} 341}
290EXPORT_SYMBOL(init_virt_timer); 342EXPORT_SYMBOL(init_virt_timer);
291 343
@@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer)
299 */ 351 */
300static void internal_add_vtimer(struct vtimer_list *timer) 352static void internal_add_vtimer(struct vtimer_list *timer)
301{ 353{
354 struct vtimer_queue *vq;
302 unsigned long flags; 355 unsigned long flags;
303 __u64 done; 356 __u64 left, expires;
304 struct vtimer_list *event;
305 struct vtimer_queue *vt_list;
306 357
307 vt_list = &per_cpu(virt_cpu_timer, timer->cpu); 358 vq = &per_cpu(virt_cpu_timer, timer->cpu);
308 spin_lock_irqsave(&vt_list->lock, flags); 359 spin_lock_irqsave(&vq->lock, flags);
309 360
310 BUG_ON(timer->cpu != smp_processor_id()); 361 BUG_ON(timer->cpu != smp_processor_id());
311 362
312 /* if list is empty we only have to set the timer */ 363 if (list_empty(&vq->list)) {
313 if (list_empty(&vt_list->list)) { 364 /* First timer on this cpu, just program it. */
314 /* reset the offset, this may happen if the last timer was 365 list_add(&timer->entry, &vq->list);
315 * just deleted by mod_virt_timer and the interrupt 366 set_vtimer(timer->expires);
316 * didn't happen until here 367 vq->timer = timer->expires;
317 */ 368 vq->elapsed = 0;
318 vt_list->offset = 0; 369 } else {
319 goto fire; 370 /* Check progress of old timers. */
371 expires = timer->expires;
372 left = get_vtimer();
373 if (likely((s64) expires < (s64) left)) {
374 /* The new timer expires before the current timer. */
375 set_vtimer(expires);
376 vq->elapsed += vq->timer - left;
377 vq->timer = expires;
378 } else {
379 vq->elapsed += vq->timer - left;
380 vq->timer = left;
381 }
382 /* Insert new timer into per cpu list. */
383 timer->expires += vq->elapsed;
384 list_add_sorted(timer, &vq->list);
320 } 385 }
321 386
322 /* save progress */ 387 spin_unlock_irqrestore(&vq->lock, flags);
323 asm volatile ("STPT %0" : "=m" (done));
324
325 /* calculate completed work */
326 done = vt_list->to_expire - done + vt_list->offset;
327 vt_list->offset = 0;
328
329 list_for_each_entry(event, &vt_list->list, entry)
330 event->expires -= done;
331
332 fire:
333 list_add_sorted(timer, &vt_list->list);
334
335 /* get first element, which is the next vtimer slice */
336 event = list_entry(vt_list->list.next, struct vtimer_list, entry);
337
338 set_vtimer(event->expires);
339 spin_unlock_irqrestore(&vt_list->lock, flags);
340 /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ 388 /* release CPU acquired in prepare_vtimer or mod_virt_timer() */
341 put_cpu(); 389 put_cpu();
342} 390}
@@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic);
381 * If we change a pending timer the function must be called on the CPU 429 * If we change a pending timer the function must be called on the CPU
382 * where the timer is running on, e.g. by smp_call_function_single() 430 * where the timer is running on, e.g. by smp_call_function_single()
383 * 431 *
384 * The original mod_timer adds the timer if it is not pending. For compatibility 432 * The original mod_timer adds the timer if it is not pending. For
385 * we do the same. The timer will be added on the current CPU as a oneshot timer. 433 * compatibility we do the same. The timer will be added on the current
434 * CPU as a oneshot timer.
386 * 435 *
387 * returns whether it has modified a pending timer (1) or not (0) 436 * returns whether it has modified a pending timer (1) or not (0)
388 */ 437 */
389int mod_virt_timer(struct vtimer_list *timer, __u64 expires) 438int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
390{ 439{
391 struct vtimer_queue *vt_list; 440 struct vtimer_queue *vq;
392 unsigned long flags; 441 unsigned long flags;
393 int cpu; 442 int cpu;
394 443
@@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
404 return 1; 453 return 1;
405 454
406 cpu = get_cpu(); 455 cpu = get_cpu();
407 vt_list = &per_cpu(virt_cpu_timer, cpu); 456 vq = &per_cpu(virt_cpu_timer, cpu);
408 457
409 /* check if we run on the right CPU */ 458 /* check if we run on the right CPU */
410 BUG_ON(timer->cpu != cpu); 459 BUG_ON(timer->cpu != cpu);
411 460
412 /* disable interrupts before test if timer is pending */ 461 /* disable interrupts before test if timer is pending */
413 spin_lock_irqsave(&vt_list->lock, flags); 462 spin_lock_irqsave(&vq->lock, flags);
414 463
415 /* if timer isn't pending add it on the current CPU */ 464 /* if timer isn't pending add it on the current CPU */
416 if (!vtimer_pending(timer)) { 465 if (!vtimer_pending(timer)) {
417 spin_unlock_irqrestore(&vt_list->lock, flags); 466 spin_unlock_irqrestore(&vq->lock, flags);
418 /* we do not activate an interval timer with mod_virt_timer */ 467 /* we do not activate an interval timer with mod_virt_timer */
419 timer->interval = 0; 468 timer->interval = 0;
420 timer->expires = expires; 469 timer->expires = expires;
@@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
431 timer->interval = expires; 480 timer->interval = expires;
432 481
433 /* the timer can't expire anymore so we can release the lock */ 482 /* the timer can't expire anymore so we can release the lock */
434 spin_unlock_irqrestore(&vt_list->lock, flags); 483 spin_unlock_irqrestore(&vq->lock, flags);
435 internal_add_vtimer(timer); 484 internal_add_vtimer(timer);
436 return 1; 485 return 1;
437} 486}
@@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer);
445int del_virt_timer(struct vtimer_list *timer) 494int del_virt_timer(struct vtimer_list *timer)
446{ 495{
447 unsigned long flags; 496 unsigned long flags;
448 struct vtimer_queue *vt_list; 497 struct vtimer_queue *vq;
449 498
450 /* check if timer is pending */ 499 /* check if timer is pending */
451 if (!vtimer_pending(timer)) 500 if (!vtimer_pending(timer))
452 return 0; 501 return 0;
453 502
454 vt_list = &per_cpu(virt_cpu_timer, timer->cpu); 503 vq = &per_cpu(virt_cpu_timer, timer->cpu);
455 spin_lock_irqsave(&vt_list->lock, flags); 504 spin_lock_irqsave(&vq->lock, flags);
456 505
457 /* we don't interrupt a running timer, just let it expire! */ 506 /* we don't interrupt a running timer, just let it expire! */
458 list_del_init(&timer->entry); 507 list_del_init(&timer->entry);
459 508
460 /* last timer removed */ 509 spin_unlock_irqrestore(&vq->lock, flags);
461 if (list_empty(&vt_list->list)) {
462 vt_list->to_expire = 0;
463 vt_list->offset = 0;
464 }
465
466 spin_unlock_irqrestore(&vt_list->lock, flags);
467 return 1; 510 return 1;
468} 511}
469EXPORT_SYMBOL(del_virt_timer); 512EXPORT_SYMBOL(del_virt_timer);
@@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer);
473 */ 516 */
474void init_cpu_vtimer(void) 517void init_cpu_vtimer(void)
475{ 518{
476 struct vtimer_queue *vt_list; 519 struct vtimer_queue *vq;
477 520
478 /* kick the virtual timer */ 521 /* kick the virtual timer */
479 S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
480 S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
481 asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
482 asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); 522 asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
523 asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
524
525 /* initialize per cpu vtimer structure */
526 vq = &__get_cpu_var(virt_cpu_timer);
527 INIT_LIST_HEAD(&vq->list);
528 spin_lock_init(&vq->lock);
483 529
484 /* enable cpu timer interrupts */ 530 /* enable cpu timer interrupts */
485 __ctl_set_bit(0,10); 531 __ctl_set_bit(0,10);
486
487 vt_list = &__get_cpu_var(virt_cpu_timer);
488 INIT_LIST_HEAD(&vt_list->list);
489 spin_lock_init(&vt_list->lock);
490 vt_list->to_expire = 0;
491 vt_list->offset = 0;
492 vt_list->idle = 0;
493
494} 532}
495 533
496void __init vtime_init(void) 534void __init vtime_init(void)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 65d75a6be0ba..14f240623497 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
132 *snap = state; 132 *snap = state;
133 133
134 /* Add the appropriate number of ticks of stolen time, 134 /* Add the appropriate number of ticks of stolen time,
135 including any left-overs from last time. Passing NULL to 135 including any left-overs from last time. */
136 account_steal_time accounts the time as stolen. */
137 stolen = runnable + offline + __get_cpu_var(residual_stolen); 136 stolen = runnable + offline + __get_cpu_var(residual_stolen);
138 137
139 if (stolen < 0) 138 if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
141 140
142 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); 141 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
143 __get_cpu_var(residual_stolen) = stolen; 142 __get_cpu_var(residual_stolen) = stolen;
144 account_steal_time(NULL, ticks); 143 account_steal_ticks(ticks);
145 144
146 /* Add the appropriate number of ticks of blocked time, 145 /* Add the appropriate number of ticks of blocked time,
147 including any left-overs from last time. Passing idle to 146 including any left-overs from last time. */
148 account_steal_time accounts the time as idle/wait. */
149 blocked += __get_cpu_var(residual_blocked); 147 blocked += __get_cpu_var(residual_blocked);
150 148
151 if (blocked < 0) 149 if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
153 151
154 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); 152 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
155 __get_cpu_var(residual_blocked) = blocked; 153 __get_cpu_var(residual_blocked) = blocked;
156 account_steal_time(idle_task(smp_processor_id()), ticks); 154 account_idle_ticks(ticks);
157} 155}
158 156
159/* 157/*
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 8a8df7552969..06b71823f399 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -632,8 +632,8 @@ do_IRQ (struct pt_regs *regs)
632 struct pt_regs *old_regs; 632 struct pt_regs *old_regs;
633 633
634 old_regs = set_irq_regs(regs); 634 old_regs = set_irq_regs(regs);
635 irq_enter();
636 s390_idle_check(); 635 s390_idle_check();
636 irq_enter();
637 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) 637 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
638 /* Serve timer interrupts first. */ 638 /* Serve timer interrupts first. */
639 clock_comparator_work(); 639 clock_comparator_work();
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 834e9ee7e934..92b0417f8e12 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -18,6 +18,7 @@
18#include <asm/etr.h> 18#include <asm/etr.h>
19#include <asm/lowcore.h> 19#include <asm/lowcore.h>
20#include <asm/cio.h> 20#include <asm/cio.h>
21#include <asm/cpu.h>
21#include "s390mach.h" 22#include "s390mach.h"
22 23
23static struct semaphore m_sem; 24static struct semaphore m_sem;
@@ -369,6 +370,8 @@ s390_do_machine_check(struct pt_regs *regs)
369 370
370 lockdep_off(); 371 lockdep_off();
371 372
373 s390_idle_check();
374
372 mci = (struct mci *) &S390_lowcore.mcck_interruption_code; 375 mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
373 mcck = &__get_cpu_var(cpu_mcck); 376 mcck = &__get_cpu_var(cpu_mcck);
374 umode = user_mode(regs); 377 umode = user_mode(regs);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4ee4b3d2316f..570d20413119 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -79,10 +79,13 @@ static inline unsigned int kstat_irqs(unsigned int irq)
79} 79}
80 80
81extern unsigned long long task_delta_exec(struct task_struct *); 81extern unsigned long long task_delta_exec(struct task_struct *);
82extern void account_user_time(struct task_struct *, cputime_t); 82extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
83extern void account_user_time_scaled(struct task_struct *, cputime_t); 83extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
84extern void account_system_time(struct task_struct *, int, cputime_t); 84extern void account_steal_time(cputime_t);
85extern void account_system_time_scaled(struct task_struct *, cputime_t); 85extern void account_idle_time(cputime_t);
86extern void account_steal_time(struct task_struct *, cputime_t); 86
87extern void account_process_tick(struct task_struct *, int user);
88extern void account_steal_ticks(unsigned long ticks);
89extern void account_idle_ticks(unsigned long ticks);
87 90
88#endif /* _LINUX_KERNEL_STAT_H */ 91#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 158d53d07765..38a3f4b15394 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -284,7 +284,6 @@ long io_schedule_timeout(long timeout);
284 284
285extern void cpu_init (void); 285extern void cpu_init (void);
286extern void trap_init(void); 286extern void trap_init(void);
287extern void account_process_tick(struct task_struct *task, int user);
288extern void update_process_times(int user); 287extern void update_process_times(int user);
289extern void scheduler_tick(void); 288extern void scheduler_tick(void);
290 289
diff --git a/kernel/sched.c b/kernel/sched.c
index 27ba1d642f0f..930bf2e6d714 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4150,13 +4150,17 @@ unsigned long long task_delta_exec(struct task_struct *p)
4150 * Account user cpu time to a process. 4150 * Account user cpu time to a process.
4151 * @p: the process that the cpu time gets accounted to 4151 * @p: the process that the cpu time gets accounted to
4152 * @cputime: the cpu time spent in user space since the last update 4152 * @cputime: the cpu time spent in user space since the last update
4153 * @cputime_scaled: cputime scaled by cpu frequency
4153 */ 4154 */
4154void account_user_time(struct task_struct *p, cputime_t cputime) 4155void account_user_time(struct task_struct *p, cputime_t cputime,
4156 cputime_t cputime_scaled)
4155{ 4157{
4156 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 4158 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4157 cputime64_t tmp; 4159 cputime64_t tmp;
4158 4160
4161 /* Add user time to process. */
4159 p->utime = cputime_add(p->utime, cputime); 4162 p->utime = cputime_add(p->utime, cputime);
4163 p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
4160 account_group_user_time(p, cputime); 4164 account_group_user_time(p, cputime);
4161 4165
4162 /* Add user time to cpustat. */ 4166 /* Add user time to cpustat. */
@@ -4173,51 +4177,48 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
4173 * Account guest cpu time to a process. 4177 * Account guest cpu time to a process.
4174 * @p: the process that the cpu time gets accounted to 4178 * @p: the process that the cpu time gets accounted to
4175 * @cputime: the cpu time spent in virtual machine since the last update 4179 * @cputime: the cpu time spent in virtual machine since the last update
4180 * @cputime_scaled: cputime scaled by cpu frequency
4176 */ 4181 */
4177static void account_guest_time(struct task_struct *p, cputime_t cputime) 4182static void account_guest_time(struct task_struct *p, cputime_t cputime,
4183 cputime_t cputime_scaled)
4178{ 4184{
4179 cputime64_t tmp; 4185 cputime64_t tmp;
4180 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 4186 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4181 4187
4182 tmp = cputime_to_cputime64(cputime); 4188 tmp = cputime_to_cputime64(cputime);
4183 4189
4190 /* Add guest time to process. */
4184 p->utime = cputime_add(p->utime, cputime); 4191 p->utime = cputime_add(p->utime, cputime);
4192 p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
4185 account_group_user_time(p, cputime); 4193 account_group_user_time(p, cputime);
4186 p->gtime = cputime_add(p->gtime, cputime); 4194 p->gtime = cputime_add(p->gtime, cputime);
4187 4195
4196 /* Add guest time to cpustat. */
4188 cpustat->user = cputime64_add(cpustat->user, tmp); 4197 cpustat->user = cputime64_add(cpustat->user, tmp);
4189 cpustat->guest = cputime64_add(cpustat->guest, tmp); 4198 cpustat->guest = cputime64_add(cpustat->guest, tmp);
4190} 4199}
4191 4200
4192/* 4201/*
4193 * Account scaled user cpu time to a process.
4194 * @p: the process that the cpu time gets accounted to
4195 * @cputime: the cpu time spent in user space since the last update
4196 */
4197void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
4198{
4199 p->utimescaled = cputime_add(p->utimescaled, cputime);
4200}
4201
4202/*
4203 * Account system cpu time to a process. 4202 * Account system cpu time to a process.
4204 * @p: the process that the cpu time gets accounted to 4203 * @p: the process that the cpu time gets accounted to
4205 * @hardirq_offset: the offset to subtract from hardirq_count() 4204 * @hardirq_offset: the offset to subtract from hardirq_count()
4206 * @cputime: the cpu time spent in kernel space since the last update 4205 * @cputime: the cpu time spent in kernel space since the last update
4206 * @cputime_scaled: cputime scaled by cpu frequency
4207 */ 4207 */
4208void account_system_time(struct task_struct *p, int hardirq_offset, 4208void account_system_time(struct task_struct *p, int hardirq_offset,
4209 cputime_t cputime) 4209 cputime_t cputime, cputime_t cputime_scaled)
4210{ 4210{
4211 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 4211 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4212 struct rq *rq = this_rq();
4213 cputime64_t tmp; 4212 cputime64_t tmp;
4214 4213
4215 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { 4214 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
4216 account_guest_time(p, cputime); 4215 account_guest_time(p, cputime, cputime_scaled);
4217 return; 4216 return;
4218 } 4217 }
4219 4218
4219 /* Add system time to process. */
4220 p->stime = cputime_add(p->stime, cputime); 4220 p->stime = cputime_add(p->stime, cputime);
4221 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
4221 account_group_system_time(p, cputime); 4222 account_group_system_time(p, cputime);
4222 4223
4223 /* Add system time to cpustat. */ 4224 /* Add system time to cpustat. */
@@ -4226,48 +4227,84 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4226 cpustat->irq = cputime64_add(cpustat->irq, tmp); 4227 cpustat->irq = cputime64_add(cpustat->irq, tmp);
4227 else if (softirq_count()) 4228 else if (softirq_count())
4228 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 4229 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
4229 else if (p != rq->idle)
4230 cpustat->system = cputime64_add(cpustat->system, tmp);
4231 else if (atomic_read(&rq->nr_iowait) > 0)
4232 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4233 else 4230 else
4234 cpustat->idle = cputime64_add(cpustat->idle, tmp); 4231 cpustat->system = cputime64_add(cpustat->system, tmp);
4232
4235 /* Account for system time used */ 4233 /* Account for system time used */
4236 acct_update_integrals(p); 4234 acct_update_integrals(p);
4237} 4235}
4238 4236
4239/* 4237/*
4240 * Account scaled system cpu time to a process. 4238 * Account for involuntary wait time.
4241 * @p: the process that the cpu time gets accounted to 4239 * @cputime: the cpu time spent in involuntary wait
4242 * @hardirq_offset: the offset to subtract from hardirq_count()
4243 * @cputime: the cpu time spent in kernel space since the last update
4244 */ 4240 */
4245void account_system_time_scaled(struct task_struct *p, cputime_t cputime) 4241void account_steal_time(cputime_t cputime)
4246{ 4242{
4247 p->stimescaled = cputime_add(p->stimescaled, cputime); 4243 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4244 cputime64_t cputime64 = cputime_to_cputime64(cputime);
4245
4246 cpustat->steal = cputime64_add(cpustat->steal, cputime64);
4248} 4247}
4249 4248
4250/* 4249/*
4251 * Account for involuntary wait time. 4250 * Account for idle time.
4252 * @p: the process from which the cpu time has been stolen 4251 * @cputime: the cpu time spent in idle wait
4253 * @steal: the cpu time spent in involuntary wait
4254 */ 4252 */
4255void account_steal_time(struct task_struct *p, cputime_t steal) 4253void account_idle_time(cputime_t cputime)
4256{ 4254{
4257 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 4255 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4258 cputime64_t tmp = cputime_to_cputime64(steal); 4256 cputime64_t cputime64 = cputime_to_cputime64(cputime);
4259 struct rq *rq = this_rq(); 4257 struct rq *rq = this_rq();
4260 4258
4261 if (p == rq->idle) { 4259 if (atomic_read(&rq->nr_iowait) > 0)
4262 p->stime = cputime_add(p->stime, steal); 4260 cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
4263 if (atomic_read(&rq->nr_iowait) > 0) 4261 else
4264 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 4262 cpustat->idle = cputime64_add(cpustat->idle, cputime64);
4265 else
4266 cpustat->idle = cputime64_add(cpustat->idle, tmp);
4267 } else
4268 cpustat->steal = cputime64_add(cpustat->steal, tmp);
4269} 4263}
4270 4264
4265#ifndef CONFIG_VIRT_CPU_ACCOUNTING
4266
4267/*
4268 * Account a single tick of cpu time.
4269 * @p: the process that the cpu time gets accounted to
4270 * @user_tick: indicates if the tick is a user or a system tick
4271 */
4272void account_process_tick(struct task_struct *p, int user_tick)
4273{
4274 cputime_t one_jiffy = jiffies_to_cputime(1);
4275 cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
4276 struct rq *rq = this_rq();
4277
4278 if (user_tick)
4279 account_user_time(p, one_jiffy, one_jiffy_scaled);
4280 else if (p != rq->idle)
4281 account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
4282 one_jiffy_scaled);
4283 else
4284 account_idle_time(one_jiffy);
4285}
4286
4287/*
4288 * Account multiple ticks of steal time.
4289 * No task is charged; the ticks are added to the steal time of this cpu.
4290 * @ticks: number of stolen ticks
4291 */
4292void account_steal_ticks(unsigned long ticks)
4293{
4294 account_steal_time(jiffies_to_cputime(ticks));
4295}
4296
4297/*
4298 * Account multiple ticks of idle time.
4299 * @ticks: number of idle ticks
4300 */
4301void account_idle_ticks(unsigned long ticks)
4302{
4303 account_idle_time(jiffies_to_cputime(ticks));
4304}
4305
4306#endif
4307
4271/* 4308/*
4272 * Use precise platform statistics if available: 4309 * Use precise platform statistics if available:
4273 */ 4310 */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 76a574bbef97..1b6c05bd0d0a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -419,7 +419,9 @@ void tick_nohz_restart_sched_tick(void)
419{ 419{
420 int cpu = smp_processor_id(); 420 int cpu = smp_processor_id();
421 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 421 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
422#ifndef CONFIG_VIRT_CPU_ACCOUNTING
422 unsigned long ticks; 423 unsigned long ticks;
424#endif
423 ktime_t now; 425 ktime_t now;
424 426
425 local_irq_disable(); 427 local_irq_disable();
@@ -441,6 +443,7 @@ void tick_nohz_restart_sched_tick(void)
441 tick_do_update_jiffies64(now); 443 tick_do_update_jiffies64(now);
442 cpumask_clear_cpu(cpu, nohz_cpu_mask); 444 cpumask_clear_cpu(cpu, nohz_cpu_mask);
443 445
446#ifndef CONFIG_VIRT_CPU_ACCOUNTING
444 /* 447 /*
445 * We stopped the tick in idle. Update process times would miss the 448 * We stopped the tick in idle. Update process times would miss the
446 * time we slept as update_process_times does only a 1 tick 449 * time we slept as update_process_times does only a 1 tick
@@ -450,12 +453,9 @@ void tick_nohz_restart_sched_tick(void)
450 /* 453 /*
451 * We might be one off. Do not randomly account a huge number of ticks! 454 * We might be one off. Do not randomly account a huge number of ticks!
452 */ 455 */
453 if (ticks && ticks < LONG_MAX) { 456 if (ticks && ticks < LONG_MAX)
454 add_preempt_count(HARDIRQ_OFFSET); 457 account_idle_ticks(ticks);
455 account_system_time(current, HARDIRQ_OFFSET, 458#endif
456 jiffies_to_cputime(ticks));
457 sub_preempt_count(HARDIRQ_OFFSET);
458 }
459 459
460 touch_softlockup_watchdog(); 460 touch_softlockup_watchdog();
461 /* 461 /*
diff --git a/kernel/timer.c b/kernel/timer.c
index 566257d1dc10..dee3f641a7a7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1018,21 +1018,6 @@ unsigned long get_next_timer_interrupt(unsigned long now)
1018} 1018}
1019#endif 1019#endif
1020 1020
1021#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1022void account_process_tick(struct task_struct *p, int user_tick)
1023{
1024 cputime_t one_jiffy = jiffies_to_cputime(1);
1025
1026 if (user_tick) {
1027 account_user_time(p, one_jiffy);
1028 account_user_time_scaled(p, cputime_to_scaled(one_jiffy));
1029 } else {
1030 account_system_time(p, HARDIRQ_OFFSET, one_jiffy);
1031 account_system_time_scaled(p, cputime_to_scaled(one_jiffy));
1032 }
1033}
1034#endif
1035
1036/* 1021/*
1037 * Called from the timer interrupt handler to charge one tick to the current 1022 * Called from the timer interrupt handler to charge one tick to the current
1038 * process. user_tick is 1 if the tick is user time, 0 for system. 1023 * process. user_tick is 1 if the tick is user time, 0 for system.