author     Linus Torvalds <torvalds@linux-foundation.org>   2019-09-17 15:35:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-09-17 15:35:15 -0400
commit     7f2444d38f6bbfa12bc15e2533d8f9daa85ca02b (patch)
tree       6506ec79036890edfd9797b001391a350b5ac10f /kernel/time/posix-cpu-timers.c
parent     c5f12fdb8bd873aa3ffdb79512e6bdac92b257b0 (diff)
parent     77b4b5420422fc037d00b8f3f0e89b2262e4ae29 (diff)
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core timer updates from Thomas Gleixner:
"Timers and timekeeping updates:
- A large overhaul of the posix CPU timer code which is a preparation
for moving the CPU timer expiry out into task work so it can be
properly accounted on the task/process.
An update to the bogus permission checks will come later during the
merge window, as feedback was not complete before heading off for
travel.
- Switch the timerqueue code to use cached rbtrees and get rid of the
home-brewed caching of the leftmost node.
- Consolidate hrtimer_init() + hrtimer_init_sleeper() calls into a
single function
- Implement support for forcing hrtimers to expire in hard interrupt
context even when PREEMPT_RT is enabled, and mark the affected
timers accordingly.
- Implement a mechanism for hrtimers and the timer wheel to protect
RT against priority inversion and live lock issues when a (hr)timer
which should be canceled is currently executing the callback.
Instead of infinitely spinning, the task which tries to cancel the
timer blocks on a per cpu base expiry lock which is held and
released by the (hr)timer expiry code.
- Enable the Hyper-V TSC page based sched_clock for Hyper-V guests
resulting in faster access to timekeeping functions.
- Updates to various clocksource/clockevent drivers and their device
tree bindings.
- The usual small improvements all over the place"
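The expiry-lock mechanism described in the bullets above can be sketched in plain C. This is only an illustration, not the kernel implementation: timer_base, expire_timers() and cancel_timer_sync() are made-up names, and pthread mutexes stand in for the per-CPU base lock and expiry lock the series adds. The point is that a canceller blocks on a lock held across callback execution instead of spinning:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct timer_base {
	pthread_mutex_t lock;        /* protects queue state */
	pthread_mutex_t expiry_lock; /* held while callbacks execute */
	bool callback_running;
};

static struct timer_base base = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.expiry_lock = PTHREAD_MUTEX_INITIALIZER,
};

/* Expiry side: run callbacks with expiry_lock held. */
static void expire_timers(struct timer_base *b, void (*cb)(void))
{
	pthread_mutex_lock(&b->expiry_lock);
	pthread_mutex_lock(&b->lock);
	b->callback_running = true;
	pthread_mutex_unlock(&b->lock);

	cb();

	pthread_mutex_lock(&b->lock);
	b->callback_running = false;
	pthread_mutex_unlock(&b->lock);
	pthread_mutex_unlock(&b->expiry_lock);
}

/* Cancel side: wait on expiry_lock instead of spinning. */
static void cancel_timer_sync(struct timer_base *b)
{
	for (;;) {
		pthread_mutex_lock(&b->lock);
		if (!b->callback_running) {
			/* would dequeue the timer here */
			pthread_mutex_unlock(&b->lock);
			return;
		}
		pthread_mutex_unlock(&b->lock);
		/* Block until the running callback has finished. */
		pthread_mutex_lock(&b->expiry_lock);
		pthread_mutex_unlock(&b->expiry_lock);
	}
}

static void slow_callback(void)
{
	usleep(100 * 1000); /* pretend the callback runs for a while */
}

static void *expiry_thread(void *arg)
{
	(void)arg;
	expire_timers(&base, slow_callback);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, expiry_thread, NULL);
	usleep(10 * 1000);        /* give the callback time to start */
	cancel_timer_sync(&base); /* blocks, does not busy-wait */
	pthread_join(tid, NULL);
	puts("timer cancelled after the callback completed");
	return 0;
}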
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (101 commits)
posix-cpu-timers: Fix permission check regression
posix-cpu-timers: Always clear head pointer on dequeue
hrtimer: Add a missing bracket and hide `migration_base' on !SMP
posix-cpu-timers: Make expiry_active check actually work correctly
posix-timers: Unbreak CONFIG_POSIX_TIMERS=n build
tick: Mark sched_timer to expire in hard interrupt context
hrtimer: Add kernel doc annotation for HRTIMER_MODE_HARD
x86/hyperv: Hide pv_ops access for CONFIG_PARAVIRT=n
posix-cpu-timers: Utilize timerqueue for storage
posix-cpu-timers: Move state tracking to struct posix_cputimers
posix-cpu-timers: Deduplicate rlimit handling
posix-cpu-timers: Remove pointless comparisons
posix-cpu-timers: Get rid of 64bit divisions
posix-cpu-timers: Consolidate timer expiry further
posix-cpu-timers: Get rid of zero checks
rlimit: Rewrite non-sensical RLIMIT_CPU comment
posix-cpu-timers: Respect INFINITY for hard RTTIME limit
posix-cpu-timers: Switch thread group sampling to array
posix-cpu-timers: Restructure expiry array
posix-cpu-timers: Remove cputime_expires
...
Diffstat (limited to 'kernel/time/posix-cpu-timers.c')
-rw-r--r--  kernel/time/posix-cpu-timers.c  1010
1 file changed, 492 insertions, 518 deletions
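A note on reading the diff below: the per-clock expiry caches (bases[CPUCLOCK_*].nextevt) switch from "0 means no timer armed" to "U64_MAX means no timer armed", and the new expiry_cache_is_inactive() helper tests all three caches with a single bitwise expression. A standalone sketch of that check (the helper name all_inactive() is made up here; only the expression mirrors the patch):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * ~x is non-zero unless x == UINT64_MAX, so OR-ing the complements is
 * zero exactly when every cached expiry is UINT64_MAX, i.e. no timer
 * is armed on any of the three clocks.
 */
static bool all_inactive(uint64_t prof, uint64_t virt, uint64_t sched)
{
	return !(~prof | ~virt | ~sched);
}

int main(void)
{
	assert(all_inactive(UINT64_MAX, UINT64_MAX, UINT64_MAX));
	assert(!all_inactive(42, UINT64_MAX, UINT64_MAX));
	return 0;
}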
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 0a426f4e3125..92a431981b1c 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -20,11 +20,20 @@ | |||
20 | 20 | ||
21 | static void posix_cpu_timer_rearm(struct k_itimer *timer); | 21 | static void posix_cpu_timer_rearm(struct k_itimer *timer); |
22 | 22 | ||
23 | void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) | ||
24 | { | ||
25 | posix_cputimers_init(pct); | ||
26 | if (cpu_limit != RLIM_INFINITY) { | ||
27 | pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC; | ||
28 | pct->timers_active = true; | ||
29 | } | ||
30 | } | ||
31 | |||
23 | /* | 32 | /* |
24 | * Called after updating RLIMIT_CPU to run cpu timer and update | 33 | * Called after updating RLIMIT_CPU to run cpu timer and update |
25 | * tsk->signal->cputime_expires expiration cache if necessary. Needs | 34 | * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if |
26 | * siglock protection since other code may update expiration cache as | 35 | * necessary. Needs siglock protection since other code may update the |
27 | * well. | 36 | * expiration cache as well. |
28 | */ | 37 | */ |
29 | void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) | 38 | void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) |
30 | { | 39 | { |
@@ -35,46 +44,97 @@ void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) | |||
35 | spin_unlock_irq(&task->sighand->siglock); | 44 | spin_unlock_irq(&task->sighand->siglock); |
36 | } | 45 | } |
37 | 46 | ||
38 | static int check_clock(const clockid_t which_clock) | 47 | /* |
48 | * Functions for validating access to tasks. | ||
49 | */ | ||
50 | static struct task_struct *lookup_task(const pid_t pid, bool thread, | ||
51 | bool gettime) | ||
39 | { | 52 | { |
40 | int error = 0; | ||
41 | struct task_struct *p; | 53 | struct task_struct *p; |
42 | const pid_t pid = CPUCLOCK_PID(which_clock); | ||
43 | |||
44 | if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX) | ||
45 | return -EINVAL; | ||
46 | 54 | ||
47 | if (pid == 0) | 55 | /* |
48 | return 0; | 56 | * If the encoded PID is 0, then the timer is targeted at current |
57 | * or the process to which current belongs. | ||
58 | */ | ||
59 | if (!pid) | ||
60 | return thread ? current : current->group_leader; | ||
49 | 61 | ||
50 | rcu_read_lock(); | ||
51 | p = find_task_by_vpid(pid); | 62 | p = find_task_by_vpid(pid); |
52 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? | 63 | if (!p) |
53 | same_thread_group(p, current) : has_group_leader_pid(p))) { | 64 | return p; |
54 | error = -EINVAL; | 65 | |
66 | if (thread) | ||
67 | return same_thread_group(p, current) ? p : NULL; | ||
68 | |||
69 | if (gettime) { | ||
70 | /* | ||
71 | * For clock_gettime(PROCESS) the task does not need to be | ||
72 | * the actual group leader. tsk->sighand gives | ||
73 | * access to the group's clock. | ||
74 | * | ||
75 | * Timers need the group leader because they take a | ||
76 | * reference on it and store the task pointer until the | ||
77 | * timer is destroyed. | ||
78 | */ | ||
79 | return (p == current || thread_group_leader(p)) ? p : NULL; | ||
55 | } | 80 | } |
81 | |||
82 | /* | ||
83 | * For processes, require that p is the group leader. | ||
84 | */ | ||
85 | return has_group_leader_pid(p) ? p : NULL; | ||
86 | } | ||
87 | |||
88 | static struct task_struct *__get_task_for_clock(const clockid_t clock, | ||
89 | bool getref, bool gettime) | ||
90 | { | ||
91 | const bool thread = !!CPUCLOCK_PERTHREAD(clock); | ||
92 | const pid_t pid = CPUCLOCK_PID(clock); | ||
93 | struct task_struct *p; | ||
94 | |||
95 | if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX) | ||
96 | return NULL; | ||
97 | |||
98 | rcu_read_lock(); | ||
99 | p = lookup_task(pid, thread, gettime); | ||
100 | if (p && getref) | ||
101 | get_task_struct(p); | ||
56 | rcu_read_unlock(); | 102 | rcu_read_unlock(); |
103 | return p; | ||
104 | } | ||
57 | 105 | ||
58 | return error; | 106 | static inline struct task_struct *get_task_for_clock(const clockid_t clock) |
107 | { | ||
108 | return __get_task_for_clock(clock, true, false); | ||
109 | } | ||
110 | |||
111 | static inline struct task_struct *get_task_for_clock_get(const clockid_t clock) | ||
112 | { | ||
113 | return __get_task_for_clock(clock, true, true); | ||
114 | } | ||
115 | |||
116 | static inline int validate_clock_permissions(const clockid_t clock) | ||
117 | { | ||
118 | return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL; | ||
59 | } | 119 | } |
60 | 120 | ||
61 | /* | 121 | /* |
62 | * Update expiry time from increment, and increase overrun count, | 122 | * Update expiry time from increment, and increase overrun count, |
63 | * given the current clock sample. | 123 | * given the current clock sample. |
64 | */ | 124 | */ |
65 | static void bump_cpu_timer(struct k_itimer *timer, u64 now) | 125 | static u64 bump_cpu_timer(struct k_itimer *timer, u64 now) |
66 | { | 126 | { |
127 | u64 delta, incr, expires = timer->it.cpu.node.expires; | ||
67 | int i; | 128 | int i; |
68 | u64 delta, incr; | ||
69 | 129 | ||
70 | if (!timer->it_interval) | 130 | if (!timer->it_interval) |
71 | return; | 131 | return expires; |
72 | 132 | ||
73 | if (now < timer->it.cpu.expires) | 133 | if (now < expires) |
74 | return; | 134 | return expires; |
75 | 135 | ||
76 | incr = timer->it_interval; | 136 | incr = timer->it_interval; |
77 | delta = now + incr - timer->it.cpu.expires; | 137 | delta = now + incr - expires; |
78 | 138 | ||
79 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | 139 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ |
80 | for (i = 0; incr < delta - incr; i++) | 140 | for (i = 0; incr < delta - incr; i++) |
@@ -84,48 +144,26 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now) | |||
84 | if (delta < incr) | 144 | if (delta < incr) |
85 | continue; | 145 | continue; |
86 | 146 | ||
87 | timer->it.cpu.expires += incr; | 147 | timer->it.cpu.node.expires += incr; |
88 | timer->it_overrun += 1LL << i; | 148 | timer->it_overrun += 1LL << i; |
89 | delta -= incr; | 149 | delta -= incr; |
90 | } | 150 | } |
151 | return timer->it.cpu.node.expires; | ||
91 | } | 152 | } |
92 | 153 | ||
93 | /** | 154 | /* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */ |
94 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | 155 | static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct) |
95 | * | ||
96 | * @cputime: The struct to compare. | ||
97 | * | ||
98 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
99 | * are zero, false if any field is nonzero. | ||
100 | */ | ||
101 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
102 | { | 156 | { |
103 | if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) | 157 | return !(~pct->bases[CPUCLOCK_PROF].nextevt | |
104 | return 1; | 158 | ~pct->bases[CPUCLOCK_VIRT].nextevt | |
105 | return 0; | 159 | ~pct->bases[CPUCLOCK_SCHED].nextevt); |
106 | } | ||
107 | |||
108 | static inline u64 prof_ticks(struct task_struct *p) | ||
109 | { | ||
110 | u64 utime, stime; | ||
111 | |||
112 | task_cputime(p, &utime, &stime); | ||
113 | |||
114 | return utime + stime; | ||
115 | } | ||
116 | static inline u64 virt_ticks(struct task_struct *p) | ||
117 | { | ||
118 | u64 utime, stime; | ||
119 | |||
120 | task_cputime(p, &utime, &stime); | ||
121 | |||
122 | return utime; | ||
123 | } | 160 | } |
124 | 161 | ||
125 | static int | 162 | static int |
126 | posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) | 163 | posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) |
127 | { | 164 | { |
128 | int error = check_clock(which_clock); | 165 | int error = validate_clock_permissions(which_clock); |
166 | |||
129 | if (!error) { | 167 | if (!error) { |
130 | tp->tv_sec = 0; | 168 | tp->tv_sec = 0; |
131 | tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); | 169 | tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); |
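The bump_cpu_timer() change in the hunk above keeps the overflow-safe overrun accounting: the step is doubled while it stays safely below the remaining delta (never computing incr * 2 directly), then halved back while advancing the expiry. The doubling step and the second loop header fall between the displayed hunks; the standalone sketch below follows the same idea with a made-up bump() helper and is not the kernel code:

#include <assert.h>
#include <stdint.h>

/* Advance 'expires' past 'now' in steps of 'incr', counting overruns. */
static uint64_t bump(uint64_t expires, uint64_t incr, uint64_t now,
		     uint64_t *overrun)
{
	uint64_t delta;
	int i;

	if (!incr || now < expires)
		return expires;

	delta = now + incr - expires;

	/* Don't use (incr * 2 < delta), incr * 2 might overflow. */
	for (i = 0; incr < delta - incr; i++)
		incr <<= 1;

	for (; i >= 0; incr >>= 1, i--) {
		if (delta < incr)
			continue;
		expires += incr;
		*overrun += 1ULL << i;
		delta -= incr;
	}
	return expires;
}

int main(void)
{
	uint64_t overrun = 0;

	/* Expiry 10, period 3, now 20: the expiry must advance by four
	 * periods (to 22) to get past 20, and four overruns are recorded. */
	assert(bump(10, 3, 20, &overrun) == 22);
	assert(overrun == 4);
	return 0;
}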
@@ -142,42 +180,66 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) | |||
142 | } | 180 | } |
143 | 181 | ||
144 | static int | 182 | static int |
145 | posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp) | 183 | posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp) |
146 | { | 184 | { |
185 | int error = validate_clock_permissions(clock); | ||
186 | |||
147 | /* | 187 | /* |
148 | * You can never reset a CPU clock, but we check for other errors | 188 | * You can never reset a CPU clock, but we check for other errors |
149 | * in the call before failing with EPERM. | 189 | * in the call before failing with EPERM. |
150 | */ | 190 | */ |
151 | int error = check_clock(which_clock); | 191 | return error ? : -EPERM; |
152 | if (error == 0) { | ||
153 | error = -EPERM; | ||
154 | } | ||
155 | return error; | ||
156 | } | 192 | } |
157 | 193 | ||
158 | |||
159 | /* | 194 | /* |
160 | * Sample a per-thread clock for the given task. | 195 | * Sample a per-thread clock for the given task. clkid is validated. |
161 | */ | 196 | */ |
162 | static int cpu_clock_sample(const clockid_t which_clock, | 197 | static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p) |
163 | struct task_struct *p, u64 *sample) | ||
164 | { | 198 | { |
165 | switch (CPUCLOCK_WHICH(which_clock)) { | 199 | u64 utime, stime; |
166 | default: | 200 | |
167 | return -EINVAL; | 201 | if (clkid == CPUCLOCK_SCHED) |
202 | return task_sched_runtime(p); | ||
203 | |||
204 | task_cputime(p, &utime, &stime); | ||
205 | |||
206 | switch (clkid) { | ||
168 | case CPUCLOCK_PROF: | 207 | case CPUCLOCK_PROF: |
169 | *sample = prof_ticks(p); | 208 | return utime + stime; |
170 | break; | ||
171 | case CPUCLOCK_VIRT: | 209 | case CPUCLOCK_VIRT: |
172 | *sample = virt_ticks(p); | 210 | return utime; |
173 | break; | 211 | default: |
174 | case CPUCLOCK_SCHED: | 212 | WARN_ON_ONCE(1); |
175 | *sample = task_sched_runtime(p); | ||
176 | break; | ||
177 | } | 213 | } |
178 | return 0; | 214 | return 0; |
179 | } | 215 | } |
180 | 216 | ||
217 | static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime) | ||
218 | { | ||
219 | samples[CPUCLOCK_PROF] = stime + utime; | ||
220 | samples[CPUCLOCK_VIRT] = utime; | ||
221 | samples[CPUCLOCK_SCHED] = rtime; | ||
222 | } | ||
223 | |||
224 | static void task_sample_cputime(struct task_struct *p, u64 *samples) | ||
225 | { | ||
226 | u64 stime, utime; | ||
227 | |||
228 | task_cputime(p, &utime, &stime); | ||
229 | store_samples(samples, stime, utime, p->se.sum_exec_runtime); | ||
230 | } | ||
231 | |||
232 | static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, | ||
233 | u64 *samples) | ||
234 | { | ||
235 | u64 stime, utime, rtime; | ||
236 | |||
237 | utime = atomic64_read(&at->utime); | ||
238 | stime = atomic64_read(&at->stime); | ||
239 | rtime = atomic64_read(&at->sum_exec_runtime); | ||
240 | store_samples(samples, stime, utime, rtime); | ||
241 | } | ||
242 | |||
181 | /* | 243 | /* |
182 | * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg | 244 | * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg |
183 | * to avoid race conditions with concurrent updates to cputime. | 245 | * to avoid race conditions with concurrent updates to cputime. |
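The hunk above also introduces the fixed-size sample array indexed by clock id that the rest of the series builds on: store_samples() fills one slot per clock from a single cputime reading, and callers simply index it with the validated clkid. A standalone sketch of the convention (CLK_* and the local store_samples()-style helper are stand-ins, not the kernel definitions):

#include <assert.h>
#include <stdint.h>

enum { CLK_PROF, CLK_VIRT, CLK_SCHED, CLK_MAX }; /* stand-ins for CPUCLOCK_* */

/* One sample per clock, derived from a single cputime reading. */
static void store_samples(uint64_t *samples, uint64_t stime, uint64_t utime,
			  uint64_t rtime)
{
	samples[CLK_PROF]  = stime + utime; /* system + user time */
	samples[CLK_VIRT]  = utime;         /* user time only */
	samples[CLK_SCHED] = rtime;         /* scheduler runtime */
}

int main(void)
{
	uint64_t samples[CLK_MAX];

	store_samples(samples, 100, 250, 400);
	/* A caller like cpu_clock_sample_group() just indexes by clkid. */
	assert(samples[CLK_PROF] == 350);
	assert(samples[CLK_VIRT] == 250);
	assert(samples[CLK_SCHED] == 400);
	return 0;
}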
@@ -193,29 +255,56 @@ retry: | |||
193 | } | 255 | } |
194 | } | 256 | } |
195 | 257 | ||
196 | static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum) | 258 | static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, |
259 | struct task_cputime *sum) | ||
197 | { | 260 | { |
198 | __update_gt_cputime(&cputime_atomic->utime, sum->utime); | 261 | __update_gt_cputime(&cputime_atomic->utime, sum->utime); |
199 | __update_gt_cputime(&cputime_atomic->stime, sum->stime); | 262 | __update_gt_cputime(&cputime_atomic->stime, sum->stime); |
200 | __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); | 263 | __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); |
201 | } | 264 | } |
202 | 265 | ||
203 | /* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */ | 266 | /** |
204 | static inline void sample_cputime_atomic(struct task_cputime *times, | 267 | * thread_group_sample_cputime - Sample cputime for a given task |
205 | struct task_cputime_atomic *atomic_times) | 268 | * @tsk: Task for which cputime needs to be sampled
269 | * @samples: Storage for time samples | ||
270 | * | ||
271 | * Called from sys_getitimer() to calculate the expiry time of an active | ||
272 | * timer. That means group cputime accounting is already active. Called | ||
273 | * with task sighand lock held. | ||
274 | * | ||
275 | * Updates @samples with an up-to-date sample of the thread group cputimes. | ||
276 | */ | ||
277 | void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples) | ||
206 | { | 278 | { |
207 | times->utime = atomic64_read(&atomic_times->utime); | 279 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
208 | times->stime = atomic64_read(&atomic_times->stime); | 280 | struct posix_cputimers *pct = &tsk->signal->posix_cputimers; |
209 | times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime); | 281 | |
282 | WARN_ON_ONCE(!pct->timers_active); | ||
283 | |||
284 | proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); | ||
210 | } | 285 | } |
211 | 286 | ||
212 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | 287 | /** |
288 | * thread_group_start_cputime - Start cputime and return a sample | ||
289 | * @tsk: Task for which cputime needs to be started | ||
290 | * @samples: Storage for time samples | ||
291 | * | ||
292 | * The thread group cputime accounting is avoided when there are no posix | ||
293 | * CPU timers armed. Before starting a timer it's required to check whether | ||
294 | * the time accounting is active. If not, a full update of the atomic | ||
295 | * accounting store needs to be done and the accounting enabled. | ||
296 | * | ||
297 | * Updates @samples with an up-to-date sample of the thread group cputimes. | ||
298 | */ | ||
299 | static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples) | ||
213 | { | 300 | { |
214 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 301 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
215 | struct task_cputime sum; | 302 | struct posix_cputimers *pct = &tsk->signal->posix_cputimers; |
216 | 303 | ||
217 | /* Check if cputimer isn't running. This is accessed without locking. */ | 304 | /* Check if cputimer isn't running. This is accessed without locking. */ |
218 | if (!READ_ONCE(cputimer->running)) { | 305 | if (!READ_ONCE(pct->timers_active)) { |
306 | struct task_cputime sum; | ||
307 | |||
219 | /* | 308 | /* |
220 | * The POSIX timer interface allows for absolute time expiry | 309 | * The POSIX timer interface allows for absolute time expiry |
221 | * values through the TIMER_ABSTIME flag, therefore we have | 310 | * values through the TIMER_ABSTIME flag, therefore we have |
@@ -225,94 +314,69 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | |||
225 | update_gt_cputime(&cputimer->cputime_atomic, &sum); | 314 | update_gt_cputime(&cputimer->cputime_atomic, &sum); |
226 | 315 | ||
227 | /* | 316 | /* |
228 | * We're setting cputimer->running without a lock. Ensure | 317 | * We're setting timers_active without a lock. Ensure this |
229 | * this only gets written to in one operation. We set | 318 | * only gets written to in one operation. We set it after |
230 | * running after update_gt_cputime() as a small optimization, | 319 | * update_gt_cputime() as a small optimization, but |
231 | * but barriers are not required because update_gt_cputime() | 320 | * barriers are not required because update_gt_cputime() |
232 | * can handle concurrent updates. | 321 | * can handle concurrent updates. |
233 | */ | 322 | */ |
234 | WRITE_ONCE(cputimer->running, true); | 323 | WRITE_ONCE(pct->timers_active, true); |
235 | } | 324 | } |
236 | sample_cputime_atomic(times, &cputimer->cputime_atomic); | 325 | proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); |
237 | } | 326 | } |
238 | 327 | ||
239 | /* | 328 | static void __thread_group_cputime(struct task_struct *tsk, u64 *samples) |
240 | * Sample a process (thread group) clock for the given group_leader task. | ||
241 | * Must be called with task sighand lock held for safe while_each_thread() | ||
242 | * traversal. | ||
243 | */ | ||
244 | static int cpu_clock_sample_group(const clockid_t which_clock, | ||
245 | struct task_struct *p, | ||
246 | u64 *sample) | ||
247 | { | 329 | { |
248 | struct task_cputime cputime; | 330 | struct task_cputime ct; |
249 | 331 | ||
250 | switch (CPUCLOCK_WHICH(which_clock)) { | 332 | thread_group_cputime(tsk, &ct); |
251 | default: | 333 | store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime); |
252 | return -EINVAL; | ||
253 | case CPUCLOCK_PROF: | ||
254 | thread_group_cputime(p, &cputime); | ||
255 | *sample = cputime.utime + cputime.stime; | ||
256 | break; | ||
257 | case CPUCLOCK_VIRT: | ||
258 | thread_group_cputime(p, &cputime); | ||
259 | *sample = cputime.utime; | ||
260 | break; | ||
261 | case CPUCLOCK_SCHED: | ||
262 | thread_group_cputime(p, &cputime); | ||
263 | *sample = cputime.sum_exec_runtime; | ||
264 | break; | ||
265 | } | ||
266 | return 0; | ||
267 | } | 334 | } |
268 | 335 | ||
269 | static int posix_cpu_clock_get_task(struct task_struct *tsk, | 336 | /* |
270 | const clockid_t which_clock, | 337 | * Sample a process (thread group) clock for the given task clkid. If the |
271 | struct timespec64 *tp) | 338 | * group's cputime accounting is already enabled, read the atomic |
339 | * store. Otherwise a full update is required. Task's sighand lock must be | ||
340 | * held to protect the task traversal on a full update. clkid is already | ||
341 | * validated. | ||
342 | */ | ||
343 | static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, | ||
344 | bool start) | ||
272 | { | 345 | { |
273 | int err = -EINVAL; | 346 | struct thread_group_cputimer *cputimer = &p->signal->cputimer; |
274 | u64 rtn; | 347 | struct posix_cputimers *pct = &p->signal->posix_cputimers; |
348 | u64 samples[CPUCLOCK_MAX]; | ||
275 | 349 | ||
276 | if (CPUCLOCK_PERTHREAD(which_clock)) { | 350 | if (!READ_ONCE(pct->timers_active)) { |
277 | if (same_thread_group(tsk, current)) | 351 | if (start) |
278 | err = cpu_clock_sample(which_clock, tsk, &rtn); | 352 | thread_group_start_cputime(p, samples); |
353 | else | ||
354 | __thread_group_cputime(p, samples); | ||
279 | } else { | 355 | } else { |
280 | if (tsk == current || thread_group_leader(tsk)) | 356 | proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); |
281 | err = cpu_clock_sample_group(which_clock, tsk, &rtn); | ||
282 | } | 357 | } |
283 | 358 | ||
284 | if (!err) | 359 | return samples[clkid]; |
285 | *tp = ns_to_timespec64(rtn); | ||
286 | |||
287 | return err; | ||
288 | } | 360 | } |
289 | 361 | ||
290 | 362 | static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) | |
291 | static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) | ||
292 | { | 363 | { |
293 | const pid_t pid = CPUCLOCK_PID(which_clock); | 364 | const clockid_t clkid = CPUCLOCK_WHICH(clock); |
294 | int err = -EINVAL; | 365 | struct task_struct *tsk; |
366 | u64 t; | ||
295 | 367 | ||
296 | if (pid == 0) { | 368 | tsk = get_task_for_clock_get(clock); |
297 | /* | 369 | if (!tsk) |
298 | * Special case constant value for our own clocks. | 370 | return -EINVAL; |
299 | * We don't have to do any lookup to find ourselves. | ||
300 | */ | ||
301 | err = posix_cpu_clock_get_task(current, which_clock, tp); | ||
302 | } else { | ||
303 | /* | ||
304 | * Find the given PID, and validate that the caller | ||
305 | * should be able to see it. | ||
306 | */ | ||
307 | struct task_struct *p; | ||
308 | rcu_read_lock(); | ||
309 | p = find_task_by_vpid(pid); | ||
310 | if (p) | ||
311 | err = posix_cpu_clock_get_task(p, which_clock, tp); | ||
312 | rcu_read_unlock(); | ||
313 | } | ||
314 | 371 | ||
315 | return err; | 372 | if (CPUCLOCK_PERTHREAD(clock)) |
373 | t = cpu_clock_sample(clkid, tsk); | ||
374 | else | ||
375 | t = cpu_clock_sample_group(clkid, tsk, false); | ||
376 | put_task_struct(tsk); | ||
377 | |||
378 | *tp = ns_to_timespec64(t); | ||
379 | return 0; | ||
316 | } | 380 | } |
317 | 381 | ||
318 | /* | 382 | /* |
@@ -322,44 +386,15 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *t | |||
322 | */ | 386 | */ |
323 | static int posix_cpu_timer_create(struct k_itimer *new_timer) | 387 | static int posix_cpu_timer_create(struct k_itimer *new_timer) |
324 | { | 388 | { |
325 | int ret = 0; | 389 | struct task_struct *p = get_task_for_clock(new_timer->it_clock); |
326 | const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); | ||
327 | struct task_struct *p; | ||
328 | 390 | ||
329 | if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX) | 391 | if (!p) |
330 | return -EINVAL; | 392 | return -EINVAL; |
331 | 393 | ||
332 | new_timer->kclock = &clock_posix_cpu; | 394 | new_timer->kclock = &clock_posix_cpu; |
333 | 395 | timerqueue_init(&new_timer->it.cpu.node); | |
334 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); | ||
335 | |||
336 | rcu_read_lock(); | ||
337 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { | ||
338 | if (pid == 0) { | ||
339 | p = current; | ||
340 | } else { | ||
341 | p = find_task_by_vpid(pid); | ||
342 | if (p && !same_thread_group(p, current)) | ||
343 | p = NULL; | ||
344 | } | ||
345 | } else { | ||
346 | if (pid == 0) { | ||
347 | p = current->group_leader; | ||
348 | } else { | ||
349 | p = find_task_by_vpid(pid); | ||
350 | if (p && !has_group_leader_pid(p)) | ||
351 | p = NULL; | ||
352 | } | ||
353 | } | ||
354 | new_timer->it.cpu.task = p; | 396 | new_timer->it.cpu.task = p; |
355 | if (p) { | 397 | return 0; |
356 | get_task_struct(p); | ||
357 | } else { | ||
358 | ret = -EINVAL; | ||
359 | } | ||
360 | rcu_read_unlock(); | ||
361 | |||
362 | return ret; | ||
363 | } | 398 | } |
364 | 399 | ||
365 | /* | 400 | /* |
@@ -370,12 +405,14 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
370 | */ | 405 | */ |
371 | static int posix_cpu_timer_del(struct k_itimer *timer) | 406 | static int posix_cpu_timer_del(struct k_itimer *timer) |
372 | { | 407 | { |
373 | int ret = 0; | 408 | struct cpu_timer *ctmr = &timer->it.cpu; |
374 | unsigned long flags; | 409 | struct task_struct *p = ctmr->task; |
375 | struct sighand_struct *sighand; | 410 | struct sighand_struct *sighand; |
376 | struct task_struct *p = timer->it.cpu.task; | 411 | unsigned long flags; |
412 | int ret = 0; | ||
377 | 413 | ||
378 | WARN_ON_ONCE(p == NULL); | 414 | if (WARN_ON_ONCE(!p)) |
415 | return -EINVAL; | ||
379 | 416 | ||
380 | /* | 417 | /* |
381 | * Protect against sighand release/switch in exit/exec and process/ | 418 | * Protect against sighand release/switch in exit/exec and process/ |
@@ -384,15 +421,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) | |||
384 | sighand = lock_task_sighand(p, &flags); | 421 | sighand = lock_task_sighand(p, &flags); |
385 | if (unlikely(sighand == NULL)) { | 422 | if (unlikely(sighand == NULL)) { |
386 | /* | 423 | /* |
387 | * We raced with the reaping of the task. | 424 | * This raced with the reaping of the task. The exit cleanup |
388 | * The deletion should have cleared us off the list. | 425 | * should have removed this timer from the timer queue. |
389 | */ | 426 | */ |
390 | WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry)); | 427 | WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node)); |
391 | } else { | 428 | } else { |
392 | if (timer->it.cpu.firing) | 429 | if (timer->it.cpu.firing) |
393 | ret = TIMER_RETRY; | 430 | ret = TIMER_RETRY; |
394 | else | 431 | else |
395 | list_del(&timer->it.cpu.entry); | 432 | cpu_timer_dequeue(ctmr); |
396 | 433 | ||
397 | unlock_task_sighand(p, &flags); | 434 | unlock_task_sighand(p, &flags); |
398 | } | 435 | } |
@@ -403,25 +440,30 @@ static int posix_cpu_timer_del(struct k_itimer *timer) | |||
403 | return ret; | 440 | return ret; |
404 | } | 441 | } |
405 | 442 | ||
406 | static void cleanup_timers_list(struct list_head *head) | 443 | static void cleanup_timerqueue(struct timerqueue_head *head) |
407 | { | 444 | { |
408 | struct cpu_timer_list *timer, *next; | 445 | struct timerqueue_node *node; |
446 | struct cpu_timer *ctmr; | ||
409 | 447 | ||
410 | list_for_each_entry_safe(timer, next, head, entry) | 448 | while ((node = timerqueue_getnext(head))) { |
411 | list_del_init(&timer->entry); | 449 | timerqueue_del(head, node); |
450 | ctmr = container_of(node, struct cpu_timer, node); | ||
451 | ctmr->head = NULL; | ||
452 | } | ||
412 | } | 453 | } |
413 | 454 | ||
414 | /* | 455 | /* |
415 | * Clean out CPU timers still ticking when a thread exited. The task | 456 | * Clean out CPU timers which are still armed when a thread exits. The |
416 | * pointer is cleared, and the expiry time is replaced with the residual | 457 | * timers are only removed from the list. No other updates are done. The |
417 | * time for later timer_gettime calls to return. | 458 | * corresponding posix timers are still accessible, but cannot be rearmed. |
459 | * | ||
418 | * This must be called with the siglock held. | 460 | * This must be called with the siglock held. |
419 | */ | 461 | */ |
420 | static void cleanup_timers(struct list_head *head) | 462 | static void cleanup_timers(struct posix_cputimers *pct) |
421 | { | 463 | { |
422 | cleanup_timers_list(head); | 464 | cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead); |
423 | cleanup_timers_list(++head); | 465 | cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead); |
424 | cleanup_timers_list(++head); | 466 | cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead); |
425 | } | 467 | } |
426 | 468 | ||
427 | /* | 469 | /* |
@@ -431,16 +473,11 @@ static void cleanup_timers(struct list_head *head) | |||
431 | */ | 473 | */ |
432 | void posix_cpu_timers_exit(struct task_struct *tsk) | 474 | void posix_cpu_timers_exit(struct task_struct *tsk) |
433 | { | 475 | { |
434 | cleanup_timers(tsk->cpu_timers); | 476 | cleanup_timers(&tsk->posix_cputimers); |
435 | } | 477 | } |
436 | void posix_cpu_timers_exit_group(struct task_struct *tsk) | 478 | void posix_cpu_timers_exit_group(struct task_struct *tsk) |
437 | { | 479 | { |
438 | cleanup_timers(tsk->signal->cpu_timers); | 480 | cleanup_timers(&tsk->signal->posix_cputimers); |
439 | } | ||
440 | |||
441 | static inline int expires_gt(u64 expires, u64 new_exp) | ||
442 | { | ||
443 | return expires == 0 || expires > new_exp; | ||
444 | } | 481 | } |
445 | 482 | ||
446 | /* | 483 | /* |
@@ -449,58 +486,33 @@ static inline int expires_gt(u64 expires, u64 new_exp) | |||
449 | */ | 486 | */ |
450 | static void arm_timer(struct k_itimer *timer) | 487 | static void arm_timer(struct k_itimer *timer) |
451 | { | 488 | { |
452 | struct task_struct *p = timer->it.cpu.task; | 489 | int clkidx = CPUCLOCK_WHICH(timer->it_clock); |
453 | struct list_head *head, *listpos; | 490 | struct cpu_timer *ctmr = &timer->it.cpu; |
454 | struct task_cputime *cputime_expires; | 491 | u64 newexp = cpu_timer_getexpires(ctmr); |
455 | struct cpu_timer_list *const nt = &timer->it.cpu; | 492 | struct task_struct *p = ctmr->task; |
456 | struct cpu_timer_list *next; | 493 | struct posix_cputimer_base *base; |
457 | 494 | ||
458 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 495 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) |
459 | head = p->cpu_timers; | 496 | base = p->posix_cputimers.bases + clkidx; |
460 | cputime_expires = &p->cputime_expires; | 497 | else |
461 | } else { | 498 | base = p->signal->posix_cputimers.bases + clkidx; |
462 | head = p->signal->cpu_timers; | 499 | |
463 | cputime_expires = &p->signal->cputime_expires; | 500 | if (!cpu_timer_enqueue(&base->tqhead, ctmr)) |
464 | } | 501 | return; |
465 | head += CPUCLOCK_WHICH(timer->it_clock); | ||
466 | |||
467 | listpos = head; | ||
468 | list_for_each_entry(next, head, entry) { | ||
469 | if (nt->expires < next->expires) | ||
470 | break; | ||
471 | listpos = &next->entry; | ||
472 | } | ||
473 | list_add(&nt->entry, listpos); | ||
474 | |||
475 | if (listpos == head) { | ||
476 | u64 exp = nt->expires; | ||
477 | 502 | ||
478 | /* | 503 | /* |
479 | * We are the new earliest-expiring POSIX 1.b timer, hence | 504 | * We are the new earliest-expiring POSIX 1.b timer, hence |
480 | * need to update expiration cache. Take into account that | 505 | * need to update expiration cache. Take into account that |
481 | * for process timers we share expiration cache with itimers | 506 | * for process timers we share expiration cache with itimers |
482 | * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. | 507 | * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. |
483 | */ | 508 | */ |
509 | if (newexp < base->nextevt) | ||
510 | base->nextevt = newexp; | ||
484 | 511 | ||
485 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 512 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) |
486 | case CPUCLOCK_PROF: | 513 | tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); |
487 | if (expires_gt(cputime_expires->prof_exp, exp)) | 514 | else |
488 | cputime_expires->prof_exp = exp; | 515 | tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); |
489 | break; | ||
490 | case CPUCLOCK_VIRT: | ||
491 | if (expires_gt(cputime_expires->virt_exp, exp)) | ||
492 | cputime_expires->virt_exp = exp; | ||
493 | break; | ||
494 | case CPUCLOCK_SCHED: | ||
495 | if (expires_gt(cputime_expires->sched_exp, exp)) | ||
496 | cputime_expires->sched_exp = exp; | ||
497 | break; | ||
498 | } | ||
499 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) | ||
500 | tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); | ||
501 | else | ||
502 | tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); | ||
503 | } | ||
504 | } | 516 | } |
505 | 517 | ||
506 | /* | 518 | /* |
@@ -508,24 +520,26 @@ static void arm_timer(struct k_itimer *timer) | |||
508 | */ | 520 | */ |
509 | static void cpu_timer_fire(struct k_itimer *timer) | 521 | static void cpu_timer_fire(struct k_itimer *timer) |
510 | { | 522 | { |
523 | struct cpu_timer *ctmr = &timer->it.cpu; | ||
524 | |||
511 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { | 525 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { |
512 | /* | 526 | /* |
513 | * User don't want any signal. | 527 | * User don't want any signal. |
514 | */ | 528 | */ |
515 | timer->it.cpu.expires = 0; | 529 | cpu_timer_setexpires(ctmr, 0); |
516 | } else if (unlikely(timer->sigq == NULL)) { | 530 | } else if (unlikely(timer->sigq == NULL)) { |
517 | /* | 531 | /* |
518 | * This a special case for clock_nanosleep, | 532 | * This a special case for clock_nanosleep, |
519 | * not a normal timer from sys_timer_create. | 533 | * not a normal timer from sys_timer_create. |
520 | */ | 534 | */ |
521 | wake_up_process(timer->it_process); | 535 | wake_up_process(timer->it_process); |
522 | timer->it.cpu.expires = 0; | 536 | cpu_timer_setexpires(ctmr, 0); |
523 | } else if (!timer->it_interval) { | 537 | } else if (!timer->it_interval) { |
524 | /* | 538 | /* |
525 | * One-shot timer. Clear it as soon as it's fired. | 539 | * One-shot timer. Clear it as soon as it's fired. |
526 | */ | 540 | */ |
527 | posix_timer_event(timer, 0); | 541 | posix_timer_event(timer, 0); |
528 | timer->it.cpu.expires = 0; | 542 | cpu_timer_setexpires(ctmr, 0); |
529 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { | 543 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { |
530 | /* | 544 | /* |
531 | * The signal did not get queued because the signal | 545 | * The signal did not get queued because the signal |
@@ -539,33 +553,6 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
539 | } | 553 | } |
540 | 554 | ||
541 | /* | 555 | /* |
542 | * Sample a process (thread group) timer for the given group_leader task. | ||
543 | * Must be called with task sighand lock held for safe while_each_thread() | ||
544 | * traversal. | ||
545 | */ | ||
546 | static int cpu_timer_sample_group(const clockid_t which_clock, | ||
547 | struct task_struct *p, u64 *sample) | ||
548 | { | ||
549 | struct task_cputime cputime; | ||
550 | |||
551 | thread_group_cputimer(p, &cputime); | ||
552 | switch (CPUCLOCK_WHICH(which_clock)) { | ||
553 | default: | ||
554 | return -EINVAL; | ||
555 | case CPUCLOCK_PROF: | ||
556 | *sample = cputime.utime + cputime.stime; | ||
557 | break; | ||
558 | case CPUCLOCK_VIRT: | ||
559 | *sample = cputime.utime; | ||
560 | break; | ||
561 | case CPUCLOCK_SCHED: | ||
562 | *sample = cputime.sum_exec_runtime; | ||
563 | break; | ||
564 | } | ||
565 | return 0; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Guts of sys_timer_settime for CPU timers. | 556 | * Guts of sys_timer_settime for CPU timers. |
570 | * This is called with the timer locked and interrupts disabled. | 557 | * This is called with the timer locked and interrupts disabled. |
571 | * If we return TIMER_RETRY, it's necessary to release the timer's lock | 558 | * If we return TIMER_RETRY, it's necessary to release the timer's lock |
@@ -574,13 +561,16 @@ static int cpu_timer_sample_group(const clockid_t which_clock, | |||
574 | static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | 561 | static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, |
575 | struct itimerspec64 *new, struct itimerspec64 *old) | 562 | struct itimerspec64 *new, struct itimerspec64 *old) |
576 | { | 563 | { |
577 | unsigned long flags; | 564 | clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); |
578 | struct sighand_struct *sighand; | ||
579 | struct task_struct *p = timer->it.cpu.task; | ||
580 | u64 old_expires, new_expires, old_incr, val; | 565 | u64 old_expires, new_expires, old_incr, val; |
581 | int ret; | 566 | struct cpu_timer *ctmr = &timer->it.cpu; |
567 | struct task_struct *p = ctmr->task; | ||
568 | struct sighand_struct *sighand; | ||
569 | unsigned long flags; | ||
570 | int ret = 0; | ||
582 | 571 | ||
583 | WARN_ON_ONCE(p == NULL); | 572 | if (WARN_ON_ONCE(!p)) |
573 | return -EINVAL; | ||
584 | 574 | ||
585 | /* | 575 | /* |
586 | * Use the to_ktime conversion because that clamps the maximum | 576 | * Use the to_ktime conversion because that clamps the maximum |
@@ -597,22 +587,21 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
597 | * If p has just been reaped, we can no | 587 | * If p has just been reaped, we can no |
598 | * longer get any information about it at all. | 588 | * longer get any information about it at all. |
599 | */ | 589 | */ |
600 | if (unlikely(sighand == NULL)) { | 590 | if (unlikely(sighand == NULL)) |
601 | return -ESRCH; | 591 | return -ESRCH; |
602 | } | ||
603 | 592 | ||
604 | /* | 593 | /* |
605 | * Disarm any old timer after extracting its expiry time. | 594 | * Disarm any old timer after extracting its expiry time. |
606 | */ | 595 | */ |
607 | |||
608 | ret = 0; | ||
609 | old_incr = timer->it_interval; | 596 | old_incr = timer->it_interval; |
610 | old_expires = timer->it.cpu.expires; | 597 | old_expires = cpu_timer_getexpires(ctmr); |
598 | |||
611 | if (unlikely(timer->it.cpu.firing)) { | 599 | if (unlikely(timer->it.cpu.firing)) { |
612 | timer->it.cpu.firing = -1; | 600 | timer->it.cpu.firing = -1; |
613 | ret = TIMER_RETRY; | 601 | ret = TIMER_RETRY; |
614 | } else | 602 | } else { |
615 | list_del_init(&timer->it.cpu.entry); | 603 | cpu_timer_dequeue(ctmr); |
604 | } | ||
616 | 605 | ||
617 | /* | 606 | /* |
618 | * We need to sample the current value to convert the new | 607 | * We need to sample the current value to convert the new |
@@ -622,11 +611,10 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
622 | * times (in arm_timer). With an absolute time, we must | 611 | * times (in arm_timer). With an absolute time, we must |
623 | * check if it's already passed. In short, we need a sample. | 612 | * check if it's already passed. In short, we need a sample. |
624 | */ | 613 | */ |
625 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 614 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) |
626 | cpu_clock_sample(timer->it_clock, p, &val); | 615 | val = cpu_clock_sample(clkid, p); |
627 | } else { | 616 | else |
628 | cpu_timer_sample_group(timer->it_clock, p, &val); | 617 | val = cpu_clock_sample_group(clkid, p, true); |
629 | } | ||
630 | 618 | ||
631 | if (old) { | 619 | if (old) { |
632 | if (old_expires == 0) { | 620 | if (old_expires == 0) { |
@@ -634,18 +622,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
634 | old->it_value.tv_nsec = 0; | 622 | old->it_value.tv_nsec = 0; |
635 | } else { | 623 | } else { |
636 | /* | 624 | /* |
637 | * Update the timer in case it has | 625 | * Update the timer in case it has overrun already. |
638 | * overrun already. If it has, | 626 | * If it has, we'll report it as having overrun and |
639 | * we'll report it as having overrun | 627 | * with the next reloaded timer already ticking, |
640 | * and with the next reloaded timer | 628 | * though we are swallowing that pending |
641 | * already ticking, though we are | 629 | * notification here to install the new setting. |
642 | * swallowing that pending | ||
643 | * notification here to install the | ||
644 | * new setting. | ||
645 | */ | 630 | */ |
646 | bump_cpu_timer(timer, val); | 631 | u64 exp = bump_cpu_timer(timer, val); |
647 | if (val < timer->it.cpu.expires) { | 632 | |
648 | old_expires = timer->it.cpu.expires - val; | 633 | if (val < exp) { |
634 | old_expires = exp - val; | ||
649 | old->it_value = ns_to_timespec64(old_expires); | 635 | old->it_value = ns_to_timespec64(old_expires); |
650 | } else { | 636 | } else { |
651 | old->it_value.tv_nsec = 1; | 637 | old->it_value.tv_nsec = 1; |
@@ -674,7 +660,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
674 | * For a timer with no notification action, we don't actually | 660 | * For a timer with no notification action, we don't actually |
675 | * arm the timer (we'll just fake it for timer_gettime). | 661 | * arm the timer (we'll just fake it for timer_gettime). |
676 | */ | 662 | */ |
677 | timer->it.cpu.expires = new_expires; | 663 | cpu_timer_setexpires(ctmr, new_expires); |
678 | if (new_expires != 0 && val < new_expires) { | 664 | if (new_expires != 0 && val < new_expires) { |
679 | arm_timer(timer); | 665 | arm_timer(timer); |
680 | } | 666 | } |
@@ -715,24 +701,27 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
715 | 701 | ||
716 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) | 702 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) |
717 | { | 703 | { |
718 | u64 now; | 704 | clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); |
719 | struct task_struct *p = timer->it.cpu.task; | 705 | struct cpu_timer *ctmr = &timer->it.cpu; |
706 | u64 now, expires = cpu_timer_getexpires(ctmr); | ||
707 | struct task_struct *p = ctmr->task; | ||
720 | 708 | ||
721 | WARN_ON_ONCE(p == NULL); | 709 | if (WARN_ON_ONCE(!p)) |
710 | return; | ||
722 | 711 | ||
723 | /* | 712 | /* |
724 | * Easy part: convert the reload time. | 713 | * Easy part: convert the reload time. |
725 | */ | 714 | */ |
726 | itp->it_interval = ktime_to_timespec64(timer->it_interval); | 715 | itp->it_interval = ktime_to_timespec64(timer->it_interval); |
727 | 716 | ||
728 | if (!timer->it.cpu.expires) | 717 | if (!expires) |
729 | return; | 718 | return; |
730 | 719 | ||
731 | /* | 720 | /* |
732 | * Sample the clock to take the difference with the expiry time. | 721 | * Sample the clock to take the difference with the expiry time. |
733 | */ | 722 | */ |
734 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 723 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { |
735 | cpu_clock_sample(timer->it_clock, p, &now); | 724 | now = cpu_clock_sample(clkid, p); |
736 | } else { | 725 | } else { |
737 | struct sighand_struct *sighand; | 726 | struct sighand_struct *sighand; |
738 | unsigned long flags; | 727 | unsigned long flags; |
@@ -747,18 +736,18 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp | |||
747 | /* | 736 | /* |
748 | * The process has been reaped. | 737 | * The process has been reaped. |
749 | * We can't even collect a sample any more. | 738 | * We can't even collect a sample any more. |
750 | * Call the timer disarmed, nothing else to do. | 739 | * Disarm the timer, nothing else to do. |
751 | */ | 740 | */ |
752 | timer->it.cpu.expires = 0; | 741 | cpu_timer_setexpires(ctmr, 0); |
753 | return; | 742 | return; |
754 | } else { | 743 | } else { |
755 | cpu_timer_sample_group(timer->it_clock, p, &now); | 744 | now = cpu_clock_sample_group(clkid, p, false); |
756 | unlock_task_sighand(p, &flags); | 745 | unlock_task_sighand(p, &flags); |
757 | } | 746 | } |
758 | } | 747 | } |
759 | 748 | ||
760 | if (now < timer->it.cpu.expires) { | 749 | if (now < expires) { |
761 | itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now); | 750 | itp->it_value = ns_to_timespec64(expires - now); |
762 | } else { | 751 | } else { |
763 | /* | 752 | /* |
764 | * The timer should have expired already, but the firing | 753 | * The timer should have expired already, but the firing |
@@ -769,26 +758,42 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp | |||
769 | } | 758 | } |
770 | } | 759 | } |
771 | 760 | ||
772 | static unsigned long long | 761 | #define MAX_COLLECTED 20 |
773 | check_timers_list(struct list_head *timers, | ||
774 | struct list_head *firing, | ||
775 | unsigned long long curr) | ||
776 | { | ||
777 | int maxfire = 20; | ||
778 | 762 | ||
779 | while (!list_empty(timers)) { | 763 | static u64 collect_timerqueue(struct timerqueue_head *head, |
780 | struct cpu_timer_list *t; | 764 | struct list_head *firing, u64 now) |
765 | { | ||
766 | struct timerqueue_node *next; | ||
767 | int i = 0; | ||
768 | |||
769 | while ((next = timerqueue_getnext(head))) { | ||
770 | struct cpu_timer *ctmr; | ||
771 | u64 expires; | ||
772 | |||
773 | ctmr = container_of(next, struct cpu_timer, node); | ||
774 | expires = cpu_timer_getexpires(ctmr); | ||
775 | /* Limit the number of timers to expire at once */ | ||
776 | if (++i == MAX_COLLECTED || now < expires) | ||
777 | return expires; | ||
778 | |||
779 | ctmr->firing = 1; | ||
780 | cpu_timer_dequeue(ctmr); | ||
781 | list_add_tail(&ctmr->elist, firing); | ||
782 | } | ||
781 | 783 | ||
782 | t = list_first_entry(timers, struct cpu_timer_list, entry); | 784 | return U64_MAX; |
785 | } | ||
783 | 786 | ||
784 | if (!--maxfire || curr < t->expires) | 787 | static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, |
785 | return t->expires; | 788 | struct list_head *firing) |
789 | { | ||
790 | struct posix_cputimer_base *base = pct->bases; | ||
791 | int i; | ||
786 | 792 | ||
787 | t->firing = 1; | 793 | for (i = 0; i < CPUCLOCK_MAX; i++, base++) { |
788 | list_move_tail(&t->entry, firing); | 794 | base->nextevt = collect_timerqueue(&base->tqhead, firing, |
795 | samples[i]); | ||
789 | } | 796 | } |
790 | |||
791 | return 0; | ||
792 | } | 797 | } |
793 | 798 | ||
794 | static inline void check_dl_overrun(struct task_struct *tsk) | 799 | static inline void check_dl_overrun(struct task_struct *tsk) |
@@ -799,6 +804,20 @@ static inline void check_dl_overrun(struct task_struct *tsk) | |||
799 | } | 804 | } |
800 | } | 805 | } |
801 | 806 | ||
807 | static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) | ||
808 | { | ||
809 | if (time < limit) | ||
810 | return false; | ||
811 | |||
812 | if (print_fatal_signals) { | ||
813 | pr_info("%s Watchdog Timeout (%s): %s[%d]\n", | ||
814 | rt ? "RT" : "CPU", hard ? "hard" : "soft", | ||
815 | current->comm, task_pid_nr(current)); | ||
816 | } | ||
817 | __group_send_sig_info(signo, SEND_SIG_PRIV, current); | ||
818 | return true; | ||
819 | } | ||
820 | |||
802 | /* | 821 | /* |
803 | * Check for any per-thread CPU timers that have fired and move them off | 822 | * Check for any per-thread CPU timers that have fired and move them off |
804 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the | 823 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the |
@@ -807,76 +826,50 @@ static inline void check_dl_overrun(struct task_struct *tsk) | |||
807 | static void check_thread_timers(struct task_struct *tsk, | 826 | static void check_thread_timers(struct task_struct *tsk, |
808 | struct list_head *firing) | 827 | struct list_head *firing) |
809 | { | 828 | { |
810 | struct list_head *timers = tsk->cpu_timers; | 829 | struct posix_cputimers *pct = &tsk->posix_cputimers; |
811 | struct task_cputime *tsk_expires = &tsk->cputime_expires; | 830 | u64 samples[CPUCLOCK_MAX]; |
812 | u64 expires; | ||
813 | unsigned long soft; | 831 | unsigned long soft; |
814 | 832 | ||
815 | if (dl_task(tsk)) | 833 | if (dl_task(tsk)) |
816 | check_dl_overrun(tsk); | 834 | check_dl_overrun(tsk); |
817 | 835 | ||
818 | /* | 836 | if (expiry_cache_is_inactive(pct)) |
819 | * If cputime_expires is zero, then there are no active | ||
820 | * per thread CPU timers. | ||
821 | */ | ||
822 | if (task_cputime_zero(&tsk->cputime_expires)) | ||
823 | return; | 837 | return; |
824 | 838 | ||
825 | expires = check_timers_list(timers, firing, prof_ticks(tsk)); | 839 | task_sample_cputime(tsk, samples); |
826 | tsk_expires->prof_exp = expires; | 840 | collect_posix_cputimers(pct, samples, firing); |
827 | |||
828 | expires = check_timers_list(++timers, firing, virt_ticks(tsk)); | ||
829 | tsk_expires->virt_exp = expires; | ||
830 | |||
831 | tsk_expires->sched_exp = check_timers_list(++timers, firing, | ||
832 | tsk->se.sum_exec_runtime); | ||
833 | 841 | ||
834 | /* | 842 | /* |
835 | * Check for the special case thread timers. | 843 | * Check for the special case thread timers. |
836 | */ | 844 | */ |
837 | soft = task_rlimit(tsk, RLIMIT_RTTIME); | 845 | soft = task_rlimit(tsk, RLIMIT_RTTIME); |
838 | if (soft != RLIM_INFINITY) { | 846 | if (soft != RLIM_INFINITY) { |
847 | /* Task RT timeout is accounted in jiffies. RTTIME is usec */ | ||
848 | unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); | ||
839 | unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); | 849 | unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); |
840 | 850 | ||
851 | /* At the hard limit, send SIGKILL. No further action. */ | ||
841 | if (hard != RLIM_INFINITY && | 852 | if (hard != RLIM_INFINITY && |
842 | tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { | 853 | check_rlimit(rttime, hard, SIGKILL, true, true)) |
843 | /* | ||
844 | * At the hard limit, we just die. | ||
845 | * No need to calculate anything else now. | ||
846 | */ | ||
847 | if (print_fatal_signals) { | ||
848 | pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", | ||
849 | tsk->comm, task_pid_nr(tsk)); | ||
850 | } | ||
851 | __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); | ||
852 | return; | 854 | return; |
853 | } | 855 | |
854 | if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { | 856 | /* At the soft limit, send a SIGXCPU every second */ |
855 | /* | 857 | if (check_rlimit(rttime, soft, SIGXCPU, true, false)) { |
856 | * At the soft limit, send a SIGXCPU every second. | 858 | soft += USEC_PER_SEC; |
857 | */ | 859 | tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft; |
858 | if (soft < hard) { | ||
859 | soft += USEC_PER_SEC; | ||
860 | tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = | ||
861 | soft; | ||
862 | } | ||
863 | if (print_fatal_signals) { | ||
864 | pr_info("RT Watchdog Timeout (soft): %s[%d]\n", | ||
865 | tsk->comm, task_pid_nr(tsk)); | ||
866 | } | ||
867 | __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); | ||
868 | } | 860 | } |
869 | } | 861 | } |
870 | if (task_cputime_zero(tsk_expires)) | 862 | |
863 | if (expiry_cache_is_inactive(pct)) | ||
871 | tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); | 864 | tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); |
872 | } | 865 | } |
873 | 866 | ||
874 | static inline void stop_process_timers(struct signal_struct *sig) | 867 | static inline void stop_process_timers(struct signal_struct *sig) |
875 | { | 868 | { |
876 | struct thread_group_cputimer *cputimer = &sig->cputimer; | 869 | struct posix_cputimers *pct = &sig->posix_cputimers; |
877 | 870 | ||
878 | /* Turn off cputimer->running. This is done without locking. */ | 871 | /* Turn off the active flag. This is done without locking. */ |
879 | WRITE_ONCE(cputimer->running, false); | 872 | WRITE_ONCE(pct->timers_active, false); |
880 | tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); | 873 | tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); |
881 | } | 874 | } |
882 | 875 | ||
@@ -898,7 +891,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | |||
898 | __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); | 891 | __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); |
899 | } | 892 | } |
900 | 893 | ||
901 | if (it->expires && (!*expires || it->expires < *expires)) | 894 | if (it->expires && it->expires < *expires) |
902 | *expires = it->expires; | 895 | *expires = it->expires; |
903 | } | 896 | } |
904 | 897 | ||
@@ -911,87 +904,69 @@ static void check_process_timers(struct task_struct *tsk, | |||
911 | struct list_head *firing) | 904 | struct list_head *firing) |
912 | { | 905 | { |
 	struct signal_struct *const sig = tsk->signal;
-	u64 utime, ptime, virt_expires, prof_expires;
-	u64 sum_sched_runtime, sched_expires;
-	struct list_head *timers = sig->cpu_timers;
-	struct task_cputime cputime;
+	struct posix_cputimers *pct = &sig->posix_cputimers;
+	u64 samples[CPUCLOCK_MAX];
 	unsigned long soft;
 
 	/*
-	 * If cputimer is not running, then there are no active
-	 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
+	 * If there are no active process wide timers (POSIX 1.b, itimers,
+	 * RLIMIT_CPU) nothing to check. Also skip the process wide timer
+	 * processing when there is already another task handling them.
 	 */
-	if (!READ_ONCE(tsk->signal->cputimer.running))
+	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
 		return;
 
 	/*
 	 * Signify that a thread is checking for process timers.
 	 * Write access to this field is protected by the sighand lock.
 	 */
-	sig->cputimer.checking_timer = true;
+	pct->expiry_active = true;
 
 	/*
-	 * Collect the current process totals.
+	 * Collect the current process totals. Group accounting is active
+	 * so the sample can be taken directly.
 	 */
-	thread_group_cputimer(tsk, &cputime);
-	utime = cputime.utime;
-	ptime = utime + cputime.stime;
-	sum_sched_runtime = cputime.sum_exec_runtime;
-
-	prof_expires = check_timers_list(timers, firing, ptime);
-	virt_expires = check_timers_list(++timers, firing, utime);
-	sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
+	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
+	collect_posix_cputimers(pct, samples, firing);
 
 	/*
 	 * Check for the special case process timers.
 	 */
-	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
-			 SIGPROF);
-	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
-			 SIGVTALRM);
+	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
+			 &pct->bases[CPUCLOCK_PROF].nextevt,
+			 samples[CPUCLOCK_PROF], SIGPROF);
+	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
+			 &pct->bases[CPUCLOCK_VIRT].nextevt,
+			 samples[CPUCLOCK_VIRT], SIGVTALRM);
+
 	soft = task_rlimit(tsk, RLIMIT_CPU);
 	if (soft != RLIM_INFINITY) {
-		unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);
+		/* RLIMIT_CPU is in seconds. Samples are nanoseconds */
 		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
-		u64 x;
-		if (psecs >= hard) {
-			/*
-			 * At the hard limit, we just die.
-			 * No need to calculate anything else now.
-			 */
-			if (print_fatal_signals) {
-				pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
-					tsk->comm, task_pid_nr(tsk));
-			}
-			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+		u64 ptime = samples[CPUCLOCK_PROF];
+		u64 softns = (u64)soft * NSEC_PER_SEC;
+		u64 hardns = (u64)hard * NSEC_PER_SEC;
+
+		/* At the hard limit, send SIGKILL. No further action. */
+		if (hard != RLIM_INFINITY &&
+		    check_rlimit(ptime, hardns, SIGKILL, false, true))
 			return;
+
+		/* At the soft limit, send a SIGXCPU every second */
+		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
+			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
+			softns += NSEC_PER_SEC;
 		}
-		if (psecs >= soft) {
-			/*
-			 * At the soft limit, send a SIGXCPU every second.
-			 */
-			if (print_fatal_signals) {
-				pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
-					tsk->comm, task_pid_nr(tsk));
-			}
-			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
-			if (soft < hard) {
-				soft++;
-				sig->rlim[RLIMIT_CPU].rlim_cur = soft;
-			}
-		}
-		x = soft * NSEC_PER_SEC;
-		if (!prof_expires || x < prof_expires)
-			prof_expires = x;
+
+		/* Update the expiry cache */
+		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
+			pct->bases[CPUCLOCK_PROF].nextevt = softns;
 	}
 
-	sig->cputime_expires.prof_exp = prof_expires;
-	sig->cputime_expires.virt_exp = virt_expires;
-	sig->cputime_expires.sched_exp = sched_expires;
-	if (task_cputime_zero(&sig->cputime_expires))
+	if (expiry_cache_is_inactive(pct))
 		stop_process_timers(sig);
 
-	sig->cputimer.checking_timer = false;
+	pct->expiry_active = false;
 }
 
 /*
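The RLIMIT_CPU handling that check_rlimit() centralises here is visible from userspace: past the soft limit the process receives SIGXCPU roughly once per CPU-second, and at the hard limit it is killed with SIGKILL. A minimal sketch against the documented setrlimit(2) semantics; the 1s soft / 3s hard values are arbitrary demo numbers:

#include <signal.h>
#include <stdio.h>
#include <sys/resource.h>

static volatile sig_atomic_t xcpu_count;

static void on_xcpu(int sig)
{
	(void)sig;
	xcpu_count++;	/* async-signal-safe: just count the hits */
}

int main(void)
{
	/* Arbitrary demo values: soft limit 1s, hard limit 3s of CPU time */
	struct rlimit rl = { .rlim_cur = 1, .rlim_max = 3 };
	struct sigaction sa = { .sa_handler = on_xcpu };

	sigemptyset(&sa.sa_mask);
	sigaction(SIGXCPU, &sa, NULL);

	if (setrlimit(RLIMIT_CPU, &rl)) {
		perror("setrlimit");
		return 1;
	}

	/* Burn CPU: SIGXCPU fires at ~1s of CPU time and then roughly once
	 * per CPU-second; SIGKILL terminates the process at ~3s. */
	for (;;) {
		if (xcpu_count) {
			printf("SIGXCPU #%d\n", (int)xcpu_count);
			xcpu_count = 0;
		}
	}
}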
@@ -1000,18 +975,21 @@ static void check_process_timers(struct task_struct *tsk,
  */
 static void posix_cpu_timer_rearm(struct k_itimer *timer)
 {
+	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	struct task_struct *p = ctmr->task;
 	struct sighand_struct *sighand;
 	unsigned long flags;
-	struct task_struct *p = timer->it.cpu.task;
 	u64 now;
 
-	WARN_ON_ONCE(p == NULL);
+	if (WARN_ON_ONCE(!p))
+		return;
 
 	/*
 	 * Fetch the current sample and update the timer's expiry time.
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-		cpu_clock_sample(timer->it_clock, p, &now);
+		now = cpu_clock_sample(clkid, p);
 		bump_cpu_timer(timer, now);
 		if (unlikely(p->exit_state))
 			return;
@@ -1031,13 +1009,13 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
 			 */
-			timer->it.cpu.expires = 0;
+			cpu_timer_setexpires(ctmr, 0);
 			return;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
 			/* If the process is dying, no need to rearm */
 			goto unlock;
 		}
-		cpu_timer_sample_group(timer->it_clock, p, &now);
+		now = cpu_clock_sample_group(clkid, p, true);
 		bump_cpu_timer(timer, now);
 		/* Leave the sighand locked for the call below. */
 	}
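posix_cpu_timer_rearm() is what pushes a periodic CPU-time timer forward after it fired. A hedged userspace sketch that exercises this path with a periodic per-thread CPU clock timer, using the standard POSIX timer API (link with -lrt on older glibc; error handling kept minimal):

#include <signal.h>
#include <stdio.h>
#include <time.h>

static volatile sig_atomic_t fired;

static void on_tick(int sig)
{
	(void)sig;
	fired++;
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_tick };
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGALRM,
	};
	/* Expire after 100ms of consumed CPU time, then every further 100ms */
	struct itimerspec its = {
		.it_value    = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 },
		.it_interval = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 },
	};
	timer_t tid;

	sigemptyset(&sa.sa_mask);
	sigaction(SIGALRM, &sa, NULL);

	if (timer_create(CLOCK_THREAD_CPUTIME_ID, &sev, &tid)) {
		perror("timer_create");
		return 1;
	}
	if (timer_settime(tid, 0, &its, NULL)) {
		perror("timer_settime");
		return 1;
	}

	/* Burn CPU until the periodic timer has been rearmed ten times */
	while (fired < 10)
		;
	printf("timer fired %d times\n", (int)fired);
	timer_delete(tid);
	return 0;
}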
@@ -1051,26 +1029,24 @@ unlock:
 }
 
 /**
- * task_cputime_expired - Compare two task_cputime entities.
+ * task_cputimers_expired - Check whether posix CPU timers are expired
  *
- * @sample:	The task_cputime structure to be checked for expiration.
- * @expires:	Expiration times, against which @sample will be checked.
+ * @samples:	Array of current samples for the CPUCLOCK clocks
+ * @pct:	Pointer to a posix_cputimers container
  *
- * Checks @sample against @expires to see if any field of @sample has expired.
- * Returns true if any field of the former is greater than the corresponding
- * field of the latter if the latter field is set. Otherwise returns false.
+ * Returns true if any member of @samples is greater than the corresponding
+ * member of @pct->bases[CLK].nextevt. False otherwise
  */
-static inline int task_cputime_expired(const struct task_cputime *sample,
-					const struct task_cputime *expires)
+static inline bool
+task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct)
 {
-	if (expires->utime && sample->utime >= expires->utime)
-		return 1;
-	if (expires->stime && sample->utime + sample->stime >= expires->stime)
-		return 1;
-	if (expires->sum_exec_runtime != 0 &&
-	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
-		return 1;
-	return 0;
+	int i;
+
+	for (i = 0; i < CPUCLOCK_MAX; i++) {
+		if (sample[i] >= pct->bases[i].nextevt)
+			return true;
+	}
+	return false;
 }
 
 /**
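task_cputimers_expired() leans on the expiry cache holding one nanosecond value per clock (PROF, VIRT, SCHED); a slot with no armed timer is assumed to hold U64_MAX elsewhere in this series, so the >= comparison can never match for it. A standalone sketch of that idea, with illustrative names rather than the kernel structures:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NCLOCKS 3			/* PROF, VIRT, SCHED */
#define DISARMED UINT64_MAX		/* no timer queued on this clock */

struct expiry_cache {
	uint64_t nextevt[NCLOCKS];	/* earliest expiry per clock, in ns */
};

/* Same shape as task_cputimers_expired(): any sample past its cache slot? */
static bool cache_expired(const uint64_t *samples, const struct expiry_cache *c)
{
	for (int i = 0; i < NCLOCKS; i++) {
		if (samples[i] >= c->nextevt[i])
			return true;
	}
	return false;
}

int main(void)
{
	struct expiry_cache c = { .nextevt = { 2000, DISARMED, DISARMED } };
	uint64_t before[NCLOCKS] = { 1500, 900, 3000 };
	uint64_t after[NCLOCKS]  = { 2100, 950, 3200 };

	/* Disarmed slots never match: 3000 < UINT64_MAX */
	printf("before: %s\n", cache_expired(before, &c) ? "expired" : "not yet");
	/* The PROF sample crossed 2000ns, so the slow path would run */
	printf("after:  %s\n", cache_expired(after, &c) ? "expired" : "not yet");
	return 0;
}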
@@ -1083,48 +1059,50 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  * timers and compare them with the corresponding expiration times. Return
  * true if a timer has expired, else return false.
  */
-static inline int fastpath_timer_check(struct task_struct *tsk)
+static inline bool fastpath_timer_check(struct task_struct *tsk)
 {
+	struct posix_cputimers *pct = &tsk->posix_cputimers;
 	struct signal_struct *sig;
 
-	if (!task_cputime_zero(&tsk->cputime_expires)) {
-		struct task_cputime task_sample;
+	if (!expiry_cache_is_inactive(pct)) {
+		u64 samples[CPUCLOCK_MAX];
 
-		task_cputime(tsk, &task_sample.utime, &task_sample.stime);
-		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
-		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-			return 1;
+		task_sample_cputime(tsk, samples);
+		if (task_cputimers_expired(samples, pct))
+			return true;
 	}
 
 	sig = tsk->signal;
+	pct = &sig->posix_cputimers;
 	/*
-	 * Check if thread group timers expired when the cputimer is
-	 * running and no other thread in the group is already checking
-	 * for thread group cputimers. These fields are read without the
-	 * sighand lock. However, this is fine because this is meant to
-	 * be a fastpath heuristic to determine whether we should try to
-	 * acquire the sighand lock to check/handle timers.
+	 * Check if thread group timers expired when timers are active and
+	 * no other thread in the group is already handling expiry for
+	 * thread group cputimers. These fields are read without the
+	 * sighand lock. However, this is fine because this is meant to be
+	 * a fastpath heuristic to determine whether we should try to
+	 * acquire the sighand lock to handle timer expiry.
 	 *
-	 * In the worst case scenario, if 'running' or 'checking_timer' gets
-	 * set but the current thread doesn't see the change yet, we'll wait
-	 * until the next thread in the group gets a scheduler interrupt to
-	 * handle the timer. This isn't an issue in practice because these
-	 * types of delays with signals actually getting sent are expected.
+	 * In the worst case scenario, if concurrently timers_active is set
+	 * or expiry_active is cleared, but the current thread doesn't see
+	 * the change yet, the timer checks are delayed until the next
+	 * thread in the group gets a scheduler interrupt to handle the
+	 * timer. This isn't an issue in practice because these types of
+	 * delays with signals actually getting sent are expected.
 	 */
-	if (READ_ONCE(sig->cputimer.running) &&
-	    !READ_ONCE(sig->cputimer.checking_timer)) {
-		struct task_cputime group_sample;
+	if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
+		u64 samples[CPUCLOCK_MAX];
 
-		sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
+		proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
+					   samples);
 
-		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
-			return 1;
+		if (task_cputimers_expired(samples, pct))
+			return true;
 	}
 
 	if (dl_task(tsk) && tsk->dl.dl_overrun)
-		return 1;
+		return true;
 
-	return 0;
+	return false;
 }
 
 /*
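The thread-group branch of the fastpath is what a process-wide CPU timer relies on: CPU consumed by any thread in the process counts against a single expiry. A sketch using a POSIX CLOCK_PROCESS_CPUTIME_ID timer (assumed build flags -pthread -lrt; error handling trimmed for brevity):

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>

static void *burn(void *arg)
{
	(void)arg;
	for (;;)
		;		/* consume CPU time in a second thread */
	return NULL;
}

int main(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGUSR1,
	};
	struct itimerspec its = {
		.it_value = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000 },
	};
	sigset_t set;
	pthread_t thr;
	timer_t tid;
	int sig;

	/* Block the signal so sigwait() can pick it up synchronously */
	sigemptyset(&set);
	sigaddset(&set, SIGUSR1);
	pthread_sigmask(SIG_BLOCK, &set, NULL);

	timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid);
	timer_settime(tid, 0, &its, NULL);

	/* The CPU time is burnt by the other thread, but the process-wide
	 * clock still advances and the timer fires after ~50ms of total CPU. */
	pthread_create(&thr, NULL, burn, NULL);
	sigwait(&set, &sig);
	printf("process CPU timer expired (signal %d)\n", sig);
	return 0;
}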
@@ -1132,11 +1110,12 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
  * already updated our counts. We need to check if any timers fire now.
  * Interrupts are disabled.
  */
-void run_posix_cpu_timers(struct task_struct *tsk)
+void run_posix_cpu_timers(void)
 {
-	LIST_HEAD(firing);
+	struct task_struct *tsk = current;
 	struct k_itimer *timer, *next;
 	unsigned long flags;
+	LIST_HEAD(firing);
 
 	lockdep_assert_irqs_disabled();
 
@@ -1174,11 +1153,11 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * each timer's lock before clearing its firing flag, so no
 	 * timer call will interfere.
 	 */
-	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
+	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
 		int cpu_firing;
 
 		spin_lock(&timer->it_lock);
-		list_del_init(&timer->it.cpu.entry);
+		list_del_init(&timer->it.cpu.elist);
 		cpu_firing = timer->it.cpu.firing;
 		timer->it.cpu.firing = 0;
 		/*
@@ -1196,16 +1175,18 @@ void run_posix_cpu_timers(struct task_struct *tsk)
  * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  * The tsk->sighand->siglock must be held by the caller.
  */
-void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
+void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
 			   u64 *newval, u64 *oldval)
 {
-	u64 now;
-	int ret;
+	u64 now, *nextevt;
+
+	if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))
+		return;
 
-	WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
-	ret = cpu_timer_sample_group(clock_idx, tsk, &now);
+	nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;
+	now = cpu_clock_sample_group(clkid, tsk, true);
 
-	if (oldval && ret != -EINVAL) {
+	if (oldval) {
 		/*
 		 * We are setting itimer. The *oldval is absolute and we update
 		 * it to be relative, *newval argument is relative and we update
@@ -1226,19 +1207,11 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	}
 
 	/*
-	 * Update expiration cache if we are the earliest timer, or eventually
-	 * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
+	 * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF
+	 * expiry cache is also used by RLIMIT_CPU!.
 	 */
-	switch (clock_idx) {
-	case CPUCLOCK_PROF:
-		if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
-			tsk->signal->cputime_expires.prof_exp = *newval;
-		break;
-	case CPUCLOCK_VIRT:
-		if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
-			tsk->signal->cputime_expires.virt_exp = *newval;
-		break;
-	}
+	if (*newval < *nextevt)
+		*nextevt = *newval;
 
 	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
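set_process_cpu_timer() is the kernel backend behind the CPU itimers, and the CPUCLOCK_PROF slot it updates is the same expiry cache the RLIMIT_CPU code above reuses. The corresponding userspace interface, sketched with arbitrary values: ITIMER_VIRTUAL charges user CPU time and delivers SIGVTALRM, ITIMER_PROF charges user+system time and delivers SIGPROF.

#include <signal.h>
#include <stdio.h>
#include <sys/time.h>

static volatile sig_atomic_t got_vtalrm;

static void on_vtalrm(int sig)
{
	(void)sig;
	got_vtalrm = 1;
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_vtalrm };
	/* Fire once after 200ms of user CPU time, no repeat interval */
	struct itimerval itv = {
		.it_value = { .tv_sec = 0, .tv_usec = 200 * 1000 },
	};

	sigemptyset(&sa.sa_mask);
	sigaction(SIGVTALRM, &sa, NULL);

	if (setitimer(ITIMER_VIRTUAL, &itv, NULL)) {
		perror("setitimer");
		return 1;
	}

	/* Spin in user space until the virtual itimer expires */
	while (!got_vtalrm)
		;
	puts("SIGVTALRM after ~200ms of user CPU time");
	return 0;
}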
@@ -1260,6 +1233,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 	timer.it_overrun = -1;
 	error = posix_cpu_timer_create(&timer);
 	timer.it_process = current;
+
 	if (!error) {
 		static struct itimerspec64 zero_it;
 		struct restart_block *restart;
@@ -1275,7 +1249,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		}
 
 		while (!signal_pending(current)) {
-			if (timer.it.cpu.expires == 0) {
+			if (!cpu_timer_getexpires(&timer.it.cpu)) {
 				/*
 				 * Our timer fired and was reset, below
 				 * deletion can not fail.
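do_cpu_nanosleep() also backs clock_nanosleep() on the process CPU clock: the sleep completes only after the process has consumed the requested amount of CPU time, so a single-threaded caller would block indefinitely while it sleeps. A sketch (assumed -pthread; error handling trimmed) with a second thread supplying the CPU time:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static void *burn(void *arg)
{
	(void)arg;
	for (;;)
		;	/* the sleeper is blocked; this thread supplies CPU time */
	return NULL;
}

int main(void)
{
	/* Wake up once the whole process has consumed 100ms of CPU time */
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
	pthread_t thr;
	int err;

	pthread_create(&thr, NULL, burn, NULL);

	/* Relative sleep on the process CPU clock; restart with the
	 * remaining time if a signal interrupts it. */
	do {
		err = clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &ts, &ts);
	} while (err == EINTR);

	if (err) {
		errno = err;
		perror("clock_nanosleep");
		return 1;
	}
	puts("the process consumed the requested CPU time");
	return 0;
}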
@@ -1297,7 +1271,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		/*
 		 * We were interrupted by a signal.
 		 */
-		expires = timer.it.cpu.expires;
+		expires = cpu_timer_getexpires(&timer.it.cpu);
 		error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
 		if (!error) {
 			/*