From 9465bee863bc4c6cf1566c12d6f92a8133e3da5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Oct 2005 15:36:00 -0700 Subject: Revert "Fix cpu timers exit deadlock and races" Revert commit e03d13e985d48ac4885382c9e3b1510c78bd047f, to be replaced by a much nicer fix from Roland. --- kernel/posix-cpu-timers.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index b3f3edc475de..7a51a5597c33 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -387,19 +387,25 @@ int posix_cpu_timer_del(struct k_itimer *timer) if (unlikely(p == NULL)) return 0; - spin_lock(&p->sighand->siglock); if (!list_empty(&timer->it.cpu.entry)) { - /* - * Take us off the task's timer list. We don't need to - * take tasklist_lock and check for the task being reaped. - * If it was reaped, it already called posix_cpu_timers_exit - * and posix_cpu_timers_exit_group to clear all the timers - * that pointed to it. - */ - list_del(&timer->it.cpu.entry); - put_task_struct(p); + read_lock(&tasklist_lock); + if (unlikely(p->signal == NULL)) { + /* + * We raced with the reaping of the task. + * The deletion should have cleared us off the list. + */ + BUG_ON(!list_empty(&timer->it.cpu.entry)); + } else { + /* + * Take us off the task's timer list. + */ + spin_lock(&p->sighand->siglock); + list_del(&timer->it.cpu.entry); + spin_unlock(&p->sighand->siglock); + } + read_unlock(&tasklist_lock); } - spin_unlock(&p->sighand->siglock); + put_task_struct(p); return 0; } -- cgit v1.2.2 From e80eda94d3eaf1d12cfc97878eff77cd679dabc9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Oct 2005 10:02:50 -0700 Subject: Posix timers: limit number of timers firing at once Bursty timers aren't good for anybody, very much including latency for other programs when we trigger lots of timers in interrupt context. So set a random limit, after which we'll handle the rest on the next timer tick. Noted by Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 7a51a5597c33..d30b304a3384 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -961,14 +961,16 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { + int maxfire; struct list_head *timers = tsk->cpu_timers; + maxfire = 20; tsk->it_prof_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(prof_ticks(tsk), t->expires.cpu)) { + if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { tsk->it_prof_expires = t->expires.cpu; break; } @@ -977,12 +979,13 @@ static void check_thread_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; tsk->it_virt_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(virt_ticks(tsk), t->expires.cpu)) { + if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { tsk->it_virt_expires = t->expires.cpu; break; } @@ -991,12 +994,13 @@ static void check_thread_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; tsk->it_sched_expires = 0; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (tsk->sched_time < t->expires.sched) { + if (!--maxfire || tsk->sched_time < t->expires.sched) { tsk->it_sched_expires = t->expires.sched; break; } @@ -1013,6 +1017,7 @@ static void check_thread_timers(struct task_struct *tsk, static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { + int maxfire; struct signal_struct *const sig = tsk->signal; cputime_t utime, stime, ptime, virt_expires, prof_expires; unsigned long long sched_time, sched_expires; @@ -1045,12 +1050,13 @@ static void check_process_timers(struct task_struct *tsk, } while (t != tsk); ptime = cputime_add(utime, stime); + maxfire = 20; prof_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(ptime, t->expires.cpu)) { + if (!--maxfire || cputime_lt(ptime, t->expires.cpu)) { prof_expires = t->expires.cpu; break; } @@ -1059,12 +1065,13 @@ static void check_process_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; virt_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(utime, t->expires.cpu)) { + if (!--maxfire || cputime_lt(utime, t->expires.cpu)) { virt_expires = t->expires.cpu; break; } @@ -1073,12 +1080,13 @@ static void check_process_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; sched_expires = 0; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (sched_time < t->expires.sched) { + if (!--maxfire || sched_time < t->expires.sched) { sched_expires = t->expires.sched; break; } -- cgit v1.2.2 From 108150ea78003044e41150c75259447b2c0953b6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 23 Oct 2005 20:25:39 +0400 Subject: [PATCH] posix-timers: fix cleanup_timers() and run_posix_cpu_timers() races 1. cleanup_timers() sets timer->task = NULL under tasklist + ->sighand locks. That means that this code in posix_cpu_timer_del() and posix_cpu_timer_set() lock_timer(timer); if (timer->task == NULL) return; read_lock(tasklist); put_task_struct(timer->task) is racy. With this patch timer->task modified and accounted only under timer->it_lock. Sadly, this means that dead task_struct won't be freed until timer deleted or armed. 2. run_posix_cpu_timers() collects expired timers into local list under tasklist + ->sighand again. That means that posix_cpu_timer_del() should check timer->it.cpu.firing under these locks too. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d30b304a3384..30ab39a27736 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -380,14 +380,9 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) int posix_cpu_timer_del(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; + int ret = 0; - if (timer->it.cpu.firing) - return TIMER_RETRY; - - if (unlikely(p == NULL)) - return 0; - - if (!list_empty(&timer->it.cpu.entry)) { + if (likely(p != NULL)) { read_lock(&tasklist_lock); if (unlikely(p->signal == NULL)) { /* @@ -396,18 +391,20 @@ int posix_cpu_timer_del(struct k_itimer *timer) */ BUG_ON(!list_empty(&timer->it.cpu.entry)); } else { - /* - * Take us off the task's timer list. - */ spin_lock(&p->sighand->siglock); - list_del(&timer->it.cpu.entry); + if (timer->it.cpu.firing) + ret = TIMER_RETRY; + else + list_del(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); } read_unlock(&tasklist_lock); + + if (!ret) + put_task_struct(p); } - put_task_struct(p); - return 0; + return ret; } /* @@ -424,8 +421,6 @@ static void cleanup_timers(struct list_head *head, cputime_t ptime = cputime_add(utime, stime); list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, ptime)) { timer->expires.cpu = cputime_zero; @@ -437,8 +432,6 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, utime)) { timer->expires.cpu = cputime_zero; @@ -450,8 +443,6 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (timer->expires.sched < sched_time) { timer->expires.sched = 0; -- cgit v1.2.2 From 3de463c7d9d58f8cf3395268230cb20a4c15bffa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Oct 2005 14:34:03 +0400 Subject: [PATCH] posix-timers: remove false BUG_ON() from run_posix_cpu_timers() do_exit() clears ->it_##clock##_expires, but nothing prevents another cpu to attach the timer to exiting process after that. After exit_notify() does 'write_unlock_irq(&tasklist_lock)' and before do_exit() calls 'schedule() local timer interrupt can find tsk->exit_state != 0. If that state was EXIT_DEAD (or another cpu does sys_wait4) interrupted task has ->signal == NULL. At this moment exiting task has no pending cpu timers, they were cleaned up in __exit_signal()->posix_cpu_timers_exit{,_group}(), so we can just return from irq. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 30ab39a27736..ccb04683bf18 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1285,30 +1285,30 @@ void run_posix_cpu_timers(struct task_struct *tsk) #undef UNEXPIRED - BUG_ON(tsk->exit_state); - /* * Double-check with locks held. */ read_lock(&tasklist_lock); - spin_lock(&tsk->sighand->siglock); + if (likely(tsk->signal != NULL)) { + spin_lock(&tsk->sighand->siglock); - /* - * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] - * all the timers that are firing, and put them on the firing list. - */ - check_thread_timers(tsk, &firing); - check_process_timers(tsk, &firing); + /* + * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] + * all the timers that are firing, and put them on the firing list. + */ + check_thread_timers(tsk, &firing); + check_process_timers(tsk, &firing); - /* - * We must release these locks before taking any timer's lock. - * There is a potential race with timer deletion here, as the - * siglock now protects our private firing list. We have set - * the firing flag in each timer, so that a deletion attempt - * that gets the timer lock before we do will give it up and - * spin until we've taken care of that timer below. - */ - spin_unlock(&tsk->sighand->siglock); + /* + * We must release these locks before taking any timer's lock. + * There is a potential race with timer deletion here, as the + * siglock now protects our private firing list. We have set + * the firing flag in each timer, so that a deletion attempt + * that gets the timer lock before we do will give it up and + * spin until we've taken care of that timer below. + */ + spin_unlock(&tsk->sighand->siglock); + } read_unlock(&tasklist_lock); /* -- cgit v1.2.2 From ca531a0a5e01e5122f67cb6aca8fcbfc70e18e0b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Oct 2005 14:36:28 +0400 Subject: [PATCH] posix-timers: exit path cleanup No need to rebalance when task exited Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index ccb04683bf18..92a038064628 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -486,6 +486,9 @@ static void process_timer_rebalance(struct task_struct *p, struct task_struct *t = p; unsigned int nthreads = atomic_read(&p->signal->live); + if (!nthreads) + return; + switch (clock_idx) { default: BUG(); @@ -1160,6 +1163,9 @@ static void check_process_timers(struct task_struct *tsk, unsigned long long sched_left, sched; const unsigned int nthreads = atomic_read(&sig->live); + if (!nthreads) + return; + prof_left = cputime_sub(prof_expires, utime); prof_left = cputime_sub(prof_left, stime); prof_left = cputime_div(prof_left, nthreads); -- cgit v1.2.2 From a69ac4a78d8bd9e1ec478bd7297d4f047fcd44a8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Oct 2005 18:29:58 +0400 Subject: [PATCH] posix-timers: fix posix_cpu_timer_set() vs run_posix_cpu_timers() race This might be harmless, but looks like a race from code inspection (I was unable to trigger it). I must admit, I don't understand why we can't return TIMER_RETRY after 'spin_unlock(&p->sighand->siglock)' without doing bump_cpu_timer(), but this is what original code does. posix_cpu_timer_set: read_lock(&tasklist_lock); spin_lock(&p->sighand->siglock); list_del_init(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); We are probaly deleting the timer from run_posix_cpu_timers's 'firing' local list_head while run_posix_cpu_timers() does list_for_each_safe. Various bad things can happen, for example we can just delete this timer so that list_for_each() will not notice it and run_posix_cpu_timers() will not reset '->firing' flag. In that case, .... if (timer->it.cpu.firing) { read_unlock(&tasklist_lock); timer->it.cpu.firing = -1; return TIMER_RETRY; } sys_timer_settime() goes to 'retry:', calls posix_cpu_timer_set() again, it returns TIMER_RETRY ... Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 92a038064628..b15462b17a58 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -730,9 +730,15 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, * Disarm any old timer after extracting its expiry time. */ BUG_ON(!irqs_disabled()); + + ret = 0; spin_lock(&p->sighand->siglock); old_expires = timer->it.cpu.expires; - list_del_init(&timer->it.cpu.entry); + if (unlikely(timer->it.cpu.firing)) { + timer->it.cpu.firing = -1; + ret = TIMER_RETRY; + } else + list_del_init(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); /* @@ -780,7 +786,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, } } - if (unlikely(timer->it.cpu.firing)) { + if (unlikely(ret)) { /* * We are colliding with the timer actually firing. * Punt after filling in the timer's old value, and @@ -788,8 +794,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, * it as an overrun (thanks to bump_cpu_timer above). */ read_unlock(&tasklist_lock); - timer->it.cpu.firing = -1; - ret = TIMER_RETRY; goto out; } -- cgit v1.2.2