author		Bjoern B. Brandenburg <bbb@cs.unc.edu>	2009-05-03 22:42:18 -0400
committer	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2009-05-03 22:42:18 -0400
commit		536a1b04a8a02f1f5f4510cea657a3612e2610f5 (patch)
tree		3b2368ae89c4c87ef450c7b8b84bdb30a9fedf3f
parent		fa940ad71184dec3d156b7dd673ee38067e7c0f6 (diff)
rt domain: fix rare deadlock between timer base lock and release lock
If a timer happens to go off at the wrong time on the wrong CPU, the old
code could deadlock. This is avoided in the new version by dropping the
release_lock before using the hrtimer API.
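
The shape of the deadlock and of the fix can be modeled in userspace. The
following is a minimal sketch (not LITMUS^RT code; all names are hypothetical)
using pthreads, where pthread_join() stands in for hrtimer_cancel(): both
block until the callback has completed, which is fatal if the callback is
itself blocked on a lock the canceling CPU holds.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t release_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t cb_thread;

/* Models the hrtimer callback: it takes release_lock, just like
 * on_release_timer() does in the patch below. */
static void *timer_callback(void *arg)
{
	(void) arg;
	pthread_mutex_lock(&release_lock);
	puts("callback: removed heap from release queue");
	pthread_mutex_unlock(&release_lock);
	return NULL;
}

/* Models hrtimer_cancel(): blocks until the callback has completed. */
static void cancel_callback_sync(void)
{
	pthread_join(cb_thread, NULL);
}

int main(void)
{
	pthread_create(&cb_thread, NULL, timer_callback, NULL);

	pthread_mutex_lock(&release_lock);
	/* Old code: waiting for the callback HERE, while still holding
	 * release_lock, deadlocks if the callback is concurrently blocked
	 * on that same lock on another CPU. */
	pthread_mutex_unlock(&release_lock);	/* the fix: drop the lock */
	cancel_callback_sync();			/* now the callback can finish */
	pthread_mutex_lock(&release_lock);	/* re-acquire, then re-check */
	puts("arming path: inserted into release heap");
	pthread_mutex_unlock(&release_lock);
	return 0;
}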
-rw-r--r--	litmus/rt_domain.c	76
1 file changed, 60 insertions(+), 16 deletions(-)
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 8065a2c0bd..a46b7e8e72 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -1,3 +1,4 @@
+
 /*
  * kernel/rt_domain.c
  *
@@ -54,9 +55,11 @@ static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
 	rh = container_of(timer, struct release_heap, timer);
 
 	spin_lock_irqsave(&rh->dom->release_lock, flags);
+	TRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
 	/* remove from release queue */
 	list_del(&rh->list);
 	spin_unlock_irqrestore(&rh->dom->release_lock, flags);
+	TRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
 
 	/* call release callback */
 	rh->dom->release_jobs(rh->dom, &rh->heap);
@@ -99,7 +102,9 @@ void release_heap_free(struct release_heap* rh)
  * Will return heap for given time. If no such heap exists prior to the invocation
  * it will be created.
  */
-static struct release_heap* get_release_heap(rt_domain_t *rt, struct task_struct* t)
+static struct release_heap* get_release_heap(rt_domain_t *rt,
+					     struct task_struct* t,
+					     int use_task_heap)
 {
 	struct list_head* pos;
 	struct release_heap* heap = NULL;
@@ -124,24 +129,12 @@ static struct release_heap* get_release_heap(rt_domain_t *rt, struct task_struct
 			break;
 		}
 	}
-	if (!heap) {
+	if (!heap && use_task_heap) {
 		/* use pre-allocated release heap */
 		rh = tsk_rt(t)->rel_heap;
 
-		/* Make sure it is safe to use. The timer callback could still
-		 * be executing on another CPU; hrtimer_cancel() will wait
-		 * until the timer callback has completed. However, under no
-		 * circumstances should the timer be active (= yet to be
-		 * triggered).
-		 */
-		BUG_ON(hrtimer_cancel(&rh->timer));
-
-		/* initialize */
-		rh->release_time = release_time;
 		rh->dom = rt;
-		heap_init(&rh->heap);
-
-		atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
+		rh->release_time = release_time;
 
 		/* add to release queue */
 		list_add(&rh->list, pos->prev);
@@ -150,6 +143,45 @@ static struct release_heap* get_release_heap(rt_domain_t *rt, struct task_struct
 	return heap;
 }
 
+static void reinit_release_heap(struct task_struct* t)
+{
+	struct release_heap* rh;
+
+	/* use pre-allocated release heap */
+	rh = tsk_rt(t)->rel_heap;
+
+/*	{
+		lt_t start = litmus_clock();
+		int ret;
+		do {
+			if (lt_before(start + 1000000, litmus_clock())) {
+				TRACE_TASK(t, "BAD: timer still in use after 1ms! giving up.\n");
+				break;
+			}
+		} while ((ret = hrtimer_try_to_cancel(&rh->timer)) == -1);
+		if (ret != 0) {
+			TRACE_TASK(t, "BAD: cancelled timer and got %d.\n", ret);
+		}
+	}
+*/
+
+	/* Make sure it is safe to use. The timer callback could still
+	 * be executing on another CPU; hrtimer_cancel() will wait
+	 * until the timer callback has completed. However, under no
+	 * circumstances should the timer be active (= yet to be
+	 * triggered).
+	 *
+	 * WARNING: If the CPU still holds the release_lock at this point,
+	 *          deadlock may occur!
+	 */
+	BUG_ON(hrtimer_cancel(&rh->timer));
+
+	/* initialize */
+	heap_init(&rh->heap);
+
+	atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
+}
+
 static void arm_release_timer(unsigned long _rt)
 {
 	rt_domain_t *rt = (rt_domain_t*) _rt;
@@ -175,10 +207,22 @@ static void arm_release_timer(unsigned long _rt)
 
 		/* put into release heap while holding release_lock */
 		spin_lock_irqsave(&rt->release_lock, flags);
-		rh = get_release_heap(rt, t);
+		TRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
+		rh = get_release_heap(rt, t, 0);
+		if (!rh) {
+			/* need to use our own, but drop lock first */
+			spin_unlock(&rt->release_lock);
+			TRACE_TASK(t, "Dropped release_lock 0x%p\n", &rt->release_lock);
+			reinit_release_heap(t);
+			TRACE_TASK(t, "release_heap ready\n");
+			spin_lock(&rt->release_lock);
+			TRACE_TASK(t, "Re-acquired release_lock 0x%p\n", &rt->release_lock);
+			rh = get_release_heap(rt, t, 1);
+		}
 		heap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
 		TRACE_TASK(t, "arm_release_timer(): added to release heap\n");
 		spin_unlock_irqrestore(&rt->release_lock, flags);
+		TRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
 
 		/* To avoid arming the timer multiple times, we only let the
 		 * owner do the arming (which is the "first" task to reference