author	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2009-05-03 22:42:18 -0400
committer	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2009-05-03 22:42:18 -0400
commit	536a1b04a8a02f1f5f4510cea657a3612e2610f5 (patch)
tree	3b2368ae89c4c87ef450c7b8b84bdb30a9fedf3f
parent	fa940ad71184dec3d156b7dd673ee38067e7c0f6 (diff)
rt domain: fix rare deadlock between timer base lock and release lock
If a timer happens to go off at the wrong time on the wrong CPU, the old code could deadlock. This is avoided in the new version by dropping the release_lock before using the hrtimer API.
-rw-r--r--	litmus/rt_domain.c	76
1 file changed, 60 insertions, 16 deletions
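For context, the deadlock is the classic AB-BA pattern: the hrtimer callback (on_release_timer()) runs under the timer base lock and then takes release_lock, while arm_release_timer() holds release_lock and then calls into the hrtimer API, which needs the base lock. The following is a minimal userspace analogue using pthreads, not code from this repository; the two mutexes stand in for the kernel's timer base lock and release_lock, and the sleeps merely force the bad interleaving:

/* AB-BA deadlock sketch; build with: gcc -pthread abba.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t base_lock    = PTHREAD_MUTEX_INITIALIZER; /* "timer base lock" */
static pthread_mutex_t release_lock = PTHREAD_MUTEX_INITIALIZER; /* "release_lock"    */

/* Plays on_release_timer(): entered with the base lock held,
 * then needs release_lock to touch the release queue. */
static void *expiry_path(void *unused)
{
	(void) unused;
	pthread_mutex_lock(&base_lock);
	sleep(1);                          /* let the other thread take release_lock */
	puts("callback: waiting for release_lock...");
	pthread_mutex_lock(&release_lock); /* blocks forever: arm_path holds it */
	pthread_mutex_unlock(&release_lock);
	pthread_mutex_unlock(&base_lock);
	return NULL;
}

/* Plays the old arm_release_timer(): holds release_lock,
 * then calls into the hrtimer API, which needs the base lock. */
static void *arm_path(void *unused)
{
	(void) unused;
	pthread_mutex_lock(&release_lock);
	sleep(1);                          /* let the callback take the base lock */
	puts("arm: entering hrtimer API (needs base lock)...");
	pthread_mutex_lock(&base_lock);    /* blocks forever: expiry_path holds it */
	pthread_mutex_unlock(&base_lock);
	pthread_mutex_unlock(&release_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	pthread_create(&a, NULL, expiry_path, NULL);
	pthread_create(&b, NULL, arm_path, NULL);
	pthread_join(a, NULL);             /* never returns: circular wait */
	pthread_join(b, NULL);
	return 0;
}

Each thread holds one lock and waits for the other, so neither makes progress. The patch breaks the cycle by never holding release_lock while calling into the hrtimer API.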
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 8065a2c0bd..a46b7e8e72 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -1,3 +1,4 @@
+
 /*
  * kernel/rt_domain.c
  *
@@ -54,9 +55,11 @@ static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
 	rh = container_of(timer, struct release_heap, timer);
 
 	spin_lock_irqsave(&rh->dom->release_lock, flags);
+	TRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
 	/* remove from release queue */
 	list_del(&rh->list);
 	spin_unlock_irqrestore(&rh->dom->release_lock, flags);
+	TRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
 
 	/* call release callback */
 	rh->dom->release_jobs(rh->dom, &rh->heap);
@@ -99,7 +102,9 @@ void release_heap_free(struct release_heap* rh)
  * Will return heap for given time. If no such heap exists prior to the invocation
  * it will be created.
  */
-static struct release_heap* get_release_heap(rt_domain_t *rt, struct task_struct* t)
+static struct release_heap* get_release_heap(rt_domain_t *rt,
+					     struct task_struct* t,
+					     int use_task_heap)
 {
 	struct list_head* pos;
 	struct release_heap* heap = NULL;
@@ -124,24 +129,12 @@ static struct release_heap* get_release_heap(rt_domain_t *rt, struct task_struct
 			break;
 		}
 	}
-	if (!heap) {
+	if (!heap && use_task_heap) {
 		/* use pre-allocated release heap */
 		rh = tsk_rt(t)->rel_heap;
 
-		/* Make sure it is safe to use. The timer callback could still
-		 * be executing on another CPU; hrtimer_cancel() will wait
-		 * until the timer callback has completed. However, under no
-		 * circumstances should the timer be active (= yet to be
-		 * triggered).
-		 */
-		BUG_ON(hrtimer_cancel(&rh->timer));
-
-		/* initialize */
-		rh->release_time = release_time;
-		rh->dom = rt;
-		heap_init(&rh->heap);
-
-		atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
+		rh->dom = rt;
+		rh->release_time = release_time;
 
 		/* add to release queue */
 		list_add(&rh->list, pos->prev);
@@ -150,6 +143,45 @@ static struct release_heap* get_release_heap(rt_domain_t *rt, struct task_struct
 	return heap;
 }
 
+static void reinit_release_heap(struct task_struct* t)
+{
+	struct release_heap* rh;
+
+	/* use pre-allocated release heap */
+	rh = tsk_rt(t)->rel_heap;
+
+/* {
+	lt_t start = litmus_clock();
+	int ret;
+	do {
+		if (lt_before(start + 1000000, litmus_clock())) {
+			TRACE_TASK(t, "BAD: timer still in use after 1ms! giving up.\n");
+			break;
+		}
+	} while ((ret = hrtimer_try_to_cancel(&rh->timer)) == -1);
+	if (ret != 0) {
+		TRACE_TASK(t, "BAD: cancelled timer and got %d.\n", ret);
+	}
+ }
+*/
+
+	/* Make sure it is safe to use. The timer callback could still
+	 * be executing on another CPU; hrtimer_cancel() will wait
+	 * until the timer callback has completed. However, under no
+	 * circumstances should the timer be active (= yet to be
+	 * triggered).
+	 *
+	 * WARNING: If the CPU still holds the release_lock at this point,
+	 * deadlock may occur!
+	 */
+	BUG_ON(hrtimer_cancel(&rh->timer));
+
+	/* initialize */
+	heap_init(&rh->heap);
+
+	atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
+}
+
 static void arm_release_timer(unsigned long _rt)
 {
 	rt_domain_t *rt = (rt_domain_t*) _rt;
@@ -175,10 +207,22 @@ static void arm_release_timer(unsigned long _rt)
 
 	/* put into release heap while holding release_lock */
 	spin_lock_irqsave(&rt->release_lock, flags);
-	rh = get_release_heap(rt, t);
+	TRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
+	rh = get_release_heap(rt, t, 0);
+	if (!rh) {
+		/* need to use our own, but drop lock first */
+		spin_unlock(&rt->release_lock);
+		TRACE_TASK(t, "Dropped release_lock 0x%p\n", &rt->release_lock);
+		reinit_release_heap(t);
+		TRACE_TASK(t, "release_heap ready\n");
+		spin_lock(&rt->release_lock);
+		TRACE_TASK(t, "Re-acquired release_lock 0x%p\n", &rt->release_lock);
+		rh = get_release_heap(rt, t, 1);
+	}
 	heap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
 	TRACE_TASK(t, "arm_release_timer(): added to release heap\n");
 	spin_unlock_irqrestore(&rt->release_lock, flags);
+	TRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
 
 	/* To avoid arming the timer multiple times, we only let the
 	 * owner do the arming (which is the "first" task to reference
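Condensed from the hunks above, the patched locking protocol in arm_release_timer() looks as follows (TRACE calls omitted; rt, t, rh, and flags are the declarations from the surrounding function):

	spin_lock_irqsave(&rt->release_lock, flags);
	rh = get_release_heap(rt, t, 0);     /* lookup only, do not create */
	if (!rh) {
		/* No heap for this release time yet; the task's pre-allocated
		 * heap must be recycled. hrtimer_cancel() inside
		 * reinit_release_heap() may wait for on_release_timer(), which
		 * itself takes release_lock, so drop the lock first to break
		 * the circular wait. */
		spin_unlock(&rt->release_lock);
		reinit_release_heap(t);          /* cancels timer, re-inits heap */
		spin_lock(&rt->release_lock);
		rh = get_release_heap(rt, t, 1); /* retry; may use task heap now */
	}
	heap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
	spin_unlock_irqrestore(&rt->release_lock, flags);

Note that while release_lock is dropped, another CPU may create a heap for the same release time; that is why the code retries get_release_heap() (now with use_task_heap = 1) after re-acquiring the lock instead of blindly inserting the task's own heap.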