Diffstat (limited to 'mm/mempool.c')
 mm/mempool.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/mm/mempool.c b/mm/mempool.c
index e73641b79bb5..11f0d0a5e0f8 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -224,28 +224,31 @@ repeat_alloc:
 	if (likely(pool->curr_nr)) {
 		element = remove_element(pool);
 		spin_unlock_irqrestore(&pool->lock, flags);
+		/* paired with rmb in mempool_free(), read comment there */
+		smp_wmb();
 		return element;
 	}
-	spin_unlock_irqrestore(&pool->lock, flags);
 
 	/* We must not sleep in the GFP_ATOMIC case */
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!(gfp_mask & __GFP_WAIT)) {
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return NULL;
+	}
 
-	/* Now start performing page reclaim */
+	/* Let's wait for someone else to return an element to @pool */
 	gfp_temp = gfp_mask;
 	init_wait(&wait);
 	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
-	smp_mb();
-	if (!pool->curr_nr) {
-		/*
-		 * FIXME: this should be io_schedule().  The timeout is there
-		 * as a workaround for some DM problems in 2.6.18.
-		 */
-		io_schedule_timeout(5*HZ);
-	}
-	finish_wait(&pool->wait, &wait);
 
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	/*
+	 * FIXME: this should be io_schedule().  The timeout is there as a
+	 * workaround for some DM problems in 2.6.18.
+	 */
+	io_schedule_timeout(5*HZ);
+
+	finish_wait(&pool->wait, &wait);
 	goto repeat_alloc;
 }
 EXPORT_SYMBOL(mempool_alloc);
@@ -265,7 +268,39 @@ void mempool_free(void *element, mempool_t *pool)
 	if (unlikely(element == NULL))
 		return;
 
-	smp_mb();
+	/*
+	 * Paired with the wmb in mempool_alloc().  The preceding read is
+	 * for @element and the following @pool->curr_nr.  This ensures
+	 * that the visible value of @pool->curr_nr is from after the
+	 * allocation of @element.  This is necessary for fringe cases
+	 * where @element was passed to this task without going through
+	 * barriers.
+	 *
+	 * For example, assume @p is %NULL at the beginning and one task
+	 * performs "p = mempool_alloc(...);" while another task is doing
+	 * "while (!p) cpu_relax(); mempool_free(p, ...);".  This function
+	 * may end up using curr_nr value which is from before allocation
+	 * of @p without the following rmb.
+	 */
+	smp_rmb();
+
+	/*
+	 * For correctness, we need a test which is guaranteed to trigger
+	 * if curr_nr + #allocated == min_nr.  Testing curr_nr < min_nr
+	 * without locking achieves that and refilling as soon as possible
+	 * is desirable.
+	 *
+	 * Because curr_nr visible here is always a value after the
+	 * allocation of @element, any task which decremented curr_nr below
+	 * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
+	 * incremented to min_nr afterwards.  If curr_nr gets incremented
+	 * to min_nr after the allocation of @element, the elements
+	 * allocated after that are subject to the same guarantee.
+	 *
+	 * Waiters happen iff curr_nr is 0 and the above guarantee also
+	 * ensures that there will be frees which return elements to the
+	 * pool waking up the waiters.
+	 */
 	if (pool->curr_nr < pool->min_nr) {
 		spin_lock_irqsave(&pool->lock, flags);
 		if (pool->curr_nr < pool->min_nr) {
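
For illustration only, here is a minimal sketch of the fringe case the new comment in mempool_free() describes: two tasks share a pointer, the allocating task publishes it with a plain store, and the freeing task spins until it sees it. The names (allocator_task, freer_task, p) are hypothetical and not part of the patch; the point is just how the smp_wmb() in mempool_alloc() pairs with the smp_rmb() in mempool_free().

#include <linux/mempool.h>	/* assumes kernel context */

static void *p;			/* shared pointer, %NULL at the beginning */

static void allocator_task(mempool_t *pool)
{
	/*
	 * mempool_alloc() decrements pool->curr_nr and issues smp_wmb()
	 * before returning, so the updated curr_nr is ordered before
	 * @p becoming visible to other CPUs below.
	 */
	p = mempool_alloc(pool, GFP_KERNEL);
}

static void freer_task(mempool_t *pool)
{
	while (!p)
		cpu_relax();	/* spin until @p is published */
	/*
	 * mempool_free() issues smp_rmb() after the read of @p, so the
	 * curr_nr it compares against min_nr is from after the
	 * allocation of @p; without the rmb it could act on a stale,
	 * pre-allocation curr_nr and skip refilling the pool.
	 */
	mempool_free(p, pool);
}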