-rw-r--r--  block/blk-core.c            369
-rw-r--r--  block/blk-flush.c             3
-rw-r--r--  block/elevator.c              6
-rw-r--r--  include/linux/blk_types.h     2
-rw-r--r--  include/linux/blkdev.h       42
-rw-r--r--  include/linux/elevator.h      1
-rw-r--r--  include/linux/sched.h         6
-rw-r--r--  kernel/exit.c                 1
-rw-r--r--  kernel/fork.c                 3
-rw-r--r--  kernel/sched.c               12
10 files changed, 344 insertions, 101 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index e958c7a1e462..6efb55cc5af0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -27,6 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/fault-inject.h>
+#include <linux/list_sort.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -203,7 +204,7 @@ static void blk_delay_work(struct work_struct *work)
 
 	q = container_of(work, struct request_queue, delay_work.work);
 	spin_lock_irq(q->queue_lock);
-	q->request_fn(q);
+	__blk_run_queue(q);
 	spin_unlock_irq(q->queue_lock);
 }
 
@@ -686,6 +687,8 @@ int blk_get_queue(struct request_queue *q)
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
@@ -1051,6 +1054,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_requeue_request);
 
+static void add_acct_request(struct request_queue *q, struct request *rq,
+			     int where)
+{
+	drive_stat_acct(rq, 1);
+	__elv_add_request(q, rq, where, 0);
+}
+
 /**
  * blk_insert_request - insert a special request into a request queue
  * @q: request queue where request should be inserted
@@ -1093,8 +1103,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
 
-	drive_stat_acct(rq, 1);
-	__elv_add_request(q, rq, where, 0);
+	add_acct_request(q, rq, where);
 	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1215,6 +1224,113 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 
+static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+				   struct bio *bio)
+{
+	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+
+	/*
+	 * Debug stuff, kill later
+	 */
+	if (!rq_mergeable(req)) {
+		blk_dump_rq_flags(req, "back");
+		return false;
+	}
+
+	if (!ll_back_merge_fn(q, req, bio))
+		return false;
+
+	trace_block_bio_backmerge(q, bio);
+
+	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+		blk_rq_set_mixed_merge(req);
+
+	req->biotail->bi_next = bio;
+	req->biotail = bio;
+	req->__data_len += bio->bi_size;
+	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+	drive_stat_acct(req, 0);
+	return true;
+}
+
+static bool bio_attempt_front_merge(struct request_queue *q,
+				    struct request *req, struct bio *bio)
+{
+	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+	sector_t sector;
+
+	/*
+	 * Debug stuff, kill later
+	 */
+	if (!rq_mergeable(req)) {
+		blk_dump_rq_flags(req, "front");
+		return false;
+	}
+
+	if (!ll_front_merge_fn(q, req, bio))
+		return false;
+
+	trace_block_bio_frontmerge(q, bio);
+
+	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+		blk_rq_set_mixed_merge(req);
+
+	sector = bio->bi_sector;
+
+	bio->bi_next = req->bio;
+	req->bio = bio;
+
+	/*
+	 * may not be valid. if the low level driver said
+	 * it didn't need a bounce buffer then it better
+	 * not touch req->buffer either...
+	 */
+	req->buffer = bio_data(bio);
+	req->__sector = bio->bi_sector;
+	req->__data_len += bio->bi_size;
+	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+	drive_stat_acct(req, 0);
+	return true;
+}
+
+/*
+ * Attempts to merge with the plugged list in the current process. Returns
+ * true if merge was successful, otherwise false.
+ */
+static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
+			       struct bio *bio)
+{
+	struct blk_plug *plug;
+	struct request *rq;
+	bool ret = false;
+
+	plug = tsk->plug;
+	if (!plug)
+		goto out;
+
+	list_for_each_entry_reverse(rq, &plug->list, queuelist) {
+		int el_ret;
+
+		if (rq->q != q)
+			continue;
+
+		el_ret = elv_try_merge(rq, bio);
+		if (el_ret == ELEVATOR_BACK_MERGE) {
+			ret = bio_attempt_back_merge(q, rq, bio);
+			if (ret)
+				break;
+		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
+			ret = bio_attempt_front_merge(q, rq, bio);
+			if (ret)
+				break;
+		}
+	}
+out:
+	return ret;
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cpu = bio->bi_comp_cpu;
@@ -1230,26 +1346,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
-/*
- * Only disabling plugging for non-rotational devices if it does tagging
- * as well, otherwise we do need the proper merging
- */
-static inline bool queue_should_plug(struct request_queue *q)
-{
-	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
-}
-
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
-	struct request *req;
-	int el_ret;
-	unsigned int bytes = bio->bi_size;
-	const unsigned short prio = bio_prio(bio);
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
-	const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
-	const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
-	int where = ELEVATOR_INSERT_SORT;
-	int rw_flags;
+	struct blk_plug *plug;
+	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
+	struct request *req;
 
 	/*
 	 * low level driver can indicate that it wants pages above a
@@ -1258,78 +1360,36 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	spin_lock_irq(q->queue_lock);
-
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+		spin_lock_irq(q->queue_lock);
 		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
 	}
 
-	if (elv_queue_empty(q))
-		goto get_rq;
-
-	el_ret = elv_merge(q, &req, bio);
-	switch (el_ret) {
-	case ELEVATOR_BACK_MERGE:
-		BUG_ON(!rq_mergeable(req));
-
-		if (!ll_back_merge_fn(q, req, bio))
-			break;
-
-		trace_block_bio_backmerge(q, bio);
-
-		if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
-			blk_rq_set_mixed_merge(req);
-
-		req->biotail->bi_next = bio;
-		req->biotail = bio;
-		req->__data_len += bytes;
-		req->ioprio = ioprio_best(req->ioprio, prio);
-		if (!blk_rq_cpu_valid(req))
-			req->cpu = bio->bi_comp_cpu;
-		drive_stat_acct(req, 0);
-		elv_bio_merged(q, req, bio);
-		if (!attempt_back_merge(q, req))
-			elv_merged_request(q, req, el_ret);
+	/*
+	 * Check if we can merge with the plugged list before grabbing
+	 * any locks.
+	 */
+	if (attempt_plug_merge(current, q, bio))
 		goto out;
 
-	case ELEVATOR_FRONT_MERGE:
-		BUG_ON(!rq_mergeable(req));
-
-		if (!ll_front_merge_fn(q, req, bio))
-			break;
-
-		trace_block_bio_frontmerge(q, bio);
+	spin_lock_irq(q->queue_lock);
 
-		if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
-			blk_rq_set_mixed_merge(req);
-			req->cmd_flags &= ~REQ_FAILFAST_MASK;
-			req->cmd_flags |= ff;
+	el_ret = elv_merge(q, &req, bio);
+	if (el_ret == ELEVATOR_BACK_MERGE) {
+		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+		if (bio_attempt_back_merge(q, req, bio)) {
+			if (!attempt_back_merge(q, req))
+				elv_merged_request(q, req, el_ret);
+			goto out_unlock;
+		}
+	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
+		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+		if (bio_attempt_front_merge(q, req, bio)) {
+			if (!attempt_front_merge(q, req))
+				elv_merged_request(q, req, el_ret);
+			goto out_unlock;
 		}
-
-		bio->bi_next = req->bio;
-		req->bio = bio;
-
-		/*
-		 * may not be valid. if the low level driver said
-		 * it didn't need a bounce buffer then it better
-		 * not touch req->buffer either...
-		 */
-		req->buffer = bio_data(bio);
-		req->__sector = bio->bi_sector;
-		req->__data_len += bytes;
-		req->ioprio = ioprio_best(req->ioprio, prio);
-		if (!blk_rq_cpu_valid(req))
-			req->cpu = bio->bi_comp_cpu;
-		drive_stat_acct(req, 0);
-		elv_bio_merged(q, req, bio);
-		if (!attempt_front_merge(q, req))
-			elv_merged_request(q, req, el_ret);
-		goto out;
-
-	/* ELV_NO_MERGE: elevator says don't/can't merge. */
-	default:
-		;
 	}
 
 get_rq:
@@ -1356,20 +1416,35 @@ get_rq:
 	 */
 	init_request_from_bio(req, bio);
 
-	spin_lock_irq(q->queue_lock);
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
-	    bio_flagged(bio, BIO_CPU_AFFINE))
-		req->cpu = blk_cpu_to_group(smp_processor_id());
-	if (queue_should_plug(q) && elv_queue_empty(q))
-		blk_plug_device(q);
+	    bio_flagged(bio, BIO_CPU_AFFINE)) {
+		req->cpu = blk_cpu_to_group(get_cpu());
+		put_cpu();
+	}
 
-	/* insert the request into the elevator */
-	drive_stat_acct(req, 1);
-	__elv_add_request(q, req, where, 0);
+	plug = current->plug;
+	if (plug && !sync) {
+		if (!plug->should_sort && !list_empty(&plug->list)) {
+			struct request *__rq;
+
+			__rq = list_entry_rq(plug->list.prev);
+			if (__rq->q != q)
+				plug->should_sort = 1;
+		}
+		/*
+		 * Debug flag, kill later
+		 */
+		req->cmd_flags |= REQ_ON_PLUG;
+		list_add_tail(&req->queuelist, &plug->list);
+		drive_stat_acct(req, 1);
+	} else {
+		spin_lock_irq(q->queue_lock);
+		add_acct_request(q, req, where);
+		__blk_run_queue(q);
+out_unlock:
+		spin_unlock_irq(q->queue_lock);
+	}
 out:
-	if (unplug || !queue_should_plug(q))
-		__generic_unplug_device(q);
-	spin_unlock_irq(q->queue_lock);
 	return 0;
 }
 
@@ -1772,9 +1847,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	 */
 	BUG_ON(blk_queued_rq(rq));
 
-	drive_stat_acct(rq, 1);
-	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
-
+	add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	return 0;
@@ -2659,6 +2732,106 @@ int kblockd_schedule_delayed_work(struct request_queue *q,
 }
 EXPORT_SYMBOL(kblockd_schedule_delayed_work);
 
+#define PLUG_MAGIC	0x91827364
+
+void blk_start_plug(struct blk_plug *plug)
+{
+	struct task_struct *tsk = current;
+
+	plug->magic = PLUG_MAGIC;
+	INIT_LIST_HEAD(&plug->list);
+	plug->should_sort = 0;
+
+	/*
+	 * If this is a nested plug, don't actually assign it. It will be
+	 * flushed on its own.
+	 */
+	if (!tsk->plug) {
+		/*
+		 * Store ordering should not be needed here, since a potential
+		 * preempt will imply a full memory barrier
+		 */
+		tsk->plug = plug;
+	}
+}
+EXPORT_SYMBOL(blk_start_plug);
+
+static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct request *rqa = container_of(a, struct request, queuelist);
+	struct request *rqb = container_of(b, struct request, queuelist);
+
+	return !(rqa->q == rqb->q);
+}
+
+static void flush_plug_list(struct blk_plug *plug)
+{
+	struct request_queue *q;
+	unsigned long flags;
+	struct request *rq;
+
+	BUG_ON(plug->magic != PLUG_MAGIC);
+
+	if (list_empty(&plug->list))
+		return;
+
+	if (plug->should_sort)
+		list_sort(NULL, &plug->list, plug_rq_cmp);
+
+	q = NULL;
+	local_irq_save(flags);
+	while (!list_empty(&plug->list)) {
+		rq = list_entry_rq(plug->list.next);
+		list_del_init(&rq->queuelist);
+		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
+		BUG_ON(!rq->q);
+		if (rq->q != q) {
+			if (q) {
+				__blk_run_queue(q);
+				spin_unlock(q->queue_lock);
+			}
+			q = rq->q;
+			spin_lock(q->queue_lock);
+		}
+		rq->cmd_flags &= ~REQ_ON_PLUG;
+
+		/*
+		 * rq is already accounted, so use raw insert
+		 */
+		__elv_add_request(q, rq, ELEVATOR_INSERT_SORT, 0);
+	}
+
+	if (q) {
+		__blk_run_queue(q);
+		spin_unlock(q->queue_lock);
+	}
+
+	BUG_ON(!list_empty(&plug->list));
+	local_irq_restore(flags);
+}
+
+static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+	flush_plug_list(plug);
+
+	if (plug == tsk->plug)
+		tsk->plug = NULL;
+}
+
+void blk_finish_plug(struct blk_plug *plug)
+{
+	if (plug)
+		__blk_finish_plug(current, plug);
+}
+EXPORT_SYMBOL(blk_finish_plug);
+
+void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+	__blk_finish_plug(tsk, plug);
+	tsk->plug = plug;
+}
+EXPORT_SYMBOL(__blk_flush_plug);
+
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
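
For illustration, a minimal sketch (not part of this patch) of how a submitter is expected to use the on-stack plugging API added above; submit_batch() and its bio array are hypothetical, and only blk_start_plug()/blk_finish_plug() plus the behaviour noted in the comments come from this patch.

/* assumes <linux/blkdev.h> and <linux/bio.h> */

static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* installs &plug as current->plug */

	/*
	 * Non-REQ_SYNC requests built from these bios are held on the
	 * plug list instead of being dispatched one at a time.
	 */
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);

	/* flush_plug_list(): sort by queue, insert, run each queue once */
	blk_finish_plug(&plug);
}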
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a867e3f524f3..1e2aa8a8908c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -264,10 +264,9 @@ static bool blk_kick_flush(struct request_queue *q)
 static void flush_data_end_io(struct request *rq, int error)
 {
 	struct request_queue *q = rq->q;
-	bool was_empty = elv_queue_empty(q);
 
 	/* after populating an empty queue, kick it to avoid stall */
-	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
+	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
 		__blk_run_queue(q);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index f98e92edc937..25713927c0d3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
+int elv_try_merge(struct request *__rq, struct bio *bio)
 {
 	int ret = ELEVATOR_NO_MERGE;
 
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 	struct list_head *entry;
 	int stop_flags;
 
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
 
@@ -696,6 +698,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 void __elv_add_request(struct request_queue *q, struct request *rq, int where,
 		       int plug)
 {
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dddedfc0af81..16b286473042 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -152,6 +152,7 @@ enum rq_flag_bits {
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_SECURE,		/* secure discard (used with __REQ_DISCARD) */
+	__REQ_ON_PLUG,		/* on plug list */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -193,5 +194,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
+#define REQ_ON_PLUG		(1 << __REQ_ON_PLUG)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f55b2a8b6610..5873037eeb91 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 
+struct blk_plug {
+	unsigned long magic;
+	struct list_head list;
+	unsigned int should_sort;
+};
+
+extern void blk_start_plug(struct blk_plug *);
+extern void blk_finish_plug(struct blk_plug *);
+extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	if (unlikely(plug))
+		__blk_flush_plug(tsk, plug);
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	return plug && !list_empty(&plug->list);
+}
+
 /*
  * tag stuff
  */
@@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void)
 	return 0;
 }
 
+static inline void blk_start_plug(struct list_head *list)
+{
+}
+
+static inline void blk_finish_plug(struct list_head *list)
+{
+}
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+	return false;
+}
+
 #endif /* CONFIG_BLOCK */
 
 #endif
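
For illustration, a sketch (not part of this patch) of the nesting rule encoded in blk_start_plug() above: an inner plug is initialized but never installed over an existing current->plug, so I/O issued in the nested section still gathers on the outer plug. nested_submit() and the bio are hypothetical.

/* assumes <linux/blkdev.h> and <linux/bio.h> */

static void nested_submit(struct bio *bio)
{
	struct blk_plug outer, inner;

	blk_start_plug(&outer);		/* current->plug = &outer */
	blk_start_plug(&inner);		/* current->plug already set: left alone */

	submit_bio(WRITE, bio);		/* lands on outer.list via current->plug */

	blk_finish_plug(&inner);	/* flushes inner.list, which is empty here */
	blk_finish_plug(&outer);	/* dispatches the plugged request */
}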
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 39b68edb388d..8857cf9adbb7 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -105,6 +105,7 @@ extern void elv_add_request(struct request_queue *, struct request *, int, int);
 extern void __elv_add_request(struct request_queue *, struct request *, int, int);
 extern void elv_insert(struct request_queue *, struct request *, int);
 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
+extern int elv_try_merge(struct request *, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 777d8a5ed06b..96ac22643742 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -99,6 +99,7 @@ struct robust_list_head;
 struct bio_list;
 struct fs_struct;
 struct perf_event_context;
+struct blk_plug;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1429,6 +1430,11 @@ struct task_struct {
 /* stacked block device info */
 	struct bio_list *bio_list;
 
+#ifdef CONFIG_BLOCK
+/* stack plugging */
+	struct blk_plug *plug;
+#endif
+
 /* VM state */
 	struct reclaim_state *reclaim_state;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b1..6a488ad2dce5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
 	profile_task_exit(tsk);
 
 	WARN_ON(atomic_read(&tsk->fs_excl));
+	WARN_ON(blk_needs_flush_plug(tsk));
 
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e429152ddc..027c80e5162f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+	p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
 	p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..ca098bf4cc65 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3978,6 +3978,16 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
+	/*
+	 * If we are going to sleep and we have plugged IO queued, make
+	 * sure to submit it to avoid deadlocks.
+	 */
+	if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
+		raw_spin_unlock(&rq->lock);
+		blk_flush_plug(prev);
+		raw_spin_lock(&rq->lock);
+	}
+
 	pre_schedule(rq, prev);
 
 	if (unlikely(!rq->nr_running))
@@ -5333,6 +5343,7 @@ void __sched io_schedule(void)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	blk_flush_plug(current);
 	current->in_iowait = 1;
 	schedule();
 	current->in_iowait = 0;
@@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	blk_flush_plug(current);
 	current->in_iowait = 1;
 	ret = schedule_timeout(timeout);
 	current->in_iowait = 0;
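
For illustration, a sketch (not part of this patch) of why the scheduler hooks above are needed: a task that plugs, submits I/O and then sleeps waiting for that I/O would otherwise wait on a request still sitting on its own plug list. read_page_sync() and its arguments are hypothetical; wait_for_completion() sleeps, so the schedule() path flushes current->plug before blocking.

/* assumes <linux/blkdev.h>, <linux/bio.h> and <linux/completion.h> */

static void read_page_sync(struct bio *bio, struct completion *done)
{
	struct blk_plug plug;

	blk_start_plug(&plug);
	submit_bio(READ, bio);		/* held on current->plug, not yet dispatched */

	/*
	 * Sleeping here hits the new blk_needs_flush_plug() check in
	 * schedule(), which flushes the plug before the task blocks.
	 */
	wait_for_completion(done);

	blk_finish_plug(&plug);
}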