Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r--  drivers/block/drbd/drbd_req.c | 192
1 file changed, 161 insertions(+), 31 deletions(-)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 2b8303ad63c9..c24379ffd4e3 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -34,14 +34,14 @@
 static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size);
 
 /* Update disk stats at start of I/O request */
-static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
+static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
 {
-	const int rw = bio_data_dir(bio);
+	const int rw = bio_data_dir(req->master_bio);
 	int cpu;
 	cpu = part_stat_lock();
 	part_round_stats(cpu, &mdev->vdisk->part0);
 	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
-	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
+	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], req->i.size >> 9);
 	(void) cpu; /* The macro invocations above want the cpu argument, I do not like
 		      the compiler warning about cpu only assigned but never used... */
 	part_inc_in_flight(&mdev->vdisk->part0, rw);
@@ -263,8 +263,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 		else
 			root = &mdev->read_requests;
 		drbd_remove_request_interval(root, req);
-	} else if (!(s & RQ_POSTPONED))
-		D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
+	}
 
 	/* Before we can signal completion to the upper layers,
 	 * we may need to close the current transfer log epoch.
@@ -755,6 +754,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
 		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
 		break;
+
+	case QUEUE_AS_DRBD_BARRIER:
+		start_new_tl_epoch(mdev->tconn);
+		mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
+		break;
 	};
 
 	return rv;
@@ -861,8 +865,10 @@ static void maybe_pull_ahead(struct drbd_conf *mdev)
 	bool congested = false;
 	enum drbd_on_congestion on_congestion;
 
+	rcu_read_lock();
 	nc = rcu_dereference(tconn->net_conf);
 	on_congestion = nc ? nc->on_congestion : OC_BLOCK;
+	rcu_read_unlock();
 	if (on_congestion == OC_BLOCK ||
 	    tconn->agreed_pro_version < 96)
 		return;
@@ -956,14 +962,8 @@ static int drbd_process_write_request(struct drbd_request *req)
 	struct drbd_conf *mdev = req->w.mdev;
 	int remote, send_oos;
 
-	rcu_read_lock();
 	remote = drbd_should_do_remote(mdev->state);
-	if (remote) {
-		maybe_pull_ahead(mdev);
-		remote = drbd_should_do_remote(mdev->state);
-	}
 	send_oos = drbd_should_send_out_of_sync(mdev->state);
-	rcu_read_unlock();
 
 	/* Need to replicate writes. Unless it is an empty flush,
 	 * which is better mapped to a DRBD P_BARRIER packet,
@@ -975,8 +975,8 @@ static int drbd_process_write_request(struct drbd_request *req)
 		/* The only size==0 bios we expect are empty flushes. */
 		D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH);
 		if (remote)
-			start_new_tl_epoch(mdev->tconn);
-		return 0;
+			_req_mod(req, QUEUE_AS_DRBD_BARRIER);
+		return remote;
 	}
 
 	if (!remote && !send_oos)
@@ -1020,12 +1020,24 @@ drbd_submit_req_private_bio(struct drbd_request *req)
 		bio_endio(bio, -EIO);
 }
 
-void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+static void drbd_queue_write(struct drbd_conf *mdev, struct drbd_request *req)
 {
-	const int rw = bio_rw(bio);
-	struct bio_and_error m = { NULL, };
+	spin_lock(&mdev->submit.lock);
+	list_add_tail(&req->tl_requests, &mdev->submit.writes);
+	spin_unlock(&mdev->submit.lock);
+	queue_work(mdev->submit.wq, &mdev->submit.worker);
+}
+
+/* returns the new drbd_request pointer, if the caller is expected to
+ * drbd_send_and_submit() it (to save latency), or NULL if we queued the
+ * request on the submitter thread.
+ * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
+ */
+struct drbd_request *
+drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+{
+	const int rw = bio_data_dir(bio);
 	struct drbd_request *req;
-	bool no_remote = false;
 
 	/* allocate outside of all locks; */
 	req = drbd_req_new(mdev, bio);
@@ -1035,7 +1047,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long
 		 * if user cannot handle io errors, that's not our business. */
 		dev_err(DEV, "could not kmalloc() req\n");
 		bio_endio(bio, -ENOMEM);
-		return;
+		return ERR_PTR(-ENOMEM);
 	}
 	req->start_time = start_time;
 
@@ -1044,28 +1056,40 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long
 		req->private_bio = NULL;
 	}
 
-	/* For WRITES going to the local disk, grab a reference on the target
-	 * extent. This waits for any resync activity in the corresponding
-	 * resync extent to finish, and, if necessary, pulls in the target
-	 * extent into the activity log, which involves further disk io because
-	 * of transactional on-disk meta data updates.
-	 * Empty flushes don't need to go into the activity log, they can only
-	 * flush data for pending writes which are already in there. */
+	/* Update disk stats */
+	_drbd_start_io_acct(mdev, req);
+
 	if (rw == WRITE && req->private_bio && req->i.size
 	    && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+		if (!drbd_al_begin_io_fastpath(mdev, &req->i)) {
+			drbd_queue_write(mdev, req);
+			return NULL;
+		}
 		req->rq_state |= RQ_IN_ACT_LOG;
-		drbd_al_begin_io(mdev, &req->i);
 	}
 
+	return req;
+}
+
+static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	const int rw = bio_rw(req->master_bio);
+	struct bio_and_error m = { NULL, };
+	bool no_remote = false;
+
 	spin_lock_irq(&mdev->tconn->req_lock);
 	if (rw == WRITE) {
 		/* This may temporarily give up the req_lock,
 		 * but will re-aquire it before it returns here.
 		 * Needs to be before the check on drbd_suspended() */
 		complete_conflicting_writes(req);
+		/* no more giving up req_lock from now on! */
+
+		/* check for congestion, and potentially stop sending
+		 * full data updates, but start sending "dirty bits" only. */
+		maybe_pull_ahead(mdev);
 	}
 
-	/* no more giving up req_lock from now on! */
 
 	if (drbd_suspended(mdev)) {
 		/* push back and retry: */
@@ -1078,9 +1102,6 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long
 		goto out;
 	}
 
-	/* Update disk stats */
-	_drbd_start_io_acct(mdev, req, bio);
-
 	/* We fail READ/READA early, if we can not serve it.
 	 * We must do this before req is registered on any lists.
 	 * Otherwise, drbd_req_complete() will queue failed READ for retry. */
@@ -1137,7 +1158,116 @@ out:
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
-	return;
+}
+
+void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+{
+	struct drbd_request *req = drbd_request_prepare(mdev, bio, start_time);
+	if (IS_ERR_OR_NULL(req))
+		return;
+	drbd_send_and_submit(mdev, req);
+}
+
+static void submit_fast_path(struct drbd_conf *mdev, struct list_head *incoming)
+{
+	struct drbd_request *req, *tmp;
+	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+		const int rw = bio_data_dir(req->master_bio);
+
+		if (rw == WRITE /* rw != WRITE should not even end up here! */
+		&& req->private_bio && req->i.size
+		&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
+			if (!drbd_al_begin_io_fastpath(mdev, &req->i))
+				continue;
+
+			req->rq_state |= RQ_IN_ACT_LOG;
+		}
+
+		list_del_init(&req->tl_requests);
+		drbd_send_and_submit(mdev, req);
+	}
+}
+
+static bool prepare_al_transaction_nonblock(struct drbd_conf *mdev,
+					    struct list_head *incoming,
+					    struct list_head *pending)
+{
+	struct drbd_request *req, *tmp;
+	int wake = 0;
+	int err;
+
+	spin_lock_irq(&mdev->al_lock);
+	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+		err = drbd_al_begin_io_nonblock(mdev, &req->i);
+		if (err == -EBUSY)
+			wake = 1;
+		if (err)
+			continue;
+		req->rq_state |= RQ_IN_ACT_LOG;
+		list_move_tail(&req->tl_requests, pending);
+	}
+	spin_unlock_irq(&mdev->al_lock);
+	if (wake)
+		wake_up(&mdev->al_wait);
+
+	return !list_empty(pending);
+}
+
+void do_submit(struct work_struct *ws)
+{
+	struct drbd_conf *mdev = container_of(ws, struct drbd_conf, submit.worker);
+	LIST_HEAD(incoming);
+	LIST_HEAD(pending);
+	struct drbd_request *req, *tmp;
+
+	for (;;) {
+		spin_lock(&mdev->submit.lock);
+		list_splice_tail_init(&mdev->submit.writes, &incoming);
+		spin_unlock(&mdev->submit.lock);
+
+		submit_fast_path(mdev, &incoming);
+		if (list_empty(&incoming))
+			break;
+
+		wait_event(mdev->al_wait, prepare_al_transaction_nonblock(mdev, &incoming, &pending));
+		/* Maybe more was queued, while we prepared the transaction?
+		 * Try to stuff them into this transaction as well.
+		 * Be strictly non-blocking here, no wait_event, we already
+		 * have something to commit.
+		 * Stop if we don't make any more progres.
+		 */
+		for (;;) {
+			LIST_HEAD(more_pending);
+			LIST_HEAD(more_incoming);
+			bool made_progress;
+
+			/* It is ok to look outside the lock,
+			 * it's only an optimization anyways */
+			if (list_empty(&mdev->submit.writes))
+				break;
+
+			spin_lock(&mdev->submit.lock);
+			list_splice_tail_init(&mdev->submit.writes, &more_incoming);
+			spin_unlock(&mdev->submit.lock);
+
+			if (list_empty(&more_incoming))
+				break;
+
+			made_progress = prepare_al_transaction_nonblock(mdev, &more_incoming, &more_pending);
+
+			list_splice_tail_init(&more_pending, &pending);
+			list_splice_tail_init(&more_incoming, &incoming);
+
+			if (!made_progress)
+				break;
+		}
+		drbd_al_begin_io_commit(mdev, false);
+
+		list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
+			list_del_init(&req->tl_requests);
+			drbd_send_and_submit(mdev, req);
+		}
+	}
 }
 
 void drbd_make_request(struct request_queue *q, struct bio *bio)