aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs/pnfs.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r--fs/nfs/pnfs.c352
1 files changed, 263 insertions, 89 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 38512bcd2e98..b8323aa7b543 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
395 dprintk("%s:Begin lo %p\n", __func__, lo); 395 dprintk("%s:Begin lo %p\n", __func__, lo);
396 396
397 if (list_empty(&lo->plh_segs)) { 397 if (list_empty(&lo->plh_segs)) {
398 /* Reset MDS Threshold I/O counters */
399 NFS_I(lo->plh_inode)->write_io = 0;
400 NFS_I(lo->plh_inode)->read_io = 0;
398 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) 401 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
399 put_layout_hdr_locked(lo); 402 put_layout_hdr_locked(lo);
400 return 0; 403 return 0;
@@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
455 spin_unlock(&nfsi->vfs_inode.i_lock); 458 spin_unlock(&nfsi->vfs_inode.i_lock);
456 pnfs_free_lseg_list(&tmp_list); 459 pnfs_free_lseg_list(&tmp_list);
457} 460}
461EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
458 462
459/* 463/*
460 * Called by the state manager to remove all layouts established under an 464 * Called by the state manager to remove all layouts established under an
@@ -692,6 +696,7 @@ out:
692 dprintk("<-- %s status: %d\n", __func__, status); 696 dprintk("<-- %s status: %d\n", __func__, status);
693 return status; 697 return status;
694} 698}
699EXPORT_SYMBOL_GPL(_pnfs_return_layout);
695 700
696bool pnfs_roc(struct inode *ino) 701bool pnfs_roc(struct inode *ino)
697{ 702{
@@ -931,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
931} 936}
932 937
933/* 938/*
939 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
940 * to the MDS or over pNFS
941 *
942 * The nfs_inode read_io and write_io fields are cumulative counters reset
943 * when there are no layout segments. Note that in pnfs_update_layout iomode
944 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
945 * WRITE request.
946 *
947 * A return of true means use MDS I/O.
948 *
949 * From rfc 5661:
950 * If a file's size is smaller than the file size threshold, data accesses
951 * SHOULD be sent to the metadata server. If an I/O request has a length that
952 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
953 * server. If both file size and I/O size are provided, the client SHOULD
954 * reach or exceed both thresholds before sending its read or write
955 * requests to the data server.
956 */
957static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
958 struct inode *ino, int iomode)
959{
960 struct nfs4_threshold *t = ctx->mdsthreshold;
961 struct nfs_inode *nfsi = NFS_I(ino);
962 loff_t fsize = i_size_read(ino);
963 bool size = false, size_set = false, io = false, io_set = false, ret = false;
964
965 if (t == NULL)
966 return ret;
967
968 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
969 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
970
971 switch (iomode) {
972 case IOMODE_READ:
973 if (t->bm & THRESHOLD_RD) {
974 dprintk("%s fsize %llu\n", __func__, fsize);
975 size_set = true;
976 if (fsize < t->rd_sz)
977 size = true;
978 }
979 if (t->bm & THRESHOLD_RD_IO) {
980 dprintk("%s nfsi->read_io %llu\n", __func__,
981 nfsi->read_io);
982 io_set = true;
983 if (nfsi->read_io < t->rd_io_sz)
984 io = true;
985 }
986 break;
987 case IOMODE_RW:
988 if (t->bm & THRESHOLD_WR) {
989 dprintk("%s fsize %llu\n", __func__, fsize);
990 size_set = true;
991 if (fsize < t->wr_sz)
992 size = true;
993 }
994 if (t->bm & THRESHOLD_WR_IO) {
995 dprintk("%s nfsi->write_io %llu\n", __func__,
996 nfsi->write_io);
997 io_set = true;
998 if (nfsi->write_io < t->wr_io_sz)
999 io = true;
1000 }
1001 break;
1002 }
1003 if (size_set && io_set) {
1004 if (size && io)
1005 ret = true;
1006 } else if (size || io)
1007 ret = true;
1008
1009 dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1010 return ret;
1011}
1012
1013/*
934 * Layout segment is retrieved from the server if not cached. 1014 * Layout segment is retrieved from the server if not cached.
935 * The appropriate layout segment is referenced and returned to the caller. 1015 * The appropriate layout segment is referenced and returned to the caller.
936 */ 1016 */
@@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
957 1037
958 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 1038 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
959 return NULL; 1039 return NULL;
1040
1041 if (pnfs_within_mdsthreshold(ctx, ino, iomode))
1042 return NULL;
1043
960 spin_lock(&ino->i_lock); 1044 spin_lock(&ino->i_lock);
961 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1045 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
962 if (lo == NULL) { 1046 if (lo == NULL) {
@@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1082{ 1166{
1083 BUG_ON(pgio->pg_lseg != NULL); 1167 BUG_ON(pgio->pg_lseg != NULL);
1084 1168
1169 if (req->wb_offset != req->wb_pgbase) {
1170 nfs_pageio_reset_read_mds(pgio);
1171 return;
1172 }
1085 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1173 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1086 req->wb_context, 1174 req->wb_context,
1087 req_offset(req), 1175 req_offset(req),
@@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1100{ 1188{
1101 BUG_ON(pgio->pg_lseg != NULL); 1189 BUG_ON(pgio->pg_lseg != NULL);
1102 1190
1191 if (req->wb_offset != req->wb_pgbase) {
1192 nfs_pageio_reset_write_mds(pgio);
1193 return;
1194 }
1103 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1195 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1104 req->wb_context, 1196 req->wb_context,
1105 req_offset(req), 1197 req_offset(req),
@@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1113EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1205EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1114 1206
1115bool 1207bool
1116pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 1208pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1209 const struct nfs_pgio_completion_ops *compl_ops)
1117{ 1210{
1118 struct nfs_server *server = NFS_SERVER(inode); 1211 struct nfs_server *server = NFS_SERVER(inode);
1119 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1212 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1120 1213
1121 if (ld == NULL) 1214 if (ld == NULL)
1122 return false; 1215 return false;
1123 nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); 1216 nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
1217 server->rsize, 0);
1124 return true; 1218 return true;
1125} 1219}
1126 1220
1127bool 1221bool
1128pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 1222pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1223 int ioflags,
1224 const struct nfs_pgio_completion_ops *compl_ops)
1129{ 1225{
1130 struct nfs_server *server = NFS_SERVER(inode); 1226 struct nfs_server *server = NFS_SERVER(inode);
1131 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1227 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1132 1228
1133 if (ld == NULL) 1229 if (ld == NULL)
1134 return false; 1230 return false;
1135 nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); 1231 nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
1232 server->wsize, ioflags);
1136 return true; 1233 return true;
1137} 1234}
1138 1235
@@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1162} 1259}
1163EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1260EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1164 1261
1165static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) 1262int pnfs_write_done_resend_to_mds(struct inode *inode,
1263 struct list_head *head,
1264 const struct nfs_pgio_completion_ops *compl_ops)
1166{ 1265{
1167 struct nfs_pageio_descriptor pgio; 1266 struct nfs_pageio_descriptor pgio;
1168 LIST_HEAD(failed); 1267 LIST_HEAD(failed);
1169 1268
1170 /* Resend all requests through the MDS */ 1269 /* Resend all requests through the MDS */
1171 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); 1270 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
1172 while (!list_empty(head)) { 1271 while (!list_empty(head)) {
1173 struct nfs_page *req = nfs_list_entry(head->next); 1272 struct nfs_page *req = nfs_list_entry(head->next);
1174 1273
@@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
1188 } 1287 }
1189 return 0; 1288 return 0;
1190} 1289}
1290EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1291
1292static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1293{
1294 struct nfs_pgio_header *hdr = data->header;
1295
1296 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1297 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1298 PNFS_LAYOUTRET_ON_ERROR) {
1299 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1300 pnfs_return_layout(hdr->inode);
1301 }
1302 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1303 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
1304 &hdr->pages,
1305 hdr->completion_ops);
1306}
1191 1307
1192/* 1308/*
1193 * Called by non rpc-based layout drivers 1309 * Called by non rpc-based layout drivers
1194 */ 1310 */
1195void pnfs_ld_write_done(struct nfs_write_data *data) 1311void pnfs_ld_write_done(struct nfs_write_data *data)
1196{ 1312{
1197 if (likely(!data->pnfs_error)) { 1313 struct nfs_pgio_header *hdr = data->header;
1314
1315 if (!hdr->pnfs_error) {
1198 pnfs_set_layoutcommit(data); 1316 pnfs_set_layoutcommit(data);
1199 data->mds_ops->rpc_call_done(&data->task, data); 1317 hdr->mds_ops->rpc_call_done(&data->task, data);
1200 } else { 1318 } else
1201 dprintk("pnfs write error = %d\n", data->pnfs_error); 1319 pnfs_ld_handle_write_error(data);
1202 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1320 hdr->mds_ops->rpc_release(data);
1203 PNFS_LAYOUTRET_ON_ERROR) {
1204 /* Don't lo_commit on error, Server will needs to
1205 * preform a file recovery.
1206 */
1207 clear_bit(NFS_INO_LAYOUTCOMMIT,
1208 &NFS_I(data->inode)->flags);
1209 pnfs_return_layout(data->inode);
1210 }
1211 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1212 }
1213 put_lseg(data->lseg);
1214 data->mds_ops->rpc_release(data);
1215} 1321}
1216EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1322EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1217 1323
@@ -1219,12 +1325,13 @@ static void
1219pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1325pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1220 struct nfs_write_data *data) 1326 struct nfs_write_data *data)
1221{ 1327{
1222 list_splice_tail_init(&data->pages, &desc->pg_list); 1328 struct nfs_pgio_header *hdr = data->header;
1223 if (data->req && list_empty(&data->req->wb_list)) 1329
1224 nfs_list_add_request(data->req, &desc->pg_list); 1330 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1225 nfs_pageio_reset_write_mds(desc); 1331 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1226 desc->pg_recoalesce = 1; 1332 nfs_pageio_reset_write_mds(desc);
1227 put_lseg(data->lseg); 1333 desc->pg_recoalesce = 1;
1334 }
1228 nfs_writedata_release(data); 1335 nfs_writedata_release(data);
1229} 1336}
1230 1337
@@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1234 struct pnfs_layout_segment *lseg, 1341 struct pnfs_layout_segment *lseg,
1235 int how) 1342 int how)
1236{ 1343{
1237 struct inode *inode = wdata->inode; 1344 struct nfs_pgio_header *hdr = wdata->header;
1345 struct inode *inode = hdr->inode;
1238 enum pnfs_try_status trypnfs; 1346 enum pnfs_try_status trypnfs;
1239 struct nfs_server *nfss = NFS_SERVER(inode); 1347 struct nfs_server *nfss = NFS_SERVER(inode);
1240 1348
1241 wdata->mds_ops = call_ops; 1349 hdr->mds_ops = call_ops;
1242 wdata->lseg = get_lseg(lseg);
1243 1350
1244 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1351 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1245 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1352 inode->i_ino, wdata->args.count, wdata->args.offset, how);
1246
1247 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1353 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
1248 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1354 if (trypnfs != PNFS_NOT_ATTEMPTED)
1249 put_lseg(wdata->lseg);
1250 wdata->lseg = NULL;
1251 } else
1252 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1355 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1253
1254 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1356 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1255 return trypnfs; 1357 return trypnfs;
1256} 1358}
@@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1266 while (!list_empty(head)) { 1368 while (!list_empty(head)) {
1267 enum pnfs_try_status trypnfs; 1369 enum pnfs_try_status trypnfs;
1268 1370
1269 data = list_entry(head->next, struct nfs_write_data, list); 1371 data = list_first_entry(head, struct nfs_write_data, list);
1270 list_del_init(&data->list); 1372 list_del_init(&data->list);
1271 1373
1272 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1374 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1276 put_lseg(lseg); 1378 put_lseg(lseg);
1277} 1379}
1278 1380
1381static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1382{
1383 put_lseg(hdr->lseg);
1384 nfs_writehdr_free(hdr);
1385}
1386
1279int 1387int
1280pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1388pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1281{ 1389{
1282 LIST_HEAD(head); 1390 struct nfs_write_header *whdr;
1391 struct nfs_pgio_header *hdr;
1283 int ret; 1392 int ret;
1284 1393
1285 ret = nfs_generic_flush(desc, &head); 1394 whdr = nfs_writehdr_alloc();
1286 if (ret != 0) { 1395 if (!whdr) {
1396 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1287 put_lseg(desc->pg_lseg); 1397 put_lseg(desc->pg_lseg);
1288 desc->pg_lseg = NULL; 1398 desc->pg_lseg = NULL;
1289 return ret; 1399 return -ENOMEM;
1290 } 1400 }
1291 pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); 1401 hdr = &whdr->header;
1292 return 0; 1402 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1403 hdr->lseg = get_lseg(desc->pg_lseg);
1404 atomic_inc(&hdr->refcnt);
1405 ret = nfs_generic_flush(desc, hdr);
1406 if (ret != 0) {
1407 put_lseg(desc->pg_lseg);
1408 desc->pg_lseg = NULL;
1409 } else
1410 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
1411 if (atomic_dec_and_test(&hdr->refcnt))
1412 hdr->completion_ops->completion(hdr);
1413 return ret;
1293} 1414}
1294EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1415EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1295 1416
1296static void pnfs_ld_handle_read_error(struct nfs_read_data *data) 1417int pnfs_read_done_resend_to_mds(struct inode *inode,
1418 struct list_head *head,
1419 const struct nfs_pgio_completion_ops *compl_ops)
1297{ 1420{
1298 struct nfs_pageio_descriptor pgio; 1421 struct nfs_pageio_descriptor pgio;
1422 LIST_HEAD(failed);
1299 1423
1300 put_lseg(data->lseg); 1424 /* Resend all requests through the MDS */
1301 data->lseg = NULL; 1425 nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
1302 dprintk("pnfs write error = %d\n", data->pnfs_error); 1426 while (!list_empty(head)) {
1303 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1427 struct nfs_page *req = nfs_list_entry(head->next);
1304 PNFS_LAYOUTRET_ON_ERROR)
1305 pnfs_return_layout(data->inode);
1306
1307 nfs_pageio_init_read_mds(&pgio, data->inode);
1308
1309 while (!list_empty(&data->pages)) {
1310 struct nfs_page *req = nfs_list_entry(data->pages.next);
1311 1428
1312 nfs_list_remove_request(req); 1429 nfs_list_remove_request(req);
1313 nfs_pageio_add_request(&pgio, req); 1430 if (!nfs_pageio_add_request(&pgio, req))
1431 nfs_list_add_request(req, &failed);
1314 } 1432 }
1315 nfs_pageio_complete(&pgio); 1433 nfs_pageio_complete(&pgio);
1434
1435 if (!list_empty(&failed)) {
1436 list_move(&failed, head);
1437 return -EIO;
1438 }
1439 return 0;
1440}
1441EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1442
1443static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1444{
1445 struct nfs_pgio_header *hdr = data->header;
1446
1447 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1448 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1449 PNFS_LAYOUTRET_ON_ERROR) {
1450 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1451 pnfs_return_layout(hdr->inode);
1452 }
1453 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1454 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1455 &hdr->pages,
1456 hdr->completion_ops);
1316} 1457}
1317 1458
1318/* 1459/*
@@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1320 */ 1461 */
1321void pnfs_ld_read_done(struct nfs_read_data *data) 1462void pnfs_ld_read_done(struct nfs_read_data *data)
1322{ 1463{
1323 if (likely(!data->pnfs_error)) { 1464 struct nfs_pgio_header *hdr = data->header;
1465
1466 if (likely(!hdr->pnfs_error)) {
1324 __nfs4_read_done_cb(data); 1467 __nfs4_read_done_cb(data);
1325 data->mds_ops->rpc_call_done(&data->task, data); 1468 hdr->mds_ops->rpc_call_done(&data->task, data);
1326 } else 1469 } else
1327 pnfs_ld_handle_read_error(data); 1470 pnfs_ld_handle_read_error(data);
1328 put_lseg(data->lseg); 1471 hdr->mds_ops->rpc_release(data);
1329 data->mds_ops->rpc_release(data);
1330} 1472}
1331EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1473EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1332 1474
@@ -1334,11 +1476,13 @@ static void
1334pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1476pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1335 struct nfs_read_data *data) 1477 struct nfs_read_data *data)
1336{ 1478{
1337 list_splice_tail_init(&data->pages, &desc->pg_list); 1479 struct nfs_pgio_header *hdr = data->header;
1338 if (data->req && list_empty(&data->req->wb_list)) 1480
1339 nfs_list_add_request(data->req, &desc->pg_list); 1481 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1340 nfs_pageio_reset_read_mds(desc); 1482 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1341 desc->pg_recoalesce = 1; 1483 nfs_pageio_reset_read_mds(desc);
1484 desc->pg_recoalesce = 1;
1485 }
1342 nfs_readdata_release(data); 1486 nfs_readdata_release(data);
1343} 1487}
1344 1488
@@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1350 const struct rpc_call_ops *call_ops, 1494 const struct rpc_call_ops *call_ops,
1351 struct pnfs_layout_segment *lseg) 1495 struct pnfs_layout_segment *lseg)
1352{ 1496{
1353 struct inode *inode = rdata->inode; 1497 struct nfs_pgio_header *hdr = rdata->header;
1498 struct inode *inode = hdr->inode;
1354 struct nfs_server *nfss = NFS_SERVER(inode); 1499 struct nfs_server *nfss = NFS_SERVER(inode);
1355 enum pnfs_try_status trypnfs; 1500 enum pnfs_try_status trypnfs;
1356 1501
1357 rdata->mds_ops = call_ops; 1502 hdr->mds_ops = call_ops;
1358 rdata->lseg = get_lseg(lseg);
1359 1503
1360 dprintk("%s: Reading ino:%lu %u@%llu\n", 1504 dprintk("%s: Reading ino:%lu %u@%llu\n",
1361 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1505 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1362 1506
1363 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1507 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1364 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1508 if (trypnfs != PNFS_NOT_ATTEMPTED)
1365 put_lseg(rdata->lseg);
1366 rdata->lseg = NULL;
1367 } else {
1368 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1509 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1369 }
1370 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1510 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1371 return trypnfs; 1511 return trypnfs;
1372} 1512}
@@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1382 while (!list_empty(head)) { 1522 while (!list_empty(head)) {
1383 enum pnfs_try_status trypnfs; 1523 enum pnfs_try_status trypnfs;
1384 1524
1385 data = list_entry(head->next, struct nfs_read_data, list); 1525 data = list_first_entry(head, struct nfs_read_data, list);
1386 list_del_init(&data->list); 1526 list_del_init(&data->list);
1387 1527
1388 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1528 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1392 put_lseg(lseg); 1532 put_lseg(lseg);
1393} 1533}
1394 1534
1535static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1536{
1537 put_lseg(hdr->lseg);
1538 nfs_readhdr_free(hdr);
1539}
1540
1395int 1541int
1396pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1542pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1397{ 1543{
1398 LIST_HEAD(head); 1544 struct nfs_read_header *rhdr;
1545 struct nfs_pgio_header *hdr;
1399 int ret; 1546 int ret;
1400 1547
1401 ret = nfs_generic_pagein(desc, &head); 1548 rhdr = nfs_readhdr_alloc();
1402 if (ret != 0) { 1549 if (!rhdr) {
1550 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1551 ret = -ENOMEM;
1403 put_lseg(desc->pg_lseg); 1552 put_lseg(desc->pg_lseg);
1404 desc->pg_lseg = NULL; 1553 desc->pg_lseg = NULL;
1405 return ret; 1554 return ret;
1406 } 1555 }
1407 pnfs_do_multiple_reads(desc, &head); 1556 hdr = &rhdr->header;
1408 return 0; 1557 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1558 hdr->lseg = get_lseg(desc->pg_lseg);
1559 atomic_inc(&hdr->refcnt);
1560 ret = nfs_generic_pagein(desc, hdr);
1561 if (ret != 0) {
1562 put_lseg(desc->pg_lseg);
1563 desc->pg_lseg = NULL;
1564 } else
1565 pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1566 if (atomic_dec_and_test(&hdr->refcnt))
1567 hdr->completion_ops->completion(hdr);
1568 return ret;
1409} 1569}
1410EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1570EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1411 1571
@@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1438void 1598void
1439pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1599pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1440{ 1600{
1441 struct nfs_inode *nfsi = NFS_I(wdata->inode); 1601 struct nfs_pgio_header *hdr = wdata->header;
1602 struct inode *inode = hdr->inode;
1603 struct nfs_inode *nfsi = NFS_I(inode);
1442 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1604 loff_t end_pos = wdata->mds_offset + wdata->res.count;
1443 bool mark_as_dirty = false; 1605 bool mark_as_dirty = false;
1444 1606
1445 spin_lock(&nfsi->vfs_inode.i_lock); 1607 spin_lock(&inode->i_lock);
1446 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1608 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1447 mark_as_dirty = true; 1609 mark_as_dirty = true;
1448 dprintk("%s: Set layoutcommit for inode %lu ", 1610 dprintk("%s: Set layoutcommit for inode %lu ",
1449 __func__, wdata->inode->i_ino); 1611 __func__, inode->i_ino);
1450 } 1612 }
1451 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { 1613 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1452 /* references matched in nfs4_layoutcommit_release */ 1614 /* references matched in nfs4_layoutcommit_release */
1453 get_lseg(wdata->lseg); 1615 get_lseg(hdr->lseg);
1454 } 1616 }
1455 if (end_pos > nfsi->layout->plh_lwb) 1617 if (end_pos > nfsi->layout->plh_lwb)
1456 nfsi->layout->plh_lwb = end_pos; 1618 nfsi->layout->plh_lwb = end_pos;
1457 spin_unlock(&nfsi->vfs_inode.i_lock); 1619 spin_unlock(&inode->i_lock);
1458 dprintk("%s: lseg %p end_pos %llu\n", 1620 dprintk("%s: lseg %p end_pos %llu\n",
1459 __func__, wdata->lseg, nfsi->layout->plh_lwb); 1621 __func__, hdr->lseg, nfsi->layout->plh_lwb);
1460 1622
1461 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 1623 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1462 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 1624 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1463 if (mark_as_dirty) 1625 if (mark_as_dirty)
1464 mark_inode_dirty_sync(wdata->inode); 1626 mark_inode_dirty_sync(inode);
1465} 1627}
1466EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 1628EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1467 1629
@@ -1550,3 +1712,15 @@ out_free:
1550 kfree(data); 1712 kfree(data);
1551 goto out; 1713 goto out;
1552} 1714}
1715
1716struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
1717{
1718 struct nfs4_threshold *thp;
1719
1720 thp = kzalloc(sizeof(*thp), GFP_NOFS);
1721 if (!thp) {
1722 dprintk("%s mdsthreshold allocation failed\n", __func__);
1723 return NULL;
1724 }
1725 return thp;
1726}