 fs/nfs/blocklayout/blocklayout.c | 74
 fs/nfs/blocklayout/blocklayout.h |  9
 fs/nfs/blocklayout/extents.c     | 85
 3 files changed, 131 insertions(+), 37 deletions(-)
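
A note on the theme that runs through the hunks below: entries for the LAYOUTCOMMIT commit list ("short extents") are now allocated up front, before any bio is submitted, and the bio completion path merely pops a pre-allocated entry, so it can no longer fail on allocation. The following is a minimal userspace sketch of that reserve-before-submit pattern; every name in it is hypothetical and only the shape matches the patch:

/* Reserve-before-submit: allocate while failure is still recoverable,
 * consume from the reserve list in a context where allocation is
 * forbidden (the kernel analogue is the bio completion path). */
#include <stdio.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
};

static struct entry *reserved;		/* stack of pre-allocated entries */

static int push_one(void)		/* may fail: called before submit */
{
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	e->next = reserved;
	reserved = e;
	return 0;
}

static struct entry *pop_one(void)	/* never allocates: completion side */
{
	struct entry *e = reserved;

	if (e)
		reserved = e->next;
	return e;
}

int main(void)
{
	struct entry *e;

	if (push_one())			/* reserve; on failure fall back */
		return 1;		/* (the patch falls back to the MDS) */
	e = pop_one();			/* guaranteed to succeed later */
	printf("consumed %p\n", (void *)e);
	free(e);
	return 0;
}
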
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9215c6644a3..48cfac31f64 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
  */
 struct parallel_io {
 	struct kref refcnt;
-	void (*pnfs_callback) (void *data);
+	void (*pnfs_callback) (void *data, int num_se);
 	void *data;
+	int bse_count;
 };
 
 static inline struct parallel_io *alloc_parallel(void *data)
@@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
 	if (rv) {
 		rv->data = data;
 		kref_init(&rv->refcnt);
+		rv->bse_count = 0;
 	}
 	return rv;
 }
@@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
 	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
 
 	dprintk("%s enter\n", __func__);
-	p->pnfs_callback(p->data);
+	p->pnfs_callback(p->data, p->bse_count);
 	kfree(p);
 }
 
@@ -216,7 +218,7 @@ static void bl_read_cleanup(struct work_struct *work)
 }
 
 static void
-bl_end_par_io_read(void *data)
+bl_end_par_io_read(void *data, int unused)
 {
 	struct nfs_read_data *rdata = data;
 
@@ -317,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
 {
 	sector_t isect, end;
 	struct pnfs_block_extent *be;
+	struct pnfs_block_short_extent *se;
 
 	dprintk("%s(%llu, %u)\n", __func__, offset, count);
 	if (count == 0)
@@ -329,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
 		be = bl_find_get_extent(bl, isect, NULL);
 		BUG_ON(!be); /* FIXME */
 		len = min(end, be->be_f_offset + be->be_length) - isect;
-		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
-			bl_mark_for_commit(be, isect, len); /* What if fails? */
+		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+			se = bl_pop_one_short_extent(be->be_inval);
+			BUG_ON(!se);
+			bl_mark_for_commit(be, isect, len, se);
+		}
 		isect += len;
 		bl_put_extent(be);
 	}
@@ -352,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 		end_page_writeback(page);
 		page_cache_release(page);
 	} while (bvec >= bio->bi_io_vec);
-	if (!uptodate) {
+
+	if (unlikely(!uptodate)) {
 		if (!wdata->pnfs_error)
 			wdata->pnfs_error = -EIO;
 		pnfs_set_lo_fail(wdata->lseg);
@@ -361,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	put_parallel(par);
 }
 
-/* This is basically copied from mpage_end_io_read */
 static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
@@ -387,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work)
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
 	wdata = container_of(task, struct nfs_write_data, task);
-	if (!wdata->pnfs_error) {
+	if (likely(!wdata->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
 		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
 				     wdata->args.offset, wdata->args.count);
@@ -396,10 +402,15 @@ static void bl_write_cleanup(struct work_struct *work)
 }
 
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
-static void bl_end_par_io_write(void *data)
+static void bl_end_par_io_write(void *data, int num_se)
 {
 	struct nfs_write_data *wdata = data;
 
+	if (unlikely(wdata->pnfs_error)) {
+		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+				      num_se);
+	}
+
 	wdata->task.tk_status = wdata->pnfs_error;
 	wdata->verf.committed = NFS_FILE_SYNC;
 	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
@@ -552,7 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	 */
 	par = alloc_parallel(wdata);
 	if (!par)
-		return PNFS_NOT_ATTEMPTED;
+		goto out_mds;
 	par->pnfs_callback = bl_end_par_io_write;
 	/* At this point, have to be more careful with error handling */
 
@@ -560,12 +571,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
 	if (!be || !is_writable(be, isect)) {
 		dprintk("%s no matching extents!\n", __func__);
-		wdata->pnfs_error = -EINVAL;
-		goto out;
+		goto out_mds;
 	}
 
 	/* First page inside INVALID extent */
 	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+		if (likely(!bl_push_one_short_extent(be->be_inval)))
+			par->bse_count++;
+		else
+			goto out_mds;
 		temp = offset >> PAGE_CACHE_SHIFT;
 		npg_zero = do_div(temp, npg_per_block);
 		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
@@ -603,6 +617,19 @@ fill_invalid_ext:
 				wdata->pnfs_error = ret;
 				goto out;
 			}
+			if (likely(!bl_push_one_short_extent(be->be_inval)))
+				par->bse_count++;
+			else {
+				end_page_writeback(page);
+				page_cache_release(page);
+				wdata->pnfs_error = -ENOMEM;
+				goto out;
+			}
+			/* FIXME: This should be done in bi_end_io */
+			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+					     page->index << PAGE_CACHE_SHIFT,
+					     PAGE_CACHE_SIZE);
+
 			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
 						 isect, page, be,
 						 bl_end_io_write_zero, par);
@@ -611,10 +638,6 @@ fill_invalid_ext:
 				bio = NULL;
 				goto out;
 			}
-			/* FIXME: This should be done in bi_end_io */
-			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
-					     page->index << PAGE_CACHE_SHIFT,
-					     PAGE_CACHE_SIZE);
 next_page:
 			isect += PAGE_CACHE_SECTORS;
 			extent_length -= PAGE_CACHE_SECTORS;
@@ -638,6 +661,15 @@ next_page:
 				wdata->pnfs_error = -EINVAL;
 				goto out;
 			}
+			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+				if (likely(!bl_push_one_short_extent(
+						be->be_inval)))
+					par->bse_count++;
+				else {
+					wdata->pnfs_error = -ENOMEM;
+					goto out;
+				}
+			}
 			extent_length = be->be_length -
 				(isect - be->be_f_offset);
 		}
@@ -685,6 +717,10 @@ out:
 	bl_submit_bio(WRITE, bio);
 	put_parallel(par);
 	return PNFS_ATTEMPTED;
+out_mds:
+	bl_put_extent(be);
+	kfree(par);
+	return PNFS_NOT_ATTEMPTED;
 }
 
 /* FIXME - range ignored */
@@ -711,11 +747,17 @@ static void
 release_inval_marks(struct pnfs_inval_markings *marks)
 {
 	struct pnfs_inval_tracking *pos, *temp;
+	struct pnfs_block_short_extent *se, *stemp;
 
 	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
 		list_del(&pos->it_link);
 		kfree(pos);
 	}
+
+	list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
+		list_del(&se->bse_node);
+		kfree(se);
+	}
 	return;
 }
 
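
To recap the blocklayout.c side: struct parallel_io now counts, in bse_count, how many short extents were reserved for this request, and destroy_parallel() passes that count to the pnfs_callback, so bl_end_par_io_write() can release exactly the unused reservations when pnfs_error is set. A hypothetical single-threaded model of that accounting (not kernel code; kref and the write path are reduced to a plain refcount):

#include <stdio.h>
#include <stdlib.h>

struct par_io {
	int refcnt;				/* stands in for struct kref */
	int bse_count;				/* short extents reserved */
	void (*callback)(void *data, int num_se);
	void *data;
};

static void end_par_write(void *data, int num_se)
{
	/* On error, the real bl_end_par_io_write() would free num_se
	 * leftover reservations here via bl_free_short_extents(). */
	printf("%s finished, %d reservation(s) to reclaim\n",
	       (char *)data, num_se);
}

static void put_par(struct par_io *p)
{
	if (--p->refcnt == 0) {		/* last bio done: destroy_parallel() */
		p->callback(p->data, p->bse_count);
		free(p);
	}
}

int main(void)
{
	struct par_io *p = calloc(1, sizeof(*p));

	if (!p)
		return 1;
	p->refcnt = 1;			/* submitter's reference */
	p->callback = end_par_write;
	p->data = "write";
	p->bse_count++;			/* one successful push before submit */
	put_par(p);			/* last reference: callback fires */
	return 0;
}
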
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 60728acc7b9..e31a2df28e7 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -70,6 +70,7 @@ struct pnfs_inval_markings {
 	spinlock_t im_lock;
 	struct my_tree im_tree;	/* Sectors that need LAYOUTCOMMIT */
 	sector_t im_block_size;	/* Server blocksize in sectors */
+	struct list_head im_extents;	/* Short extents for INVAL->RW conversion */
 };
 
 struct pnfs_inval_tracking {
@@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
 {
 	spin_lock_init(&marks->im_lock);
 	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
+	INIT_LIST_HEAD(&marks->im_extents);
 	marks->im_block_size = blocksize;
 	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
 					   blocksize);
@@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
 int bl_add_merge_extent(struct pnfs_block_layout *bl,
 			struct pnfs_block_extent *new);
 int bl_mark_for_commit(struct pnfs_block_extent *be,
-		       sector_t offset, sector_t length);
+		       sector_t offset, sector_t length,
+		       struct pnfs_block_short_extent *new);
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
 
 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
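
The new bl_mark_for_commit() prototype above shifts allocation to the caller: the pnfs_block_short_extent is popped (or allocated) by the caller, and ownership transfers to the callee, which frees it on every path, per the comment added in extents.c below. A tiny hypothetical sketch of that ownership rule (names invented for illustration):

#include <stdlib.h>

struct short_ext { int unused; };

/* Callee consumes 'new' unconditionally: it is either linked into a
 * commit list (and freed after the commit) or freed right away. */
static int mark_for_commit(struct short_ext *new)
{
	if (!new)
		return -1;
	free(new);		/* caller must not touch 'new' after this */
	return 0;
}

int main(void)
{
	struct short_ext *se = calloc(1, sizeof(*se));

	return se ? mark_for_commit(se) : 1;
}
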
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index d0f52ed2242..1abac09f7cd 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -157,10 +157,10 @@ static int _preload_range(struct pnfs_inval_markings *marks,
 		goto out_cleanup;
 	}
 
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	for (s = start; s < end; s += tree->mtt_step_size)
 		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 
 	status = 0;
 
@@ -179,9 +179,9 @@ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
 {
 	int rv;
 
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 	return rv;
 }
 
@@ -221,9 +221,9 @@ static int is_range_written(struct pnfs_inval_markings *marks,
 {
 	int rv;
 
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 	return rv;
 }
 
@@ -244,15 +244,15 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
 	if (_preload_range(marks, start, end - start))
 		goto outerr;
 
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
 		goto out_unlock;
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 
 	return 0;
 
 out_unlock:
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 outerr:
 	return -ENOMEM;
 }
@@ -267,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks,
 
 	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
 		(u64)offset, (u64)length);
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 	return status;
 }
 
@@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
 
 /* Note the range described by offset, length is guaranteed to be contained
  * within be.
+ * new will be freed, either by this function or add_to_commitlist if they
+ * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
  */
 int bl_mark_for_commit(struct pnfs_block_extent *be,
-		       sector_t offset, sector_t length)
+		       sector_t offset, sector_t length,
+		       struct pnfs_block_short_extent *new)
 {
 	sector_t new_end, end = offset + length;
-	struct pnfs_block_short_extent *new;
 	struct pnfs_block_layout *bl = container_of(be->be_inval,
 						    struct pnfs_block_layout,
 						    bl_inval);
 
-	new = kmalloc(sizeof(*new), GFP_NOFS);
-	if (!new)
-		return -ENOMEM;
-
 	mark_written_sectors(be->be_inval, offset, length);
 	/* We want to add the range to commit list, but it must be
 	 * block-normalized, and verified that the normalized range has
@@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
 	new->bse_mdev = be->be_mdev;
 
 	spin_lock(&bl->bl_ext_lock);
-	/* new will be freed, either by add_to_commitlist if it decides not
-	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
-	 */
 	add_to_commitlist(bl, new);
 	spin_unlock(&bl->bl_ext_lock);
 	return 0;
@@ -862,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
 		}
 	}
 }
+
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
+{
+	struct pnfs_block_short_extent *new;
+
+	new = kmalloc(sizeof(*new), GFP_NOFS);
+	if (unlikely(!new))
+		return -ENOMEM;
+
+	spin_lock_bh(&marks->im_lock);
+	list_add(&new->bse_node, &marks->im_extents);
+	spin_unlock_bh(&marks->im_lock);
+
+	return 0;
+}
+
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
+{
+	struct pnfs_block_short_extent *rv = NULL;
+
+	spin_lock_bh(&marks->im_lock);
+	if (!list_empty(&marks->im_extents)) {
+		rv = list_entry((&marks->im_extents)->next,
+				struct pnfs_block_short_extent, bse_node);
+		list_del_init(&rv->bse_node);
+	}
+	spin_unlock_bh(&marks->im_lock);
+
+	return rv;
+}
+
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
+{
+	struct pnfs_block_short_extent *se = NULL, *tmp;
+
+	if (num_to_free <= 0)
+		return;
+
+	spin_lock(&marks->im_lock);
+	list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
+		list_del(&se->bse_node);
+		kfree(se);
+		if (--num_to_free == 0)
+			break;
+	}
+	spin_unlock(&marks->im_lock);
+
+	BUG_ON(num_to_free > 0);
+}
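
The three helpers added above keep a LIFO of pre-allocated pnfs_block_short_extent structures on marks->im_extents under im_lock; popping returns NULL when the list is empty, and bl_free_short_extents() BUGs if asked to release more entries than were reserved. A hypothetical pthread analogue showing the same semantics end to end (a sketch, not kernel code; the list and locking are simplified):

#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

struct short_ext {
	struct short_ext *next;
};

static struct short_ext *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int push_one(void)			/* bl_push_one_short_extent */
{
	struct short_ext *se = malloc(sizeof(*se));

	if (!se)
		return -1;
	pthread_mutex_lock(&lock);
	se->next = head;
	head = se;
	pthread_mutex_unlock(&lock);
	return 0;
}

static struct short_ext *pop_one(void)		/* bl_pop_one_short_extent */
{
	struct short_ext *se;

	pthread_mutex_lock(&lock);
	se = head;
	if (se)
		head = se->next;
	pthread_mutex_unlock(&lock);
	return se;		/* NULL when the reserve is empty */
}

static void free_n(int n)			/* bl_free_short_extents */
{
	pthread_mutex_lock(&lock);
	while (n > 0 && head) {
		struct short_ext *se = head;

		head = se->next;
		free(se);
		n--;
	}
	pthread_mutex_unlock(&lock);
	assert(n == 0);		/* mirrors BUG_ON(num_to_free > 0) */
}

int main(void)
{
	struct short_ext *leftover;

	if (push_one() || push_one())	/* reserve two before "submitting" */
		return 1;
	free(pop_one());		/* one consumed by a successful write */
	free_n(1);			/* error path reclaims the other */
	leftover = pop_one();
	assert(leftover == NULL);	/* reserve is now empty */
	(void)leftover;
	return 0;
}
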