author    Linus Torvalds <torvalds@linux-foundation.org>  2012-01-16 18:08:13 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-01-16 18:08:13 -0500
commit    a12587b00388d1694933252e97abca237bc3a6b8 (patch)
tree      56ba7ec24ed84f81ce41fc327c3722ab2fb9f3eb /fs
parent    adfeb6e9f46ded31b46fe406ad0dd6a9b4e0f7fe (diff)
parent    7c5465d6ccd759caa959828e2add5603518dafc4 (diff)

Merge tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

NFS client bugfixes and cleanups for Linux 3.3 (pull 2)

* tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  pnfsblock: alloc short extent before submit bio
  pnfsblock: remove rpc_call_ops from struct parallel_io
  pnfsblock: move find lock page logic out of bl_write_pagelist
  pnfsblock: cleanup bl_mark_sectors_init
  pnfsblock: limit bio page count
  pnfsblock: don't spinlock when freeing block_dev
  pnfsblock: clean up _add_entry
  pnfsblock: set read/write tk_status to pnfs_error
  pnfsblock: acquire im_lock in _preload_range
  NFS4: fix compile warnings in nfs4proc.c
  nfs: check for integer overflow in decode_devicenotify_args()
  NFS: cleanup endian type in decode_ds_addr()
  NFS: add an endian notation
Diffstat (limited to 'fs')
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c  202
-rw-r--r--  fs/nfs/blocklayout/blocklayout.h   12
-rw-r--r--  fs/nfs/blocklayout/extents.c      176
-rw-r--r--  fs/nfs/callback.h                   2
-rw-r--r--  fs/nfs/callback_xdr.c               4
-rw-r--r--  fs/nfs/nfs4filelayoutdev.c          2
-rw-r--r--  fs/nfs/nfs4proc.c                   2
7 files changed, 222 insertions(+), 178 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 281ae95932c9..48cfac31f64c 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -90,9 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
  */
 struct parallel_io {
 	struct kref refcnt;
-	struct rpc_call_ops call_ops;
-	void (*pnfs_callback) (void *data);
+	void (*pnfs_callback) (void *data, int num_se);
 	void *data;
+	int bse_count;
 };
 
 static inline struct parallel_io *alloc_parallel(void *data)
@@ -103,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
 	if (rv) {
 		rv->data = data;
 		kref_init(&rv->refcnt);
+		rv->bse_count = 0;
 	}
 	return rv;
 }
@@ -117,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
 	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
 
 	dprintk("%s enter\n", __func__);
-	p->pnfs_callback(p->data);
+	p->pnfs_callback(p->data, p->bse_count);
 	kfree(p);
 }
 
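The struct parallel_io changes above ride on the usual kref pattern: the submitter holds one reference, each in-flight bio holds another, and the pnfs callback fires exactly once from the final kref_put(). A minimal sketch of that pattern, with hypothetical names (par_sketch and friends are illustrative, not the kernel's):

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct par_sketch {
		struct kref refcnt;
		void (*done)(void *data, int count);	/* mirrors pnfs_callback */
		void *data;
		int count;				/* mirrors bse_count */
	};

	static void par_destroy(struct kref *kref)
	{
		struct par_sketch *p = container_of(kref, struct par_sketch, refcnt);

		p->done(p->data, p->count);	/* runs exactly once, on the last put */
		kfree(p);
	}

	/* Submission path: kref_get(&p->refcnt) per bio issued.
	 * Completion path: kref_put(&p->refcnt, par_destroy) per bio completed,
	 * plus the submitter's own put after the last submit.
	 */
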
@@ -146,14 +147,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
 {
 	struct bio *bio;
 
+	npg = min(npg, BIO_MAX_PAGES);
 	bio = bio_alloc(GFP_NOIO, npg);
-	if (!bio)
-		return NULL;
+	if (!bio && (current->flags & PF_MEMALLOC)) {
+		while (!bio && (npg /= 2))
+			bio = bio_alloc(GFP_NOIO, npg);
+	}
 
-	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
-	bio->bi_bdev = be->be_mdev;
-	bio->bi_end_io = end_io;
-	bio->bi_private = par;
+	if (bio) {
+		bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+		bio->bi_bdev = be->be_mdev;
+		bio->bi_end_io = end_io;
+		bio->bi_private = par;
+	}
 	return bio;
 }
 
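The fallback in this hunk is worth spelling out: in direct-reclaim context (PF_MEMALLOC) a large GFP_NOIO bio_alloc() is the most likely to fail, so the loop trades bio size for allocation success. Termination is guaranteed because npg /= 2 in the loop condition eventually reaches 0, leaving bio NULL for the caller to handle. A condensed restatement of the control flow, under the pre-3.14 two-argument bio_alloc() signature:

	npg = min(npg, BIO_MAX_PAGES);	/* a bio cannot hold more than BIO_MAX_PAGES */
	bio = bio_alloc(GFP_NOIO, npg);
	if (!bio && (current->flags & PF_MEMALLOC)) {
		/* in reclaim: keep halving until an allocation fits */
		while (!bio && (npg /= 2))	/* npg reaching 0 stops the retries */
			bio = bio_alloc(GFP_NOIO, npg);
	}
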
@@ -212,22 +218,15 @@ static void bl_read_cleanup(struct work_struct *work)
 }
 
 static void
-bl_end_par_io_read(void *data)
+bl_end_par_io_read(void *data, int unused)
 {
 	struct nfs_read_data *rdata = data;
 
+	rdata->task.tk_status = rdata->pnfs_error;
 	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
 	schedule_work(&rdata->task.u.tk_work);
 }
 
-/* We don't want normal .rpc_call_done callback used, so we replace it
- * with this stub.
- */
-static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
-{
-	return;
-}
-
 static enum pnfs_try_status
 bl_read_pagelist(struct nfs_read_data *rdata)
 {
@@ -247,8 +246,6 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 	par = alloc_parallel(rdata);
 	if (!par)
 		goto use_mds;
-	par->call_ops = *rdata->mds_ops;
-	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
 	par->pnfs_callback = bl_end_par_io_read;
 	/* At this point, we can no longer jump to use_mds */
 
@@ -322,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
 {
 	sector_t isect, end;
 	struct pnfs_block_extent *be;
+	struct pnfs_block_short_extent *se;
 
 	dprintk("%s(%llu, %u)\n", __func__, offset, count);
 	if (count == 0)
@@ -334,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
 		be = bl_find_get_extent(bl, isect, NULL);
 		BUG_ON(!be); /* FIXME */
 		len = min(end, be->be_f_offset + be->be_length) - isect;
-		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
-			bl_mark_for_commit(be, isect, len); /* What if fails? */
+		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+			se = bl_pop_one_short_extent(be->be_inval);
+			BUG_ON(!se);
+			bl_mark_for_commit(be, isect, len, se);
+		}
 		isect += len;
 		bl_put_extent(be);
 	}
@@ -357,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 		end_page_writeback(page);
 		page_cache_release(page);
 	} while (bvec >= bio->bi_io_vec);
-	if (!uptodate) {
+
+	if (unlikely(!uptodate)) {
 		if (!wdata->pnfs_error)
 			wdata->pnfs_error = -EIO;
 		pnfs_set_lo_fail(wdata->lseg);
@@ -366,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	put_parallel(par);
 }
 
-/* This is basically copied from mpage_end_io_read */
 static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
@@ -392,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work)
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
 	wdata = container_of(task, struct nfs_write_data, task);
-	if (!wdata->pnfs_error) {
+	if (likely(!wdata->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
 		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
 				     wdata->args.offset, wdata->args.count);
@@ -401,11 +402,16 @@ static void bl_write_cleanup(struct work_struct *work)
 }
 
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
-static void bl_end_par_io_write(void *data)
+static void bl_end_par_io_write(void *data, int num_se)
 {
 	struct nfs_write_data *wdata = data;
 
-	wdata->task.tk_status = 0;
+	if (unlikely(wdata->pnfs_error)) {
+		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+				      num_se);
+	}
+
+	wdata->task.tk_status = wdata->pnfs_error;
 	wdata->verf.committed = NFS_FILE_SYNC;
 	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
 	schedule_work(&wdata->task.u.tk_work);
@@ -484,6 +490,55 @@ cleanup:
 	return ret;
 }
 
+/* Find or create a zeroing page marked being writeback.
+ * Return ERR_PTR on error, NULL to indicate skip this page and page itself
+ * to indicate write out.
+ */
+static struct page *
+bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
+			struct pnfs_block_extent *cow_read)
+{
+	struct page *page;
+	int locked = 0;
+	page = find_get_page(inode->i_mapping, index);
+	if (page)
+		goto check_page;
+
+	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+	if (unlikely(!page)) {
+		dprintk("%s oom\n", __func__);
+		return ERR_PTR(-ENOMEM);
+	}
+	locked = 1;
+
+check_page:
+	/* PageDirty: Other will write this out
+	 * PageWriteback: Other is writing this out
+	 * PageUptodate: It was read before
+	 */
+	if (PageDirty(page) || PageWriteback(page)) {
+		print_page(page);
+		if (locked)
+			unlock_page(page);
+		page_cache_release(page);
+		return NULL;
+	}
+
+	if (!locked) {
+		lock_page(page);
+		locked = 1;
+		goto check_page;
+	}
+	if (!PageUptodate(page)) {
+		/* New page, readin or zero it */
+		init_page_for_write(page, cow_read);
+	}
+	set_page_writeback(page);
+	unlock_page(page);
+
+	return page;
+}
+
 static enum pnfs_try_status
 bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 {
@@ -508,9 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	 */
 	par = alloc_parallel(wdata);
 	if (!par)
-		return PNFS_NOT_ATTEMPTED;
-	par->call_ops = *wdata->mds_ops;
-	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
+		goto out_mds;
 	par->pnfs_callback = bl_end_par_io_write;
 	/* At this point, have to be more careful with error handling */
 
@@ -518,12 +571,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
 	if (!be || !is_writable(be, isect)) {
 		dprintk("%s no matching extents!\n", __func__);
-		wdata->pnfs_error = -EINVAL;
-		goto out;
+		goto out_mds;
 	}
 
 	/* First page inside INVALID extent */
 	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+		if (likely(!bl_push_one_short_extent(be->be_inval)))
+			par->bse_count++;
+		else
+			goto out_mds;
 		temp = offset >> PAGE_CACHE_SHIFT;
 		npg_zero = do_div(temp, npg_per_block);
 		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
@@ -543,36 +599,16 @@ fill_invalid_ext:
 			dprintk("%s zero %dth page: index %lu isect %llu\n",
 				__func__, npg_zero, index,
 				(unsigned long long)isect);
-			page =
-			    find_or_create_page(wdata->inode->i_mapping, index,
-						GFP_NOFS);
-			if (!page) {
-				dprintk("%s oom\n", __func__);
-				wdata->pnfs_error = -ENOMEM;
+			page = bl_find_get_zeroing_page(wdata->inode, index,
+							cow_read);
+			if (unlikely(IS_ERR(page))) {
+				wdata->pnfs_error = PTR_ERR(page);
 				goto out;
-			}
-
-			/* PageDirty: Other will write this out
-			 * PageWriteback: Other is writing this out
-			 * PageUptodate: It was read before
-			 * sector_initialized: already written out
-			 */
-			if (PageDirty(page) || PageWriteback(page)) {
-				print_page(page);
-				unlock_page(page);
-				page_cache_release(page);
+			} else if (page == NULL)
 				goto next_page;
-			}
-			if (!PageUptodate(page)) {
-				/* New page, readin or zero it */
-				init_page_for_write(page, cow_read);
-			}
-			set_page_writeback(page);
-			unlock_page(page);
 
 			ret = bl_mark_sectors_init(be->be_inval, isect,
-						       PAGE_CACHE_SECTORS,
-						       NULL);
+						       PAGE_CACHE_SECTORS);
 			if (unlikely(ret)) {
 				dprintk("%s bl_mark_sectors_init fail %d\n",
 					__func__, ret);
@@ -581,6 +617,19 @@ fill_invalid_ext:
 				wdata->pnfs_error = ret;
 				goto out;
 			}
+			if (likely(!bl_push_one_short_extent(be->be_inval)))
+				par->bse_count++;
+			else {
+				end_page_writeback(page);
+				page_cache_release(page);
+				wdata->pnfs_error = -ENOMEM;
+				goto out;
+			}
+			/* FIXME: This should be done in bi_end_io */
+			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+					page->index << PAGE_CACHE_SHIFT,
+					PAGE_CACHE_SIZE);
+
 			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
 						 isect, page, be,
 						 bl_end_io_write_zero, par);
@@ -589,10 +638,6 @@ fill_invalid_ext:
 				bio = NULL;
 				goto out;
 			}
-			/* FIXME: This should be done in bi_end_io */
-			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
-					page->index << PAGE_CACHE_SHIFT,
-					PAGE_CACHE_SIZE);
 next_page:
 			isect += PAGE_CACHE_SECTORS;
 			extent_length -= PAGE_CACHE_SECTORS;
@@ -616,13 +661,21 @@ next_page:
 				wdata->pnfs_error = -EINVAL;
 				goto out;
 			}
+			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+				if (likely(!bl_push_one_short_extent(
+								be->be_inval)))
+					par->bse_count++;
+				else {
+					wdata->pnfs_error = -ENOMEM;
+					goto out;
+				}
+			}
 			extent_length = be->be_length -
 			    (isect - be->be_f_offset);
 		}
 		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
 			ret = bl_mark_sectors_init(be->be_inval, isect,
-						   PAGE_CACHE_SECTORS,
-						   NULL);
+						   PAGE_CACHE_SECTORS);
 			if (unlikely(ret)) {
 				dprintk("%s bl_mark_sectors_init fail %d\n",
 					__func__, ret);
@@ -664,6 +717,10 @@ out:
 	bl_submit_bio(WRITE, bio);
 	put_parallel(par);
 	return PNFS_ATTEMPTED;
+out_mds:
+	bl_put_extent(be);
+	kfree(par);
+	return PNFS_NOT_ATTEMPTED;
 }
 
 /* FIXME - range ignored */
@@ -690,11 +747,17 @@ static void
 release_inval_marks(struct pnfs_inval_markings *marks)
 {
 	struct pnfs_inval_tracking *pos, *temp;
+	struct pnfs_block_short_extent *se, *stemp;
 
 	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
 		list_del(&pos->it_link);
 		kfree(pos);
 	}
+
+	list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
+		list_del(&se->bse_node);
+		kfree(se);
+	}
 	return;
 }
 
@@ -779,16 +842,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
 static void free_blk_mountid(struct block_mount_id *mid)
 {
 	if (mid) {
-		struct pnfs_block_dev *dev;
-		spin_lock(&mid->bm_lock);
-		while (!list_empty(&mid->bm_devlist)) {
-			dev = list_first_entry(&mid->bm_devlist,
-					       struct pnfs_block_dev,
-					       bm_node);
+		struct pnfs_block_dev *dev, *tmp;
+
+		/* No need to take bm_lock as we are last user freeing bm_devlist */
+		list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
 			list_del(&dev->bm_node);
 			bl_free_block_dev(dev);
 		}
-		spin_unlock(&mid->bm_lock);
 		kfree(mid);
 	}
 }
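The free_blk_mountid() change drops bm_lock on the stated assumption that teardown is single-threaded; the _safe iterator is still mandatory because each node is freed mid-walk. A self-contained sketch of that teardown idiom (dev_sketch is a hypothetical type, not the driver's):

	#include <linux/list.h>
	#include <linux/slab.h>

	struct dev_sketch {
		struct list_head node;
		/* ... device state ... */
	};

	/* Called only when we are provably the last user of the list:
	 * no lock is needed, but list_for_each_entry_safe() is, because
	 * kfree() invalidates the node we are standing on.
	 */
	static void free_dev_list(struct list_head *head)
	{
		struct dev_sketch *dev, *tmp;

		list_for_each_entry_safe(dev, tmp, head, node) {
			list_del(&dev->node);
			kfree(dev);
		}
	}
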
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 42acf7ef5992..e31a2df28e70 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -70,6 +70,7 @@ struct pnfs_inval_markings {
 	spinlock_t	im_lock;
 	struct my_tree	im_tree;	/* Sectors that need LAYOUTCOMMIT */
 	sector_t	im_block_size;	/* Server blocksize in sectors */
+	struct list_head im_extents;	/* Short extents for INVAL->RW conversion */
 };
 
 struct pnfs_inval_tracking {
@@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
 {
 	spin_lock_init(&marks->im_lock);
 	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
+	INIT_LIST_HEAD(&marks->im_extents);
 	marks->im_block_size = blocksize;
 	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
 					   blocksize);
@@ -186,8 +188,7 @@ struct pnfs_block_extent *
 bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
 		struct pnfs_block_extent **cow_read);
 int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
-			     sector_t offset, sector_t length,
-			     sector_t **pages);
+			     sector_t offset, sector_t length);
 void bl_put_extent(struct pnfs_block_extent *be);
 struct pnfs_block_extent *bl_alloc_extent(void);
 int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
@@ -200,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
 int bl_add_merge_extent(struct pnfs_block_layout *bl,
 			 struct pnfs_block_extent *new);
 int bl_mark_for_commit(struct pnfs_block_extent *be,
-			sector_t offset, sector_t length);
+			sector_t offset, sector_t length,
+			struct pnfs_block_short_extent *new);
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
 
 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 19fa7b0b8c00..1abac09f7cd5 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
 		return 0;
 	} else {
 		struct pnfs_inval_tracking *new;
-		if (storage)
-			new = storage;
-		else {
-			new = kmalloc(sizeof(*new), GFP_NOFS);
-			if (!new)
-				return -ENOMEM;
-		}
+		new = storage;
 		new->it_sector = s;
 		new->it_tags = (1 << tag);
 		list_add(&new->it_link, &pos->it_link);
@@ -139,11 +133,13 @@ static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
 }
 
 /* Ensure that future operations on given range of tree will not malloc */
-static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
+static int _preload_range(struct pnfs_inval_markings *marks,
+		u64 offset, u64 length)
 {
 	u64 start, end, s;
 	int count, i, used = 0, status = -ENOMEM;
 	struct pnfs_inval_tracking **storage;
+	struct my_tree *tree = &marks->im_tree;
 
 	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
 	start = normalize(offset, tree->mtt_step_size);
@@ -161,12 +157,11 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
 		goto out_cleanup;
 	}
 
-	/* Now need lock - HOW??? */
-
+	spin_lock_bh(&marks->im_lock);
 	for (s = start; s < end; s += tree->mtt_step_size)
 		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
+	spin_unlock_bh(&marks->im_lock);
 
-	/* Unlock - HOW??? */
 	status = 0;
 
  out_cleanup:
@@ -179,41 +174,14 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
 	return status;
 }
 
-static void set_needs_init(sector_t *array, sector_t offset)
-{
-	sector_t *p = array;
-
-	dprintk("%s enter\n", __func__);
-	if (!p)
-		return;
-	while (*p < offset)
-		p++;
-	if (*p == offset)
-		return;
-	else if (*p == ~0) {
-		*p++ = offset;
-		*p = ~0;
-		return;
-	} else {
-		sector_t *save = p;
-		dprintk("%s Adding %llu\n", __func__, (u64)offset);
-		while (*p != ~0)
-			p++;
-		p++;
-		memmove(save + 1, save, (char *)p - (char *)save);
-		*save = offset;
-		return;
-	}
-}
-
 /* We are relying on page lock to serialize this */
 int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
 {
 	int rv;
 
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 	return rv;
 }
 
@@ -253,78 +221,39 @@ static int is_range_written(struct pnfs_inval_markings *marks,
 {
 	int rv;
 
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 	return rv;
 }
 
 /* Marks sectors in [offest, offset_length) as having been initialized.
  * All lengths are step-aligned, where step is min(pagesize, blocksize).
- * Notes where partial block is initialized, and helps prepare it for
- * complete initialization later.
+ * Currently assumes offset is page-aligned
  */
-/* Currently assumes offset is page-aligned */
 int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
-			     sector_t offset, sector_t length,
-			     sector_t **pages)
+			     sector_t offset, sector_t length)
 {
-	sector_t s, start, end;
-	sector_t *array = NULL; /* Pages to mark */
+	sector_t start, end;
 
 	dprintk("%s(offset=%llu,len=%llu) enter\n",
 		__func__, (u64)offset, (u64)length);
-	s = max((sector_t) 3,
-		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
-	dprintk("%s set max=%llu\n", __func__, (u64)s);
-	if (pages) {
-		array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
-		if (!array)
-			goto outerr;
-		array[0] = ~0;
-	}
 
 	start = normalize(offset, marks->im_block_size);
 	end = normalize_up(offset + length, marks->im_block_size);
-	if (_preload_range(&marks->im_tree, start, end - start))
+	if (_preload_range(marks, start, end - start))
 		goto outerr;
 
-	spin_lock(&marks->im_lock);
-
-	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
-	     s < offset; s += PAGE_CACHE_SECTORS) {
-		dprintk("%s pre-area pages\n", __func__);
-		/* Portion of used block is not initialized */
-		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
-			set_needs_init(array, s);
-	}
+	spin_lock_bh(&marks->im_lock);
 	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
 		goto out_unlock;
-	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
-	     s < end; s += PAGE_CACHE_SECTORS) {
-		dprintk("%s post-area pages\n", __func__);
-		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
-			set_needs_init(array, s);
-	}
-
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 
-	if (pages) {
-		if (array[0] == ~0) {
-			kfree(array);
-			*pages = NULL;
-		} else
-			*pages = array;
-	}
 	return 0;
 
- out_unlock:
-	spin_unlock(&marks->im_lock);
- outerr:
-	if (pages) {
-		kfree(array);
-		*pages = NULL;
-	}
+out_unlock:
+	spin_unlock_bh(&marks->im_lock);
+outerr:
 	return -ENOMEM;
 }
 
@@ -338,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks,
 
 	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
 		(u64)offset, (u64)length);
-	spin_lock(&marks->im_lock);
+	spin_lock_bh(&marks->im_lock);
 	status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
-	spin_unlock(&marks->im_lock);
+	spin_unlock_bh(&marks->im_lock);
 	return status;
 }
 
@@ -440,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
 
 /* Note the range described by offset, length is guaranteed to be contained
  * within be.
+ * new will be freed, either by this function or add_to_commitlist if they
+ * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
  */
 int bl_mark_for_commit(struct pnfs_block_extent *be,
-		    sector_t offset, sector_t length)
+		    sector_t offset, sector_t length,
+		    struct pnfs_block_short_extent *new)
 {
 	sector_t new_end, end = offset + length;
-	struct pnfs_block_short_extent *new;
 	struct pnfs_block_layout *bl = container_of(be->be_inval,
 						    struct pnfs_block_layout,
 						    bl_inval);
 
-	new = kmalloc(sizeof(*new), GFP_NOFS);
-	if (!new)
-		return -ENOMEM;
-
 	mark_written_sectors(be->be_inval, offset, length);
 	/* We want to add the range to commit list, but it must be
 	 * block-normalized, and verified that the normalized range has
@@ -483,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
 	new->bse_mdev = be->be_mdev;
 
 	spin_lock(&bl->bl_ext_lock);
-	/* new will be freed, either by add_to_commitlist if it decides not
-	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
-	 */
 	add_to_commitlist(bl, new);
 	spin_unlock(&bl->bl_ext_lock);
 	return 0;
@@ -933,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
 		}
 	}
 }
+
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
+{
+	struct pnfs_block_short_extent *new;
+
+	new = kmalloc(sizeof(*new), GFP_NOFS);
+	if (unlikely(!new))
+		return -ENOMEM;
+
+	spin_lock_bh(&marks->im_lock);
+	list_add(&new->bse_node, &marks->im_extents);
+	spin_unlock_bh(&marks->im_lock);
+
+	return 0;
+}
+
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
+{
+	struct pnfs_block_short_extent *rv = NULL;
+
+	spin_lock_bh(&marks->im_lock);
+	if (!list_empty(&marks->im_extents)) {
+		rv = list_entry((&marks->im_extents)->next,
+				struct pnfs_block_short_extent, bse_node);
+		list_del_init(&rv->bse_node);
+	}
+	spin_unlock_bh(&marks->im_lock);
+
+	return rv;
+}
+
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
+{
+	struct pnfs_block_short_extent *se = NULL, *tmp;
+
+	if (num_to_free <= 0)
+		return;
+
+	spin_lock(&marks->im_lock);
+	list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
+		list_del(&se->bse_node);
+		kfree(se);
+		if (--num_to_free == 0)
+			break;
+	}
+	spin_unlock(&marks->im_lock);
+
+	BUG_ON(num_to_free > 0);
+}
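These three helpers carry the 'alloc short extent before submit bio' scheme: bl_write_pagelist() pushes one pre-allocated pnfs_block_short_extent per INVALID extent it will touch (counting them in par->bse_count, while sleeping allocations are still legal), mark_extents_written() pops one in the completion path where kmalloc(GFP_NOFS) under a spinlock would be unsafe, and bl_end_par_io_write() returns the unconsumed ones on error. A hedged outline of the pairing, with the actual I/O and error handling elided:

	/* Submission (process context, may sleep): bank one extent per need. */
	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		if (bl_push_one_short_extent(be->be_inval))
			goto out_mds;		/* fall back to I/O through the MDS */
		par->bse_count++;
	}

	/* Completion (no sleeping allowed): withdraw what was banked. */
	se = bl_pop_one_short_extent(be->be_inval);	/* cannot fail if balanced */
	bl_mark_for_commit(be, isect, len, se);

	/* Error path: give back whatever was never consumed. */
	bl_free_short_extents(&bl->bl_inval, num_se);
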
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 07df5f1d85e5..c89d3b9e483c 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -162,7 +162,7 @@ struct cb_layoutrecallargs {
 	};
 };
 
-extern unsigned nfs4_callback_layoutrecall(
+extern __be32 nfs4_callback_layoutrecall(
 	struct cb_layoutrecallargs *args,
 	void *dummy, struct cb_process_state *cps);
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 726e59a9e50f..d50b2742f23b 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -305,6 +305,10 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
 	n = ntohl(*p++);
 	if (n <= 0)
 		goto out;
+	if (n > ULONG_MAX / sizeof(*args->devs)) {
+		status = htonl(NFS4ERR_BADXDR);
+		goto out;
+	}
 
 	args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
 	if (!args->devs) {
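The new check guards the multiplication inside kmalloc(n * sizeof(*args->devs)): n comes off the wire as a 32-bit count, and on 32-bit systems the product can wrap, letting kmalloc() succeed with a buffer smaller than the decode loop below assumes. The same guard in a standalone userspace sketch (alloc_array is an illustrative name; later kernels wrap this exact check in kmalloc_array()):

	#include <limits.h>
	#include <stdlib.h>

	/* Allocate n elements of size elem, refusing counts that would
	 * overflow the n * elem multiplication. */
	static void *alloc_array(unsigned long n, size_t elem)
	{
		if (n > ULONG_MAX / elem)	/* n * elem would wrap */
			return NULL;		/* the XDR code returns NFS4ERR_BADXDR here */
		return malloc(n * elem);
	}
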
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index ed388aae9689..8ae91908f5aa 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -382,7 +382,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
 {
 	struct nfs4_pnfs_ds_addr *da = NULL;
 	char *buf, *portstr;
-	u32 port;
+	__be16 port;
 	int nlen, rlen;
 	int tmp[2];
 	__be32 *p;
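Declaring port as __be16 lets sparse track byte order through the function: the two decimal components parsed from the universal address are host-endian ints, and the value stored into struct sockaddr_in must be big-endian. A sketch of the conversion decode_ds_addr() performs (make_port is an illustrative name, not the function's):

	#include <linux/types.h>
	#include <asm/byteorder.h>

	/* A universal address ends in "...<hi>.<lo>", port = hi * 256 + lo. */
	static __be16 make_port(int hi, int lo)
	{
		return htons((hi << 8) | (lo & 0xff));	/* sparse now sees __be16 */
	}
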
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 75366dc89686..f0c849c98fe4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3587,7 +3587,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	res.acl_flags |= NFS4_ACL_LEN_REQUEST;
 	resp_buf = page_address(pages[0]);
 
-	dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n",
+	dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
 		__func__, buf, buflen, npages, args.acl_len);
 	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
 			&msg, &args.seq_args, &res.seq_res, 0);
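buflen and args.acl_len are size_t here, and %zu is the only printf-style format that matches size_t on every architecture; %ld happens to line up on 64-bit builds but triggers -Wformat warnings (and mismatches the vararg width in principle) where size_t is unsigned int. A one-line userspace illustration:

	#include <stdio.h>

	int main(void)
	{
		size_t buflen = 4096;

		printf("buflen %zu\n", buflen);	/* %zu always matches size_t */
		return 0;
	}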