diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2008-06-11 12:21:19 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2008-07-15 18:10:51 -0400 |
commit | a3d01454bc58b5a211ef64a7670572a40b71e682 (patch) | |
tree | 68c1ba383fb2c6702a8cc02bc81d51da6fb2920b | |
parent | 1b83d707032a1be40a60ed0a9bd841662cc04a5d (diff) |
NFS: Remove BKL requirement from attribute updates
The main problem is dealing with inode->i_size: we need to hold the
inode->i_lock on all attribute updates, and so the generic vmtruncate won't
cut it. Make an NFS-private version of vmtruncate that has the necessary
locking semantics.
The result should be that the following inode attribute updates are
protected by inode->i_lock:
nfsi->cache_validity
nfsi->read_cache_jiffies
nfsi->attrtimeo
nfsi->attrtimeo_timestamp
nfsi->change_attr
nfsi->last_updated
nfsi->cache_change_attribute
nfsi->access_cache
nfsi->access_cache_entry_lru
nfsi->access_cache_inode_lru
nfsi->acl_access
nfsi->acl_default
nfsi->nfs_page_tree
nfsi->ncommit
nfsi->npages
nfsi->open_files
nfsi->silly_list
nfsi->acl
nfsi->open_states
inode->i_size
inode->i_atime
inode->i_mtime
inode->i_ctime
inode->i_nlink
inode->i_uid
inode->i_gid
The following is protected by dir->i_mutex:
nfsi->cookieverf
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | fs/nfs/inode.c | 67 | ||||
-rw-r--r-- | fs/nfs/write.c | 15 |
2 files changed, 71 insertions, 11 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2c23d067e2a6..3adabd154779 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -389,6 +389,62 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
389 | } | 389 | } |
390 | 390 | ||
391 | /** | 391 | /** |
392 | * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall | ||
393 | * @inode: inode of the file used | ||
394 | * @offset: file offset to start truncating | ||
395 | * | ||
396 | * This is a copy of the common vmtruncate, but with the locking | ||
397 | * corrected to take into account the fact that NFS requires | ||
398 | * inode->i_size to be updated under the inode->i_lock. | ||
399 | */ | ||
400 | static int nfs_vmtruncate(struct inode * inode, loff_t offset) | ||
401 | { | ||
402 | if (i_size_read(inode) < offset) { | ||
403 | unsigned long limit; | ||
404 | |||
405 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
406 | if (limit != RLIM_INFINITY && offset > limit) | ||
407 | goto out_sig; | ||
408 | if (offset > inode->i_sb->s_maxbytes) | ||
409 | goto out_big; | ||
410 | spin_lock(&inode->i_lock); | ||
411 | i_size_write(inode, offset); | ||
412 | spin_unlock(&inode->i_lock); | ||
413 | } else { | ||
414 | struct address_space *mapping = inode->i_mapping; | ||
415 | |||
416 | /* | ||
417 | * truncation of in-use swapfiles is disallowed - it would | ||
418 | * cause subsequent swapout to scribble on the now-freed | ||
419 | * blocks. | ||
420 | */ | ||
421 | if (IS_SWAPFILE(inode)) | ||
422 | return -ETXTBSY; | ||
423 | spin_lock(&inode->i_lock); | ||
424 | i_size_write(inode, offset); | ||
425 | spin_unlock(&inode->i_lock); | ||
426 | |||
427 | /* | ||
428 | * unmap_mapping_range is called twice, first simply for | ||
429 | * efficiency so that truncate_inode_pages does fewer | ||
430 | * single-page unmaps. However after this first call, and | ||
431 | * before truncate_inode_pages finishes, it is possible for | ||
432 | * private pages to be COWed, which remain after | ||
433 | * truncate_inode_pages finishes, hence the second | ||
434 | * unmap_mapping_range call must be made for correctness. | ||
435 | */ | ||
436 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
437 | truncate_inode_pages(mapping, offset); | ||
438 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
439 | } | ||
440 | return 0; | ||
441 | out_sig: | ||
442 | send_sig(SIGXFSZ, current, 0); | ||
443 | out_big: | ||
444 | return -EFBIG; | ||
445 | } | ||
446 | |||
447 | /** | ||
392 | * nfs_setattr_update_inode - Update inode metadata after a setattr call. | 448 | * nfs_setattr_update_inode - Update inode metadata after a setattr call. |
393 | * @inode: pointer to struct inode | 449 | * @inode: pointer to struct inode |
394 | * @attr: pointer to struct iattr | 450 | * @attr: pointer to struct iattr |
@@ -414,8 +470,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
414 | } | 470 | } |
415 | if ((attr->ia_valid & ATTR_SIZE) != 0) { | 471 | if ((attr->ia_valid & ATTR_SIZE) != 0) { |
416 | nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); | 472 | nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); |
417 | inode->i_size = attr->ia_size; | 473 | nfs_vmtruncate(inode, attr->ia_size); |
418 | vmtruncate(inode, attr->ia_size); | ||
419 | } | 474 | } |
420 | } | 475 | } |
421 | 476 | ||
@@ -829,9 +884,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
829 | if (S_ISDIR(inode->i_mode)) | 884 | if (S_ISDIR(inode->i_mode)) |
830 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 885 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
831 | } | 886 | } |
832 | if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) && | 887 | if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && |
833 | nfsi->npages == 0) | 888 | nfsi->npages == 0) |
834 | inode->i_size = nfs_size_to_loff_t(fattr->size); | 889 | i_size_write(inode, nfs_size_to_loff_t(fattr->size)); |
835 | } | 890 | } |
836 | } | 891 | } |
837 | 892 | ||
@@ -972,7 +1027,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
972 | (fattr->valid & NFS_ATTR_WCC) == 0) { | 1027 | (fattr->valid & NFS_ATTR_WCC) == 0) { |
973 | memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); | 1028 | memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); |
974 | memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); | 1029 | memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); |
975 | fattr->pre_size = inode->i_size; | 1030 | fattr->pre_size = i_size_read(inode); |
976 | fattr->valid |= NFS_ATTR_WCC; | 1031 | fattr->valid |= NFS_ATTR_WCC; |
977 | } | 1032 | } |
978 | return nfs_post_op_update_inode(inode, fattr); | 1033 | return nfs_post_op_update_inode(inode, fattr); |
@@ -1057,7 +1112,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1057 | /* Do we perhaps have any outstanding writes, or has | 1112 | /* Do we perhaps have any outstanding writes, or has |
1058 | * the file grown beyond our last write? */ | 1113 | * the file grown beyond our last write? */ |
1059 | if (nfsi->npages == 0 || new_isize > cur_isize) { | 1114 | if (nfsi->npages == 0 || new_isize > cur_isize) { |
1060 | inode->i_size = new_isize; | 1115 | i_size_write(inode, new_isize); |
1061 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1116 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
1062 | } | 1117 | } |
1063 | dprintk("NFS: isize change on server for file %s/%ld\n", | 1118 | dprintk("NFS: isize change on server for file %s/%ld\n", |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index feca8c648766..3229e217c773 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -133,16 +133,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page) | |||
133 | static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) | 133 | static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) |
134 | { | 134 | { |
135 | struct inode *inode = page->mapping->host; | 135 | struct inode *inode = page->mapping->host; |
136 | loff_t end, i_size = i_size_read(inode); | 136 | loff_t end, i_size; |
137 | pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | 137 | pgoff_t end_index; |
138 | 138 | ||
139 | spin_lock(&inode->i_lock); | ||
140 | i_size = i_size_read(inode); | ||
141 | end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | ||
139 | if (i_size > 0 && page->index < end_index) | 142 | if (i_size > 0 && page->index < end_index) |
140 | return; | 143 | goto out; |
141 | end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); | 144 | end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); |
142 | if (i_size >= end) | 145 | if (i_size >= end) |
143 | return; | 146 | goto out; |
144 | nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); | ||
145 | i_size_write(inode, end); | 147 | i_size_write(inode, end); |
148 | nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); | ||
149 | out: | ||
150 | spin_unlock(&inode->i_lock); | ||
146 | } | 151 | } |
147 | 152 | ||
148 | /* A writeback failed: mark the page as bad, and invalidate the page cache */ | 153 | /* A writeback failed: mark the page as bad, and invalidate the page cache */ |