diff options
Diffstat (limited to 'fs')
57 files changed, 937 insertions, 412 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index fc06fd27065e..dd6f7ee1e312 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
@@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
610 | p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", | 610 | p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", |
611 | page, (unsigned long)filp->private_data); | 611 | page, (unsigned long)filp->private_data); |
612 | 612 | ||
613 | /* Update file times before taking page lock */ | ||
614 | file_update_time(filp); | ||
615 | |||
613 | v9inode = V9FS_I(inode); | 616 | v9inode = V9FS_I(inode); |
614 | /* make sure the cache has finished storing the page */ | 617 | /* make sure the cache has finished storing the page */ |
615 | v9fs_fscache_wait_on_page_write(inode, page); | 618 | v9fs_fscache_wait_on_page_write(inode, page); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fadeba6a5db9..62e0cafd6e25 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1614,8 +1614,6 @@ static int cleaner_kthread(void *arg) | |||
1614 | struct btrfs_root *root = arg; | 1614 | struct btrfs_root *root = arg; |
1615 | 1615 | ||
1616 | do { | 1616 | do { |
1617 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | ||
1618 | |||
1619 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1617 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
1620 | mutex_trylock(&root->fs_info->cleaner_mutex)) { | 1618 | mutex_trylock(&root->fs_info->cleaner_mutex)) { |
1621 | btrfs_run_delayed_iputs(root); | 1619 | btrfs_run_delayed_iputs(root); |
@@ -1647,7 +1645,6 @@ static int transaction_kthread(void *arg) | |||
1647 | do { | 1645 | do { |
1648 | cannot_commit = false; | 1646 | cannot_commit = false; |
1649 | delay = HZ * 30; | 1647 | delay = HZ * 30; |
1650 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | ||
1651 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1648 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1652 | 1649 | ||
1653 | spin_lock(&root->fs_info->trans_lock); | 1650 | spin_lock(&root->fs_info->trans_lock); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9aa01ec2138d..5caf285c6e4d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1379 | ssize_t err = 0; | 1379 | ssize_t err = 0; |
1380 | size_t count, ocount; | 1380 | size_t count, ocount; |
1381 | 1381 | ||
1382 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 1382 | sb_start_write(inode->i_sb); |
1383 | 1383 | ||
1384 | mutex_lock(&inode->i_mutex); | 1384 | mutex_lock(&inode->i_mutex); |
1385 | 1385 | ||
@@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1469 | num_written = err; | 1469 | num_written = err; |
1470 | } | 1470 | } |
1471 | out: | 1471 | out: |
1472 | sb_end_write(inode->i_sb); | ||
1472 | current->backing_dev_info = NULL; | 1473 | current->backing_dev_info = NULL; |
1473 | return num_written ? num_written : err; | 1474 | return num_written ? num_written : err; |
1474 | } | 1475 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 48bdfd2591c2..83baec24946d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -6629,6 +6629,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6629 | u64 page_start; | 6629 | u64 page_start; |
6630 | u64 page_end; | 6630 | u64 page_end; |
6631 | 6631 | ||
6632 | sb_start_pagefault(inode->i_sb); | ||
6632 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 6633 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
6633 | if (!ret) { | 6634 | if (!ret) { |
6634 | ret = file_update_time(vma->vm_file); | 6635 | ret = file_update_time(vma->vm_file); |
@@ -6718,12 +6719,15 @@ again: | |||
6718 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6719 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
6719 | 6720 | ||
6720 | out_unlock: | 6721 | out_unlock: |
6721 | if (!ret) | 6722 | if (!ret) { |
6723 | sb_end_pagefault(inode->i_sb); | ||
6722 | return VM_FAULT_LOCKED; | 6724 | return VM_FAULT_LOCKED; |
6725 | } | ||
6723 | unlock_page(page); | 6726 | unlock_page(page); |
6724 | out: | 6727 | out: |
6725 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 6728 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
6726 | out_noreserve: | 6729 | out_noreserve: |
6730 | sb_end_pagefault(inode->i_sb); | ||
6727 | return ret; | 6731 | return ret; |
6728 | } | 6732 | } |
6729 | 6733 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 43f0012016e3..bc2f6ffff3cf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -195,6 +195,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
195 | if (!inode_owner_or_capable(inode)) | 195 | if (!inode_owner_or_capable(inode)) |
196 | return -EACCES; | 196 | return -EACCES; |
197 | 197 | ||
198 | ret = mnt_want_write_file(file); | ||
199 | if (ret) | ||
200 | return ret; | ||
201 | |||
198 | mutex_lock(&inode->i_mutex); | 202 | mutex_lock(&inode->i_mutex); |
199 | 203 | ||
200 | ip_oldflags = ip->flags; | 204 | ip_oldflags = ip->flags; |
@@ -209,10 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
209 | } | 213 | } |
210 | } | 214 | } |
211 | 215 | ||
212 | ret = mnt_want_write_file(file); | ||
213 | if (ret) | ||
214 | goto out_unlock; | ||
215 | |||
216 | if (flags & FS_SYNC_FL) | 216 | if (flags & FS_SYNC_FL) |
217 | ip->flags |= BTRFS_INODE_SYNC; | 217 | ip->flags |= BTRFS_INODE_SYNC; |
218 | else | 218 | else |
@@ -275,9 +275,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
275 | inode->i_flags = i_oldflags; | 275 | inode->i_flags = i_oldflags; |
276 | } | 276 | } |
277 | 277 | ||
278 | mnt_drop_write_file(file); | ||
279 | out_unlock: | 278 | out_unlock: |
280 | mutex_unlock(&inode->i_mutex); | 279 | mutex_unlock(&inode->i_mutex); |
280 | mnt_drop_write_file(file); | ||
281 | return ret; | 281 | return ret; |
282 | } | 282 | } |
283 | 283 | ||
@@ -664,6 +664,10 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
664 | struct dentry *dentry; | 664 | struct dentry *dentry; |
665 | int error; | 665 | int error; |
666 | 666 | ||
667 | error = mnt_want_write(parent->mnt); | ||
668 | if (error) | ||
669 | return error; | ||
670 | |||
667 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 671 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
668 | 672 | ||
669 | dentry = lookup_one_len(name, parent->dentry, namelen); | 673 | dentry = lookup_one_len(name, parent->dentry, namelen); |
@@ -699,6 +703,7 @@ out_dput: | |||
699 | dput(dentry); | 703 | dput(dentry); |
700 | out_unlock: | 704 | out_unlock: |
701 | mutex_unlock(&dir->i_mutex); | 705 | mutex_unlock(&dir->i_mutex); |
706 | mnt_drop_write(parent->mnt); | ||
702 | return error; | 707 | return error; |
703 | } | 708 | } |
704 | 709 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7ac7cdcc294e..17be3dedacba 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -335,6 +335,8 @@ again: | |||
335 | if (!h) | 335 | if (!h) |
336 | return ERR_PTR(-ENOMEM); | 336 | return ERR_PTR(-ENOMEM); |
337 | 337 | ||
338 | sb_start_intwrite(root->fs_info->sb); | ||
339 | |||
338 | if (may_wait_transaction(root, type)) | 340 | if (may_wait_transaction(root, type)) |
339 | wait_current_trans(root); | 341 | wait_current_trans(root); |
340 | 342 | ||
@@ -345,6 +347,7 @@ again: | |||
345 | } while (ret == -EBUSY); | 347 | } while (ret == -EBUSY); |
346 | 348 | ||
347 | if (ret < 0) { | 349 | if (ret < 0) { |
350 | sb_end_intwrite(root->fs_info->sb); | ||
348 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 351 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
349 | return ERR_PTR(ret); | 352 | return ERR_PTR(ret); |
350 | } | 353 | } |
@@ -548,6 +551,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
548 | btrfs_trans_release_metadata(trans, root); | 551 | btrfs_trans_release_metadata(trans, root); |
549 | trans->block_rsv = NULL; | 552 | trans->block_rsv = NULL; |
550 | 553 | ||
554 | sb_end_intwrite(root->fs_info->sb); | ||
555 | |||
551 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 556 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
552 | should_end_transaction(trans, root)) { | 557 | should_end_transaction(trans, root)) { |
553 | trans->transaction->blocked = 1; | 558 | trans->transaction->blocked = 1; |
@@ -1578,6 +1583,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1578 | put_transaction(cur_trans); | 1583 | put_transaction(cur_trans); |
1579 | put_transaction(cur_trans); | 1584 | put_transaction(cur_trans); |
1580 | 1585 | ||
1586 | sb_end_intwrite(root->fs_info->sb); | ||
1587 | |||
1581 | trace_btrfs_transaction_commit(root); | 1588 | trace_btrfs_transaction_commit(root); |
1582 | 1589 | ||
1583 | btrfs_scrub_continue(root); | 1590 | btrfs_scrub_continue(root); |
diff --git a/fs/buffer.c b/fs/buffer.c index c7062c896d7c..9f6d2e41281d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write); | |||
2306 | * beyond EOF, then the page is guaranteed safe against truncation until we | 2306 | * beyond EOF, then the page is guaranteed safe against truncation until we |
2307 | * unlock the page. | 2307 | * unlock the page. |
2308 | * | 2308 | * |
2309 | * Direct callers of this function should call vfs_check_frozen() so that page | 2309 | * Direct callers of this function should protect against filesystem freezing |
2310 | * fault does not busyloop until the fs is thawed. | 2310 | * using sb_start_pagefault() - sb_end_pagefault() functions. |
2311 | */ | 2311 | */ |
2312 | int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | 2312 | int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, |
2313 | get_block_t get_block) | 2313 | get_block_t get_block) |
@@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2318 | loff_t size; | 2318 | loff_t size; |
2319 | int ret; | 2319 | int ret; |
2320 | 2320 | ||
2321 | /* | ||
2322 | * Update file times before taking page lock. We may end up failing the | ||
2323 | * fault so this update may be superfluous but who really cares... | ||
2324 | */ | ||
2325 | file_update_time(vma->vm_file); | ||
2326 | |||
2321 | lock_page(page); | 2327 | lock_page(page); |
2322 | size = i_size_read(inode); | 2328 | size = i_size_read(inode); |
2323 | if ((page->mapping != inode->i_mapping) || | 2329 | if ((page->mapping != inode->i_mapping) || |
@@ -2339,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2339 | 2345 | ||
2340 | if (unlikely(ret < 0)) | 2346 | if (unlikely(ret < 0)) |
2341 | goto out_unlock; | 2347 | goto out_unlock; |
2342 | /* | ||
2343 | * Freezing in progress? We check after the page is marked dirty and | ||
2344 | * with page lock held so if the test here fails, we are sure freezing | ||
2345 | * code will wait during syncing until the page fault is done - at that | ||
2346 | * point page will be dirty and unlocked so freezing code will write it | ||
2347 | * and writeprotect it again. | ||
2348 | */ | ||
2349 | set_page_dirty(page); | 2348 | set_page_dirty(page); |
2350 | if (inode->i_sb->s_frozen != SB_UNFROZEN) { | ||
2351 | ret = -EAGAIN; | ||
2352 | goto out_unlock; | ||
2353 | } | ||
2354 | wait_on_page_writeback(page); | 2349 | wait_on_page_writeback(page); |
2355 | return 0; | 2350 | return 0; |
2356 | out_unlock: | 2351 | out_unlock: |
@@ -2365,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2365 | int ret; | 2360 | int ret; |
2366 | struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; | 2361 | struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; |
2367 | 2362 | ||
2368 | /* | 2363 | sb_start_pagefault(sb); |
2369 | * This check is racy but catches the common case. The check in | ||
2370 | * __block_page_mkwrite() is reliable. | ||
2371 | */ | ||
2372 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | ||
2373 | ret = __block_page_mkwrite(vma, vmf, get_block); | 2364 | ret = __block_page_mkwrite(vma, vmf, get_block); |
2365 | sb_end_pagefault(sb); | ||
2374 | return block_page_mkwrite_return(ret); | 2366 | return block_page_mkwrite_return(ret); |
2375 | } | 2367 | } |
2376 | EXPORT_SYMBOL(block_page_mkwrite); | 2368 | EXPORT_SYMBOL(block_page_mkwrite); |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8b67304e4b80..452e71a1b753 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1184 | loff_t size, len; | 1184 | loff_t size, len; |
1185 | int ret; | 1185 | int ret; |
1186 | 1186 | ||
1187 | /* Update time before taking page lock */ | ||
1188 | file_update_time(vma->vm_file); | ||
1189 | |||
1187 | size = i_size_read(inode); | 1190 | size = i_size_read(inode); |
1188 | if (off + PAGE_CACHE_SIZE <= size) | 1191 | if (off + PAGE_CACHE_SIZE <= size) |
1189 | len = PAGE_CACHE_SIZE; | 1192 | len = PAGE_CACHE_SIZE; |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ffa2be57804d..c3ca12c33ca2 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -318,21 +318,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, | |||
318 | struct vfsmount *lower_mnt; | 318 | struct vfsmount *lower_mnt; |
319 | int rc = 0; | 319 | int rc = 0; |
320 | 320 | ||
321 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); | ||
322 | fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); | ||
323 | BUG_ON(!lower_dentry->d_count); | ||
324 | |||
325 | dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); | 321 | dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); |
326 | ecryptfs_set_dentry_private(dentry, dentry_info); | ||
327 | if (!dentry_info) { | 322 | if (!dentry_info) { |
328 | printk(KERN_ERR "%s: Out of memory whilst attempting " | 323 | printk(KERN_ERR "%s: Out of memory whilst attempting " |
329 | "to allocate ecryptfs_dentry_info struct\n", | 324 | "to allocate ecryptfs_dentry_info struct\n", |
330 | __func__); | 325 | __func__); |
331 | dput(lower_dentry); | 326 | dput(lower_dentry); |
332 | mntput(lower_mnt); | ||
333 | d_drop(dentry); | ||
334 | return -ENOMEM; | 327 | return -ENOMEM; |
335 | } | 328 | } |
329 | |||
330 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); | ||
331 | fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); | ||
332 | BUG_ON(!lower_dentry->d_count); | ||
333 | |||
334 | ecryptfs_set_dentry_private(dentry, dentry_info); | ||
336 | ecryptfs_set_dentry_lower(dentry, lower_dentry); | 335 | ecryptfs_set_dentry_lower(dentry, lower_dentry); |
337 | ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); | 336 | ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); |
338 | 337 | ||
@@ -381,12 +380,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
381 | struct dentry *lower_dir_dentry, *lower_dentry; | 380 | struct dentry *lower_dir_dentry, *lower_dentry; |
382 | int rc = 0; | 381 | int rc = 0; |
383 | 382 | ||
384 | if ((ecryptfs_dentry->d_name.len == 1 | ||
385 | && !strcmp(ecryptfs_dentry->d_name.name, ".")) | ||
386 | || (ecryptfs_dentry->d_name.len == 2 | ||
387 | && !strcmp(ecryptfs_dentry->d_name.name, ".."))) { | ||
388 | goto out_d_drop; | ||
389 | } | ||
390 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); | 383 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); |
391 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | 384 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); |
392 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, | 385 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, |
@@ -397,8 +390,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
397 | rc = PTR_ERR(lower_dentry); | 390 | rc = PTR_ERR(lower_dentry); |
398 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " | 391 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " |
399 | "[%d] on lower_dentry = [%s]\n", __func__, rc, | 392 | "[%d] on lower_dentry = [%s]\n", __func__, rc, |
400 | encrypted_and_encoded_name); | 393 | ecryptfs_dentry->d_name.name); |
401 | goto out_d_drop; | 394 | goto out; |
402 | } | 395 | } |
403 | if (lower_dentry->d_inode) | 396 | if (lower_dentry->d_inode) |
404 | goto interpose; | 397 | goto interpose; |
@@ -415,7 +408,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
415 | if (rc) { | 408 | if (rc) { |
416 | printk(KERN_ERR "%s: Error attempting to encrypt and encode " | 409 | printk(KERN_ERR "%s: Error attempting to encrypt and encode " |
417 | "filename; rc = [%d]\n", __func__, rc); | 410 | "filename; rc = [%d]\n", __func__, rc); |
418 | goto out_d_drop; | 411 | goto out; |
419 | } | 412 | } |
420 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | 413 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); |
421 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, | 414 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, |
@@ -427,14 +420,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
427 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " | 420 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " |
428 | "[%d] on lower_dentry = [%s]\n", __func__, rc, | 421 | "[%d] on lower_dentry = [%s]\n", __func__, rc, |
429 | encrypted_and_encoded_name); | 422 | encrypted_and_encoded_name); |
430 | goto out_d_drop; | 423 | goto out; |
431 | } | 424 | } |
432 | interpose: | 425 | interpose: |
433 | rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, | 426 | rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, |
434 | ecryptfs_dir_inode); | 427 | ecryptfs_dir_inode); |
435 | goto out; | ||
436 | out_d_drop: | ||
437 | d_drop(ecryptfs_dentry); | ||
438 | out: | 428 | out: |
439 | kfree(encrypted_and_encoded_name); | 429 | kfree(encrypted_and_encoded_name); |
440 | return ERR_PTR(rc); | 430 | return ERR_PTR(rc); |
@@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file) | |||
2069 | */ | 2069 | */ |
2070 | static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) | 2070 | static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) |
2071 | { | 2071 | { |
2072 | struct file *rp, *wp; | 2072 | struct file *files[2]; |
2073 | struct fdtable *fdt; | 2073 | struct fdtable *fdt; |
2074 | struct coredump_params *cp = (struct coredump_params *)info->data; | 2074 | struct coredump_params *cp = (struct coredump_params *)info->data; |
2075 | struct files_struct *cf = current->files; | 2075 | struct files_struct *cf = current->files; |
2076 | int err = create_pipe_files(files, 0); | ||
2077 | if (err) | ||
2078 | return err; | ||
2076 | 2079 | ||
2077 | wp = create_write_pipe(0); | 2080 | cp->file = files[1]; |
2078 | if (IS_ERR(wp)) | ||
2079 | return PTR_ERR(wp); | ||
2080 | |||
2081 | rp = create_read_pipe(wp, 0); | ||
2082 | if (IS_ERR(rp)) { | ||
2083 | free_write_pipe(wp); | ||
2084 | return PTR_ERR(rp); | ||
2085 | } | ||
2086 | |||
2087 | cp->file = wp; | ||
2088 | 2081 | ||
2089 | sys_close(0); | 2082 | sys_close(0); |
2090 | fd_install(0, rp); | 2083 | fd_install(0, files[0]); |
2091 | spin_lock(&cf->file_lock); | 2084 | spin_lock(&cf->file_lock); |
2092 | fdt = files_fdtable(cf); | 2085 | fdt = files_fdtable(cf); |
2093 | __set_open_fd(0, fdt); | 2086 | __set_open_fd(0, fdt); |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 264d315f6c47..6363ac66fafa 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode) | |||
79 | truncate_inode_pages(&inode->i_data, 0); | 79 | truncate_inode_pages(&inode->i_data, 0); |
80 | 80 | ||
81 | if (want_delete) { | 81 | if (want_delete) { |
82 | sb_start_intwrite(inode->i_sb); | ||
82 | /* set dtime */ | 83 | /* set dtime */ |
83 | EXT2_I(inode)->i_dtime = get_seconds(); | 84 | EXT2_I(inode)->i_dtime = get_seconds(); |
84 | mark_inode_dirty(inode); | 85 | mark_inode_dirty(inode); |
@@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode) | |||
98 | if (unlikely(rsv)) | 99 | if (unlikely(rsv)) |
99 | kfree(rsv); | 100 | kfree(rsv); |
100 | 101 | ||
101 | if (want_delete) | 102 | if (want_delete) { |
102 | ext2_free_inode(inode); | 103 | ext2_free_inode(inode); |
104 | sb_end_intwrite(inode->i_sb); | ||
105 | } | ||
103 | } | 106 | } |
104 | 107 | ||
105 | typedef struct { | 108 | typedef struct { |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9f311d27b16f..af74d9e27b71 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb, | |||
42 | static int ext2_remount (struct super_block * sb, int * flags, char * data); | 42 | static int ext2_remount (struct super_block * sb, int * flags, char * data); |
43 | static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); | 43 | static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); |
44 | static int ext2_sync_fs(struct super_block *sb, int wait); | 44 | static int ext2_sync_fs(struct super_block *sb, int wait); |
45 | static int ext2_freeze(struct super_block *sb); | ||
46 | static int ext2_unfreeze(struct super_block *sb); | ||
45 | 47 | ||
46 | void ext2_error(struct super_block *sb, const char *function, | 48 | void ext2_error(struct super_block *sb, const char *function, |
47 | const char *fmt, ...) | 49 | const char *fmt, ...) |
@@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = { | |||
305 | .evict_inode = ext2_evict_inode, | 307 | .evict_inode = ext2_evict_inode, |
306 | .put_super = ext2_put_super, | 308 | .put_super = ext2_put_super, |
307 | .sync_fs = ext2_sync_fs, | 309 | .sync_fs = ext2_sync_fs, |
310 | .freeze_fs = ext2_freeze, | ||
311 | .unfreeze_fs = ext2_unfreeze, | ||
308 | .statfs = ext2_statfs, | 312 | .statfs = ext2_statfs, |
309 | .remount_fs = ext2_remount, | 313 | .remount_fs = ext2_remount, |
310 | .show_options = ext2_show_options, | 314 | .show_options = ext2_show_options, |
@@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait) | |||
1200 | return 0; | 1204 | return 0; |
1201 | } | 1205 | } |
1202 | 1206 | ||
1207 | static int ext2_freeze(struct super_block *sb) | ||
1208 | { | ||
1209 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
1210 | |||
1211 | /* | ||
1212 | * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared | ||
1213 | * because we have unattached inodes and thus filesystem is not fully | ||
1214 | * consistent. | ||
1215 | */ | ||
1216 | if (atomic_long_read(&sb->s_remove_count)) { | ||
1217 | ext2_sync_fs(sb, 1); | ||
1218 | return 0; | ||
1219 | } | ||
1220 | /* Set EXT2_VALID_FS flag */ | ||
1221 | spin_lock(&sbi->s_lock); | ||
1222 | sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state); | ||
1223 | spin_unlock(&sbi->s_lock); | ||
1224 | ext2_sync_super(sb, sbi->s_es, 1); | ||
1225 | |||
1226 | return 0; | ||
1227 | } | ||
1228 | |||
1229 | static int ext2_unfreeze(struct super_block *sb) | ||
1230 | { | ||
1231 | /* Just write sb to clear EXT2_VALID_FS flag */ | ||
1232 | ext2_write_super(sb); | ||
1233 | |||
1234 | return 0; | ||
1235 | } | ||
1203 | 1236 | ||
1204 | void ext2_write_super(struct super_block *sb) | 1237 | void ext2_write_super(struct super_block *sb) |
1205 | { | 1238 | { |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 89b59cb7f9b8..6324f74e0342 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode) | |||
233 | if (is_bad_inode(inode)) | 233 | if (is_bad_inode(inode)) |
234 | goto no_delete; | 234 | goto no_delete; |
235 | 235 | ||
236 | /* | ||
237 | * Protect us against freezing - iput() caller didn't have to have any | ||
238 | * protection against it | ||
239 | */ | ||
240 | sb_start_intwrite(inode->i_sb); | ||
236 | handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); | 241 | handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); |
237 | if (IS_ERR(handle)) { | 242 | if (IS_ERR(handle)) { |
238 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); | 243 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); |
@@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode) | |||
242 | * cleaned up. | 247 | * cleaned up. |
243 | */ | 248 | */ |
244 | ext4_orphan_del(NULL, inode); | 249 | ext4_orphan_del(NULL, inode); |
250 | sb_end_intwrite(inode->i_sb); | ||
245 | goto no_delete; | 251 | goto no_delete; |
246 | } | 252 | } |
247 | 253 | ||
@@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode) | |||
273 | stop_handle: | 279 | stop_handle: |
274 | ext4_journal_stop(handle); | 280 | ext4_journal_stop(handle); |
275 | ext4_orphan_del(NULL, inode); | 281 | ext4_orphan_del(NULL, inode); |
282 | sb_end_intwrite(inode->i_sb); | ||
276 | goto no_delete; | 283 | goto no_delete; |
277 | } | 284 | } |
278 | } | 285 | } |
@@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode) | |||
301 | else | 308 | else |
302 | ext4_free_inode(handle, inode); | 309 | ext4_free_inode(handle, inode); |
303 | ext4_journal_stop(handle); | 310 | ext4_journal_stop(handle); |
311 | sb_end_intwrite(inode->i_sb); | ||
304 | return; | 312 | return; |
305 | no_delete: | 313 | no_delete: |
306 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ | 314 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
@@ -4779,11 +4787,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
4779 | get_block_t *get_block; | 4787 | get_block_t *get_block; |
4780 | int retries = 0; | 4788 | int retries = 0; |
4781 | 4789 | ||
4782 | /* | 4790 | sb_start_pagefault(inode->i_sb); |
4783 | * This check is racy but catches the common case. We rely on | ||
4784 | * __block_page_mkwrite() to do a reliable check. | ||
4785 | */ | ||
4786 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
4787 | /* Delalloc case is easy... */ | 4791 | /* Delalloc case is easy... */ |
4788 | if (test_opt(inode->i_sb, DELALLOC) && | 4792 | if (test_opt(inode->i_sb, DELALLOC) && |
4789 | !ext4_should_journal_data(inode) && | 4793 | !ext4_should_journal_data(inode) && |
@@ -4851,5 +4855,6 @@ retry_alloc: | |||
4851 | out_ret: | 4855 | out_ret: |
4852 | ret = block_page_mkwrite_return(ret); | 4856 | ret = block_page_mkwrite_return(ret); |
4853 | out: | 4857 | out: |
4858 | sb_end_pagefault(inode->i_sb); | ||
4854 | return ret; | 4859 | return ret; |
4855 | } | 4860 | } |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index f99a1311e847..fe7c63f4717e 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
@@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) | |||
44 | { | 44 | { |
45 | struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); | 45 | struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); |
46 | 46 | ||
47 | /* | ||
48 | * We protect against freezing so that we don't create dirty buffers | ||
49 | * on frozen filesystem. | ||
50 | */ | ||
51 | sb_start_write(sb); | ||
47 | ext4_mmp_csum_set(sb, mmp); | 52 | ext4_mmp_csum_set(sb, mmp); |
48 | mark_buffer_dirty(bh); | 53 | mark_buffer_dirty(bh); |
49 | lock_buffer(bh); | 54 | lock_buffer(bh); |
@@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) | |||
51 | get_bh(bh); | 56 | get_bh(bh); |
52 | submit_bh(WRITE_SYNC, bh); | 57 | submit_bh(WRITE_SYNC, bh); |
53 | wait_on_buffer(bh); | 58 | wait_on_buffer(bh); |
59 | sb_end_write(sb); | ||
54 | if (unlikely(!buffer_uptodate(bh))) | 60 | if (unlikely(!buffer_uptodate(bh))) |
55 | return 1; | 61 | return 1; |
56 | 62 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2d51cd9af225..d76ec8277d3f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -331,33 +331,17 @@ static void ext4_put_nojournal(handle_t *handle) | |||
331 | * journal_end calls result in the superblock being marked dirty, so | 331 | * journal_end calls result in the superblock being marked dirty, so |
332 | * that sync() will call the filesystem's write_super callback if | 332 | * that sync() will call the filesystem's write_super callback if |
333 | * appropriate. | 333 | * appropriate. |
334 | * | ||
335 | * To avoid j_barrier hold in userspace when a user calls freeze(), | ||
336 | * ext4 prevents a new handle from being started by s_frozen, which | ||
337 | * is in an upper layer. | ||
338 | */ | 334 | */ |
339 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | 335 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) |
340 | { | 336 | { |
341 | journal_t *journal; | 337 | journal_t *journal; |
342 | handle_t *handle; | ||
343 | 338 | ||
344 | trace_ext4_journal_start(sb, nblocks, _RET_IP_); | 339 | trace_ext4_journal_start(sb, nblocks, _RET_IP_); |
345 | if (sb->s_flags & MS_RDONLY) | 340 | if (sb->s_flags & MS_RDONLY) |
346 | return ERR_PTR(-EROFS); | 341 | return ERR_PTR(-EROFS); |
347 | 342 | ||
343 | WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); | ||
348 | journal = EXT4_SB(sb)->s_journal; | 344 | journal = EXT4_SB(sb)->s_journal; |
349 | handle = ext4_journal_current_handle(); | ||
350 | |||
351 | /* | ||
352 | * If a handle has been started, it should be allowed to | ||
353 | * finish, otherwise deadlock could happen between freeze | ||
354 | * and others(e.g. truncate) due to the restart of the | ||
356 | * journal handle if the filesystem is frozen and active | ||
356 | * handles are not stopped. | ||
357 | */ | ||
358 | if (!handle) | ||
359 | vfs_check_frozen(sb, SB_FREEZE_TRANS); | ||
360 | |||
361 | if (!journal) | 345 | if (!journal) |
362 | return ext4_get_nojournal(); | 346 | return ext4_get_nojournal(); |
363 | /* | 347 | /* |
@@ -2747,6 +2731,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) | |||
2747 | sb = elr->lr_super; | 2731 | sb = elr->lr_super; |
2748 | ngroups = EXT4_SB(sb)->s_groups_count; | 2732 | ngroups = EXT4_SB(sb)->s_groups_count; |
2749 | 2733 | ||
2734 | sb_start_write(sb); | ||
2750 | for (group = elr->lr_next_group; group < ngroups; group++) { | 2735 | for (group = elr->lr_next_group; group < ngroups; group++) { |
2751 | gdp = ext4_get_group_desc(sb, group, NULL); | 2736 | gdp = ext4_get_group_desc(sb, group, NULL); |
2752 | if (!gdp) { | 2737 | if (!gdp) { |
@@ -2773,6 +2758,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) | |||
2773 | elr->lr_next_sched = jiffies + elr->lr_timeout; | 2758 | elr->lr_next_sched = jiffies + elr->lr_timeout; |
2774 | elr->lr_next_group = group + 1; | 2759 | elr->lr_next_group = group + 1; |
2775 | } | 2760 | } |
2761 | sb_end_write(sb); | ||
2776 | 2762 | ||
2777 | return ret; | 2763 | return ret; |
2778 | } | 2764 | } |
@@ -4460,10 +4446,8 @@ int ext4_force_commit(struct super_block *sb) | |||
4460 | return 0; | 4446 | return 0; |
4461 | 4447 | ||
4462 | journal = EXT4_SB(sb)->s_journal; | 4448 | journal = EXT4_SB(sb)->s_journal; |
4463 | if (journal) { | 4449 | if (journal) |
4464 | vfs_check_frozen(sb, SB_FREEZE_TRANS); | ||
4465 | ret = ext4_journal_force_commit(journal); | 4450 | ret = ext4_journal_force_commit(journal); |
4466 | } | ||
4467 | 4451 | ||
4468 | return ret; | 4452 | return ret; |
4469 | } | 4453 | } |
@@ -4493,9 +4477,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
4493 | * gives us a chance to flush the journal completely and mark the fs clean. | 4477 | * gives us a chance to flush the journal completely and mark the fs clean. |
4494 | * | 4478 | * |
4495 | * Note that only this function cannot bring a filesystem to be in a clean | 4479 | * Note that only this function cannot bring a filesystem to be in a clean |
4496 | * state independently, because ext4 prevents a new handle from being started | 4480 | * state independently. It relies on upper layer to stop all data & metadata |
4497 | * by @sb->s_frozen, which stays in an upper layer. It thus needs help from | 4481 | * modifications. |
4498 | * the upper layer. | ||
4499 | */ | 4482 | */ |
4500 | static int ext4_freeze(struct super_block *sb) | 4483 | static int ext4_freeze(struct super_block *sb) |
4501 | { | 4484 | { |
@@ -4522,7 +4505,7 @@ static int ext4_freeze(struct super_block *sb) | |||
4522 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4505 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
4523 | error = ext4_commit_super(sb, 1); | 4506 | error = ext4_commit_super(sb, 1); |
4524 | out: | 4507 | out: |
4525 | /* we rely on s_frozen to stop further updates */ | 4508 | /* we rely on upper layer to stop further updates */ |
4526 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 4509 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
4527 | return error; | 4510 | return error; |
4528 | } | 4511 | } |
diff --git a/fs/fat/file.c b/fs/fat/file.c index a71fe3715ee8..e007b8bd8e5e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) | |||
43 | if (err) | 43 | if (err) |
44 | goto out; | 44 | goto out; |
45 | 45 | ||
46 | mutex_lock(&inode->i_mutex); | ||
47 | err = mnt_want_write_file(file); | 46 | err = mnt_want_write_file(file); |
48 | if (err) | 47 | if (err) |
49 | goto out_unlock_inode; | 48 | goto out; |
49 | mutex_lock(&inode->i_mutex); | ||
50 | 50 | ||
51 | /* | 51 | /* |
52 | * ATTR_VOLUME and ATTR_DIR cannot be changed; this also | 52 | * ATTR_VOLUME and ATTR_DIR cannot be changed; this also |
@@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) | |||
73 | /* The root directory has no attributes */ | 73 | /* The root directory has no attributes */ |
74 | if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { | 74 | if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { |
75 | err = -EINVAL; | 75 | err = -EINVAL; |
76 | goto out_drop_write; | 76 | goto out_unlock_inode; |
77 | } | 77 | } |
78 | 78 | ||
79 | if (sbi->options.sys_immutable && | 79 | if (sbi->options.sys_immutable && |
80 | ((attr | oldattr) & ATTR_SYS) && | 80 | ((attr | oldattr) & ATTR_SYS) && |
81 | !capable(CAP_LINUX_IMMUTABLE)) { | 81 | !capable(CAP_LINUX_IMMUTABLE)) { |
82 | err = -EPERM; | 82 | err = -EPERM; |
83 | goto out_drop_write; | 83 | goto out_unlock_inode; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* | 86 | /* |
@@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) | |||
90 | */ | 90 | */ |
91 | err = security_inode_setattr(file->f_path.dentry, &ia); | 91 | err = security_inode_setattr(file->f_path.dentry, &ia); |
92 | if (err) | 92 | if (err) |
93 | goto out_drop_write; | 93 | goto out_unlock_inode; |
94 | 94 | ||
95 | /* This MUST be done before doing anything irreversible... */ | 95 | /* This MUST be done before doing anything irreversible... */ |
96 | err = fat_setattr(file->f_path.dentry, &ia); | 96 | err = fat_setattr(file->f_path.dentry, &ia); |
97 | if (err) | 97 | if (err) |
98 | goto out_drop_write; | 98 | goto out_unlock_inode; |
99 | 99 | ||
100 | fsnotify_change(file->f_path.dentry, ia.ia_valid); | 100 | fsnotify_change(file->f_path.dentry, ia.ia_valid); |
101 | if (sbi->options.sys_immutable) { | 101 | if (sbi->options.sys_immutable) { |
@@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) | |||
107 | 107 | ||
108 | fat_save_attrs(inode, attr); | 108 | fat_save_attrs(inode, attr); |
109 | mark_inode_dirty(inode); | 109 | mark_inode_dirty(inode); |
110 | out_drop_write: | ||
111 | mnt_drop_write_file(file); | ||
112 | out_unlock_inode: | 110 | out_unlock_inode: |
113 | mutex_unlock(&inode->i_mutex); | 111 | mutex_unlock(&inode->i_mutex); |
112 | mnt_drop_write_file(file); | ||
114 | out: | 113 | out: |
115 | return err; | 114 | return err; |
116 | } | 115 | } |
diff --git a/fs/file_table.c b/fs/file_table.c index b3fc4d67a26b..701985e4ccda 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly; | |||
43 | 43 | ||
44 | static struct percpu_counter nr_files __cacheline_aligned_in_smp; | 44 | static struct percpu_counter nr_files __cacheline_aligned_in_smp; |
45 | 45 | ||
46 | static inline void file_free_rcu(struct rcu_head *head) | 46 | static void file_free_rcu(struct rcu_head *head) |
47 | { | 47 | { |
48 | struct file *f = container_of(head, struct file, f_u.fu_rcuhead); | 48 | struct file *f = container_of(head, struct file, f_u.fu_rcuhead); |
49 | 49 | ||
@@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file) | |||
217 | return; | 217 | return; |
218 | if (file_check_writeable(file) != 0) | 218 | if (file_check_writeable(file) != 0) |
219 | return; | 219 | return; |
220 | mnt_drop_write(mnt); | 220 | __mnt_drop_write(mnt); |
221 | file_release_write(file); | 221 | file_release_write(file); |
222 | } | 222 | } |
223 | 223 | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b321a688cde7..93d8d6c9494d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -944,9 +944,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
944 | return err; | 944 | return err; |
945 | 945 | ||
946 | count = ocount; | 946 | count = ocount; |
947 | 947 | sb_start_write(inode->i_sb); | |
948 | mutex_lock(&inode->i_mutex); | 948 | mutex_lock(&inode->i_mutex); |
949 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
950 | 949 | ||
951 | /* We can write back this queue in page reclaim */ | 950 | /* We can write back this queue in page reclaim */ |
952 | current->backing_dev_info = mapping->backing_dev_info; | 951 | current->backing_dev_info = mapping->backing_dev_info; |
@@ -1004,6 +1003,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1004 | out: | 1003 | out: |
1005 | current->backing_dev_info = NULL; | 1004 | current->backing_dev_info = NULL; |
1006 | mutex_unlock(&inode->i_mutex); | 1005 | mutex_unlock(&inode->i_mutex); |
1006 | sb_end_write(inode->i_sb); | ||
1007 | 1007 | ||
1008 | return written ? written : err; | 1008 | return written ? written : err; |
1009 | } | 1009 | } |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9aa6af13823c..d1d791ef38de 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -373,11 +373,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
373 | loff_t size; | 373 | loff_t size; |
374 | int ret; | 374 | int ret; |
375 | 375 | ||
376 | /* Wait if fs is frozen. This is racy so we check again later on | 376 | sb_start_pagefault(inode->i_sb); |
377 | * and retry if the fs has been frozen after the page lock has | 377 | |
378 | * been acquired | 378 | /* Update file times before taking page lock */ |
379 | */ | 379 | file_update_time(vma->vm_file); |
380 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
381 | 380 | ||
382 | ret = gfs2_rs_alloc(ip); | 381 | ret = gfs2_rs_alloc(ip); |
383 | if (ret) | 382 | if (ret) |
@@ -462,14 +461,9 @@ out: | |||
462 | gfs2_holder_uninit(&gh); | 461 | gfs2_holder_uninit(&gh); |
463 | if (ret == 0) { | 462 | if (ret == 0) { |
464 | set_page_dirty(page); | 463 | set_page_dirty(page); |
465 | /* This check must be post dropping of transaction lock */ | 464 | wait_on_page_writeback(page); |
466 | if (inode->i_sb->s_frozen == SB_UNFROZEN) { | ||
467 | wait_on_page_writeback(page); | ||
468 | } else { | ||
469 | ret = -EAGAIN; | ||
470 | unlock_page(page); | ||
471 | } | ||
472 | } | 465 | } |
466 | sb_end_pagefault(inode->i_sb); | ||
473 | return block_page_mkwrite_return(ret); | 467 | return block_page_mkwrite_return(ret); |
474 | } | 468 | } |
475 | 469 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ad3e2fb763d7..adbd27875ef9 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
50 | if (revokes) | 50 | if (revokes) |
51 | tr->tr_reserved += gfs2_struct2blk(sdp, revokes, | 51 | tr->tr_reserved += gfs2_struct2blk(sdp, revokes, |
52 | sizeof(u64)); | 52 | sizeof(u64)); |
53 | sb_start_intwrite(sdp->sd_vfs); | ||
53 | gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); | 54 | gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); |
54 | 55 | ||
55 | error = gfs2_glock_nq(&tr->tr_t_gh); | 56 | error = gfs2_glock_nq(&tr->tr_t_gh); |
@@ -68,6 +69,7 @@ fail_gunlock: | |||
68 | gfs2_glock_dq(&tr->tr_t_gh); | 69 | gfs2_glock_dq(&tr->tr_t_gh); |
69 | 70 | ||
70 | fail_holder_uninit: | 71 | fail_holder_uninit: |
72 | sb_end_intwrite(sdp->sd_vfs); | ||
71 | gfs2_holder_uninit(&tr->tr_t_gh); | 73 | gfs2_holder_uninit(&tr->tr_t_gh); |
72 | kfree(tr); | 74 | kfree(tr); |
73 | 75 | ||
@@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) | |||
116 | gfs2_holder_uninit(&tr->tr_t_gh); | 118 | gfs2_holder_uninit(&tr->tr_t_gh); |
117 | kfree(tr); | 119 | kfree(tr); |
118 | } | 120 | } |
121 | sb_end_intwrite(sdp->sd_vfs); | ||
119 | return; | 122 | return; |
120 | } | 123 | } |
121 | 124 | ||
@@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) | |||
136 | 139 | ||
137 | if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) | 140 | if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) |
138 | gfs2_log_flush(sdp, NULL); | 141 | gfs2_log_flush(sdp, NULL); |
142 | sb_end_intwrite(sdp->sd_vfs); | ||
139 | } | 143 | } |
140 | 144 | ||
141 | /** | 145 | /** |
diff --git a/fs/inode.c b/fs/inode.c index 3cc504320467..ac8d904b3f16 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1542,9 +1542,11 @@ void touch_atime(struct path *path) | |||
1542 | if (timespec_equal(&inode->i_atime, &now)) | 1542 | if (timespec_equal(&inode->i_atime, &now)) |
1543 | return; | 1543 | return; |
1544 | 1544 | ||
1545 | if (mnt_want_write(mnt)) | 1545 | if (!sb_start_write_trylock(inode->i_sb)) |
1546 | return; | 1546 | return; |
1547 | 1547 | ||
1548 | if (__mnt_want_write(mnt)) | ||
1549 | goto skip_update; | ||
1548 | /* | 1550 | /* |
1549 | * File systems can error out when updating inodes if they need to | 1551 | * File systems can error out when updating inodes if they need to |
1550 | * allocate new space to modify an inode (such is the case for | 1552 | * allocate new space to modify an inode (such is the case for |
@@ -1555,7 +1557,9 @@ void touch_atime(struct path *path) | |||
1555 | * of the fs read only, e.g. subvolumes in Btrfs. | 1557 | * of the fs read only, e.g. subvolumes in Btrfs. |
1556 | */ | 1558 | */ |
1557 | update_time(inode, &now, S_ATIME); | 1559 | update_time(inode, &now, S_ATIME); |
1558 | mnt_drop_write(mnt); | 1560 | __mnt_drop_write(mnt); |
1561 | skip_update: | ||
1562 | sb_end_write(inode->i_sb); | ||
1559 | } | 1563 | } |
1560 | EXPORT_SYMBOL(touch_atime); | 1564 | EXPORT_SYMBOL(touch_atime); |
1561 | 1565 | ||
@@ -1662,11 +1666,11 @@ int file_update_time(struct file *file) | |||
1662 | return 0; | 1666 | return 0; |
1663 | 1667 | ||
1664 | /* Finally allowed to write? Takes lock. */ | 1668 | /* Finally allowed to write? Takes lock. */ |
1665 | if (mnt_want_write_file(file)) | 1669 | if (__mnt_want_write_file(file)) |
1666 | return 0; | 1670 | return 0; |
1667 | 1671 | ||
1668 | ret = update_time(inode, &now, sync_it); | 1672 | ret = update_time(inode, &now, sync_it); |
1669 | mnt_drop_write_file(file); | 1673 | __mnt_drop_write_file(file); |
1670 | 1674 | ||
1671 | return ret; | 1675 | return ret; |
1672 | } | 1676 | } |
diff --git a/fs/internal.h b/fs/internal.h index a6fd56c68b11..371bcc4b1697 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -61,6 +61,10 @@ extern void __init mnt_init(void); | |||
61 | 61 | ||
62 | extern struct lglock vfsmount_lock; | 62 | extern struct lglock vfsmount_lock; |
63 | 63 | ||
64 | extern int __mnt_want_write(struct vfsmount *); | ||
65 | extern int __mnt_want_write_file(struct file *); | ||
66 | extern void __mnt_drop_write(struct vfsmount *); | ||
67 | extern void __mnt_drop_write_file(struct file *); | ||
64 | 68 | ||
65 | /* | 69 | /* |
66 | * fs_struct.c | 70 | * fs_struct.c |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 8392cb85bd54..05d29124c6ab 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -156,12 +156,16 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) | |||
156 | struct nlm_rqst *call; | 156 | struct nlm_rqst *call; |
157 | int status; | 157 | int status; |
158 | 158 | ||
159 | nlm_get_host(host); | ||
160 | call = nlm_alloc_call(host); | 159 | call = nlm_alloc_call(host); |
161 | if (call == NULL) | 160 | if (call == NULL) |
162 | return -ENOMEM; | 161 | return -ENOMEM; |
163 | 162 | ||
164 | nlmclnt_locks_init_private(fl, host); | 163 | nlmclnt_locks_init_private(fl, host); |
164 | if (!fl->fl_u.nfs_fl.owner) { | ||
165 | /* lockowner allocation has failed */ | ||
166 | nlmclnt_release_call(call); | ||
167 | return -ENOMEM; | ||
168 | } | ||
165 | /* Set up the argument struct */ | 169 | /* Set up the argument struct */ |
166 | nlmclnt_setlockargs(call, fl); | 170 | nlmclnt_setlockargs(call, fl); |
167 | 171 | ||
@@ -185,9 +189,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc); | |||
185 | 189 | ||
186 | /* | 190 | /* |
187 | * Allocate an NLM RPC call struct | 191 | * Allocate an NLM RPC call struct |
188 | * | ||
189 | * Note: the caller must hold a reference to host. In case of failure, | ||
190 | * this reference will be released. | ||
191 | */ | 192 | */ |
192 | struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) | 193 | struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) |
193 | { | 194 | { |
@@ -199,7 +200,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) | |||
199 | atomic_set(&call->a_count, 1); | 200 | atomic_set(&call->a_count, 1); |
200 | locks_init_lock(&call->a_args.lock.fl); | 201 | locks_init_lock(&call->a_args.lock.fl); |
201 | locks_init_lock(&call->a_res.lock.fl); | 202 | locks_init_lock(&call->a_res.lock.fl); |
202 | call->a_host = host; | 203 | call->a_host = nlm_get_host(host); |
203 | return call; | 204 | return call; |
204 | } | 205 | } |
205 | if (signalled()) | 206 | if (signalled()) |
@@ -207,7 +208,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) | |||
207 | printk("nlm_alloc_call: failed, waiting for memory\n"); | 208 | printk("nlm_alloc_call: failed, waiting for memory\n"); |
208 | schedule_timeout_interruptible(5*HZ); | 209 | schedule_timeout_interruptible(5*HZ); |
209 | } | 210 | } |
210 | nlmclnt_release_host(host); | ||
211 | return NULL; | 211 | return NULL; |
212 | } | 212 | } |
213 | 213 | ||
@@ -750,7 +750,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl | |||
750 | dprintk("lockd: blocking lock attempt was interrupted by a signal.\n" | 750 | dprintk("lockd: blocking lock attempt was interrupted by a signal.\n" |
751 | " Attempting to cancel lock.\n"); | 751 | " Attempting to cancel lock.\n"); |
752 | 752 | ||
753 | req = nlm_alloc_call(nlm_get_host(host)); | 753 | req = nlm_alloc_call(host); |
754 | if (!req) | 754 | if (!req) |
755 | return -ENOMEM; | 755 | return -ENOMEM; |
756 | req->a_flags = RPC_TASK_ASYNC; | 756 | req->a_flags = RPC_TASK_ASYNC; |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4a43d253c045..b147d1ae71fd 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
@@ -257,6 +257,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args | |||
257 | return rpc_system_err; | 257 | return rpc_system_err; |
258 | 258 | ||
259 | call = nlm_alloc_call(host); | 259 | call = nlm_alloc_call(host); |
260 | nlmsvc_release_host(host); | ||
260 | if (call == NULL) | 261 | if (call == NULL) |
261 | return rpc_system_err; | 262 | return rpc_system_err; |
262 | 263 | ||
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index afe4488c33d8..fb1a2bedbe97 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, | |||
219 | struct nlm_block *block; | 219 | struct nlm_block *block; |
220 | struct nlm_rqst *call = NULL; | 220 | struct nlm_rqst *call = NULL; |
221 | 221 | ||
222 | nlm_get_host(host); | ||
223 | call = nlm_alloc_call(host); | 222 | call = nlm_alloc_call(host); |
224 | if (call == NULL) | 223 | if (call == NULL) |
225 | return NULL; | 224 | return NULL; |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index de8f2caa2235..3009a365e082 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
@@ -297,6 +297,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args | |||
297 | return rpc_system_err; | 297 | return rpc_system_err; |
298 | 298 | ||
299 | call = nlm_alloc_call(host); | 299 | call = nlm_alloc_call(host); |
300 | nlmsvc_release_host(host); | ||
300 | if (call == NULL) | 301 | if (call == NULL) |
301 | return rpc_system_err; | 302 | return rpc_system_err; |
302 | 303 | ||
diff --git a/fs/namei.c b/fs/namei.c index 2ccc35c4dc24..1b464390dde8 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -650,6 +650,121 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki | |||
650 | path_put(link); | 650 | path_put(link); |
651 | } | 651 | } |
652 | 652 | ||
653 | int sysctl_protected_symlinks __read_mostly = 1; | ||
654 | int sysctl_protected_hardlinks __read_mostly = 1; | ||
655 | |||
656 | /** | ||
657 | * may_follow_link - Check symlink following for unsafe situations | ||
658 | * @link: The path of the symlink | ||
659 | * | ||
660 | * In the case of the sysctl_protected_symlinks sysctl being enabled, | ||
661 | * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is | ||
662 | * in a sticky world-writable directory. This is to protect privileged | ||
663 | * processes from failing races against path names that may change out | ||
664 | * from under them by way of other users creating malicious symlinks. | ||
665 | * It will permit symlinks to be followed only when outside a sticky | ||
666 | * world-writable directory, or when the uid of the symlink and follower | ||
667 | * match, or when the directory owner matches the symlink's owner. | ||
668 | * | ||
669 | * Returns 0 if following the symlink is allowed, -ve on error. | ||
670 | */ | ||
671 | static inline int may_follow_link(struct path *link, struct nameidata *nd) | ||
672 | { | ||
673 | const struct inode *inode; | ||
674 | const struct inode *parent; | ||
675 | |||
676 | if (!sysctl_protected_symlinks) | ||
677 | return 0; | ||
678 | |||
679 | /* Allowed if owner and follower match. */ | ||
680 | inode = link->dentry->d_inode; | ||
681 | if (current_cred()->fsuid == inode->i_uid) | ||
682 | return 0; | ||
683 | |||
684 | /* Allowed if parent directory not sticky and world-writable. */ | ||
685 | parent = nd->path.dentry->d_inode; | ||
686 | if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) | ||
687 | return 0; | ||
688 | |||
689 | /* Allowed if parent directory and link owner match. */ | ||
690 | if (parent->i_uid == inode->i_uid) | ||
691 | return 0; | ||
692 | |||
693 | path_put_conditional(link, nd); | ||
694 | path_put(&nd->path); | ||
695 | audit_log_link_denied("follow_link", link); | ||
696 | return -EACCES; | ||
697 | } | ||
698 | |||
699 | /** | ||
700 | * safe_hardlink_source - Check for safe hardlink conditions | ||
701 | * @inode: the source inode to hardlink from | ||
702 | * | ||
703 | * Return false if at least one of the following conditions: | ||
704 | * - inode is not a regular file | ||
705 | * - inode is setuid | ||
706 | * - inode is setgid and group-exec | ||
707 | * - access failure for read and write | ||
708 | * | ||
709 | * Otherwise returns true. | ||
710 | */ | ||
711 | static bool safe_hardlink_source(struct inode *inode) | ||
712 | { | ||
713 | umode_t mode = inode->i_mode; | ||
714 | |||
715 | /* Special files should not get pinned to the filesystem. */ | ||
716 | if (!S_ISREG(mode)) | ||
717 | return false; | ||
718 | |||
719 | /* Setuid files should not get pinned to the filesystem. */ | ||
720 | if (mode & S_ISUID) | ||
721 | return false; | ||
722 | |||
723 | /* Executable setgid files should not get pinned to the filesystem. */ | ||
724 | if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) | ||
725 | return false; | ||
726 | |||
727 | /* Hardlinking to unreadable or unwritable sources is dangerous. */ | ||
728 | if (inode_permission(inode, MAY_READ | MAY_WRITE)) | ||
729 | return false; | ||
730 | |||
731 | return true; | ||
732 | } | ||
733 | |||
734 | /** | ||
735 | * may_linkat - Check permissions for creating a hardlink | ||
736 | * @link: the source to hardlink from | ||
737 | * | ||
738 | * Block hardlink when all of: | ||
739 | * - sysctl_protected_hardlinks enabled | ||
740 | * - fsuid does not match inode | ||
741 | * - hardlink source is unsafe (see safe_hardlink_source() above) | ||
742 | * - not CAP_FOWNER | ||
743 | * | ||
744 | * Returns 0 if successful, -ve on error. | ||
745 | */ | ||
746 | static int may_linkat(struct path *link) | ||
747 | { | ||
748 | const struct cred *cred; | ||
749 | struct inode *inode; | ||
750 | |||
751 | if (!sysctl_protected_hardlinks) | ||
752 | return 0; | ||
753 | |||
754 | cred = current_cred(); | ||
755 | inode = link->dentry->d_inode; | ||
756 | |||
757 | /* Source inode owner (or CAP_FOWNER) can hardlink all they like, | ||
758 | * otherwise, it must be a safe source. | ||
759 | */ | ||
760 | if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || | ||
761 | capable(CAP_FOWNER)) | ||
762 | return 0; | ||
763 | |||
764 | audit_log_link_denied("linkat", link); | ||
765 | return -EPERM; | ||
766 | } | ||
767 | |||
653 | static __always_inline int | 768 | static __always_inline int |
654 | follow_link(struct path *link, struct nameidata *nd, void **p) | 769 | follow_link(struct path *link, struct nameidata *nd, void **p) |
655 | { | 770 | { |
@@ -1818,6 +1933,9 @@ static int path_lookupat(int dfd, const char *name, | |||
1818 | while (err > 0) { | 1933 | while (err > 0) { |
1819 | void *cookie; | 1934 | void *cookie; |
1820 | struct path link = path; | 1935 | struct path link = path; |
1936 | err = may_follow_link(&link, nd); | ||
1937 | if (unlikely(err)) | ||
1938 | break; | ||
1821 | nd->flags |= LOOKUP_PARENT; | 1939 | nd->flags |= LOOKUP_PARENT; |
1822 | err = follow_link(&link, nd, &cookie); | 1940 | err = follow_link(&link, nd, &cookie); |
1823 | if (err) | 1941 | if (err) |
@@ -2277,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) | |||
2277 | static int atomic_open(struct nameidata *nd, struct dentry *dentry, | 2395 | static int atomic_open(struct nameidata *nd, struct dentry *dentry, |
2278 | struct path *path, struct file *file, | 2396 | struct path *path, struct file *file, |
2279 | const struct open_flags *op, | 2397 | const struct open_flags *op, |
2280 | bool *want_write, bool need_lookup, | 2398 | bool got_write, bool need_lookup, |
2281 | int *opened) | 2399 | int *opened) |
2282 | { | 2400 | { |
2283 | struct inode *dir = nd->path.dentry->d_inode; | 2401 | struct inode *dir = nd->path.dentry->d_inode; |
@@ -2300,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, | |||
2300 | if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) | 2418 | if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) |
2301 | mode &= ~current_umask(); | 2419 | mode &= ~current_umask(); |
2302 | 2420 | ||
2303 | if (open_flag & O_EXCL) { | 2421 | if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { |
2304 | open_flag &= ~O_TRUNC; | 2422 | open_flag &= ~O_TRUNC; |
2305 | *opened |= FILE_CREATED; | 2423 | *opened |= FILE_CREATED; |
2306 | } | 2424 | } |
@@ -2314,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, | |||
2314 | * Another problem is returing the "right" error value (e.g. for an | 2432 | * Another problem is returing the "right" error value (e.g. for an |
2315 | * O_EXCL open we want to return EEXIST not EROFS). | 2433 | * O_EXCL open we want to return EEXIST not EROFS). |
2316 | */ | 2434 | */ |
2317 | if ((open_flag & (O_CREAT | O_TRUNC)) || | 2435 | if (((open_flag & (O_CREAT | O_TRUNC)) || |
2318 | (open_flag & O_ACCMODE) != O_RDONLY) { | 2436 | (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) { |
2319 | error = mnt_want_write(nd->path.mnt); | 2437 | if (!(open_flag & O_CREAT)) { |
2320 | if (!error) { | ||
2321 | *want_write = true; | ||
2322 | } else if (!(open_flag & O_CREAT)) { | ||
2323 | /* | 2438 | /* |
2324 | * No O_CREATE -> atomicity not a requirement -> fall | 2439 | * No O_CREATE -> atomicity not a requirement -> fall |
2325 | * back to lookup + open | 2440 | * back to lookup + open |
@@ -2327,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, | |||
2327 | goto no_open; | 2442 | goto no_open; |
2328 | } else if (open_flag & (O_EXCL | O_TRUNC)) { | 2443 | } else if (open_flag & (O_EXCL | O_TRUNC)) { |
2329 | /* Fall back and fail with the right error */ | 2444 | /* Fall back and fail with the right error */ |
2330 | create_error = error; | 2445 | create_error = -EROFS; |
2331 | goto no_open; | 2446 | goto no_open; |
2332 | } else { | 2447 | } else { |
2333 | /* No side effects, safe to clear O_CREAT */ | 2448 | /* No side effects, safe to clear O_CREAT */ |
2334 | create_error = error; | 2449 | create_error = -EROFS; |
2335 | open_flag &= ~O_CREAT; | 2450 | open_flag &= ~O_CREAT; |
2336 | } | 2451 | } |
2337 | } | 2452 | } |
@@ -2438,7 +2553,7 @@ looked_up: | |||
2438 | static int lookup_open(struct nameidata *nd, struct path *path, | 2553 | static int lookup_open(struct nameidata *nd, struct path *path, |
2439 | struct file *file, | 2554 | struct file *file, |
2440 | const struct open_flags *op, | 2555 | const struct open_flags *op, |
2441 | bool *want_write, int *opened) | 2556 | bool got_write, int *opened) |
2442 | { | 2557 | { |
2443 | struct dentry *dir = nd->path.dentry; | 2558 | struct dentry *dir = nd->path.dentry; |
2444 | struct inode *dir_inode = dir->d_inode; | 2559 | struct inode *dir_inode = dir->d_inode; |
@@ -2456,7 +2571,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, | |||
2456 | goto out_no_open; | 2571 | goto out_no_open; |
2457 | 2572 | ||
2458 | if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { | 2573 | if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { |
2459 | return atomic_open(nd, dentry, path, file, op, want_write, | 2574 | return atomic_open(nd, dentry, path, file, op, got_write, |
2460 | need_lookup, opened); | 2575 | need_lookup, opened); |
2461 | } | 2576 | } |
2462 | 2577 | ||
@@ -2480,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path, | |||
2480 | * a permanent write count is taken through | 2595 | * a permanent write count is taken through |
2481 | * the 'struct file' in finish_open(). | 2596 | * the 'struct file' in finish_open(). |
2482 | */ | 2597 | */ |
2483 | error = mnt_want_write(nd->path.mnt); | 2598 | if (!got_write) { |
2484 | if (error) | 2599 | error = -EROFS; |
2485 | goto out_dput; | 2600 | goto out_dput; |
2486 | *want_write = true; | 2601 | } |
2487 | *opened |= FILE_CREATED; | 2602 | *opened |= FILE_CREATED; |
2488 | error = security_path_mknod(&nd->path, dentry, mode, 0); | 2603 | error = security_path_mknod(&nd->path, dentry, mode, 0); |
2489 | if (error) | 2604 | if (error) |
@@ -2513,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path, | |||
2513 | struct dentry *dir = nd->path.dentry; | 2628 | struct dentry *dir = nd->path.dentry; |
2514 | int open_flag = op->open_flag; | 2629 | int open_flag = op->open_flag; |
2515 | bool will_truncate = (open_flag & O_TRUNC) != 0; | 2630 | bool will_truncate = (open_flag & O_TRUNC) != 0; |
2516 | bool want_write = false; | 2631 | bool got_write = false; |
2517 | int acc_mode = op->acc_mode; | 2632 | int acc_mode = op->acc_mode; |
2518 | struct inode *inode; | 2633 | struct inode *inode; |
2519 | bool symlink_ok = false; | 2634 | bool symlink_ok = false; |
@@ -2582,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path, | |||
2582 | } | 2697 | } |
2583 | 2698 | ||
2584 | retry_lookup: | 2699 | retry_lookup: |
2700 | if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { | ||
2701 | error = mnt_want_write(nd->path.mnt); | ||
2702 | if (!error) | ||
2703 | got_write = true; | ||
2704 | /* | ||
2705 | * do _not_ fail yet - we might not need that or fail with | ||
2706 | * a different error; let lookup_open() decide; we'll be | ||
2707 | * dropping this one anyway. | ||
2708 | */ | ||
2709 | } | ||
2585 | mutex_lock(&dir->d_inode->i_mutex); | 2710 | mutex_lock(&dir->d_inode->i_mutex); |
2586 | error = lookup_open(nd, path, file, op, &want_write, opened); | 2711 | error = lookup_open(nd, path, file, op, got_write, opened); |
2587 | mutex_unlock(&dir->d_inode->i_mutex); | 2712 | mutex_unlock(&dir->d_inode->i_mutex); |
2588 | 2713 | ||
2589 | if (error <= 0) { | 2714 | if (error <= 0) { |
@@ -2608,22 +2733,23 @@ retry_lookup: | |||
2608 | } | 2733 | } |
2609 | 2734 | ||
2610 | /* | 2735 | /* |
2611 | * It already exists. | 2736 | * create/update audit record if it already exists. |
2612 | */ | 2737 | */ |
2613 | audit_inode(pathname, path->dentry); | 2738 | if (path->dentry->d_inode) |
2739 | audit_inode(pathname, path->dentry); | ||
2614 | 2740 | ||
2615 | /* | 2741 | /* |
2616 | * If atomic_open() acquired write access it is dropped now due to | 2742 | * If atomic_open() acquired write access it is dropped now due to |
2617 | * possible mount and symlink following (this might be optimized away if | 2743 | * possible mount and symlink following (this might be optimized away if |
2618 | * necessary...) | 2744 | * necessary...) |
2619 | */ | 2745 | */ |
2620 | if (want_write) { | 2746 | if (got_write) { |
2621 | mnt_drop_write(nd->path.mnt); | 2747 | mnt_drop_write(nd->path.mnt); |
2622 | want_write = false; | 2748 | got_write = false; |
2623 | } | 2749 | } |
2624 | 2750 | ||
2625 | error = -EEXIST; | 2751 | error = -EEXIST; |
2626 | if (open_flag & O_EXCL) | 2752 | if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) |
2627 | goto exit_dput; | 2753 | goto exit_dput; |
2628 | 2754 | ||
2629 | error = follow_managed(path, nd->flags); | 2755 | error = follow_managed(path, nd->flags); |
@@ -2684,7 +2810,7 @@ finish_open: | |||
2684 | error = mnt_want_write(nd->path.mnt); | 2810 | error = mnt_want_write(nd->path.mnt); |
2685 | if (error) | 2811 | if (error) |
2686 | goto out; | 2812 | goto out; |
2687 | want_write = true; | 2813 | got_write = true; |
2688 | } | 2814 | } |
2689 | finish_open_created: | 2815 | finish_open_created: |
2690 | error = may_open(&nd->path, acc_mode, open_flag); | 2816 | error = may_open(&nd->path, acc_mode, open_flag); |
@@ -2711,7 +2837,7 @@ opened: | |||
2711 | goto exit_fput; | 2837 | goto exit_fput; |
2712 | } | 2838 | } |
2713 | out: | 2839 | out: |
2714 | if (want_write) | 2840 | if (got_write) |
2715 | mnt_drop_write(nd->path.mnt); | 2841 | mnt_drop_write(nd->path.mnt); |
2716 | path_put(&save_parent); | 2842 | path_put(&save_parent); |
2717 | terminate_walk(nd); | 2843 | terminate_walk(nd); |
@@ -2735,9 +2861,9 @@ stale_open: | |||
2735 | nd->inode = dir->d_inode; | 2861 | nd->inode = dir->d_inode; |
2736 | save_parent.mnt = NULL; | 2862 | save_parent.mnt = NULL; |
2737 | save_parent.dentry = NULL; | 2863 | save_parent.dentry = NULL; |
2738 | if (want_write) { | 2864 | if (got_write) { |
2739 | mnt_drop_write(nd->path.mnt); | 2865 | mnt_drop_write(nd->path.mnt); |
2740 | want_write = false; | 2866 | got_write = false; |
2741 | } | 2867 | } |
2742 | retried = true; | 2868 | retried = true; |
2743 | goto retry_lookup; | 2869 | goto retry_lookup; |
@@ -2777,6 +2903,9 @@ static struct file *path_openat(int dfd, const char *pathname, | |||
2777 | error = -ELOOP; | 2903 | error = -ELOOP; |
2778 | break; | 2904 | break; |
2779 | } | 2905 | } |
2906 | error = may_follow_link(&link, nd); | ||
2907 | if (unlikely(error)) | ||
2908 | break; | ||
2780 | nd->flags |= LOOKUP_PARENT; | 2909 | nd->flags |= LOOKUP_PARENT; |
2781 | nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); | 2910 | nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); |
2782 | error = follow_link(&link, nd, &cookie); | 2911 | error = follow_link(&link, nd, &cookie); |
@@ -2846,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path | |||
2846 | { | 2975 | { |
2847 | struct dentry *dentry = ERR_PTR(-EEXIST); | 2976 | struct dentry *dentry = ERR_PTR(-EEXIST); |
2848 | struct nameidata nd; | 2977 | struct nameidata nd; |
2978 | int err2; | ||
2849 | int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); | 2979 | int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); |
2850 | if (error) | 2980 | if (error) |
2851 | return ERR_PTR(error); | 2981 | return ERR_PTR(error); |
@@ -2859,16 +2989,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path | |||
2859 | nd.flags &= ~LOOKUP_PARENT; | 2989 | nd.flags &= ~LOOKUP_PARENT; |
2860 | nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; | 2990 | nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; |
2861 | 2991 | ||
2992 | /* don't fail immediately if it's r/o, at least try to report other errors */ | ||
2993 | err2 = mnt_want_write(nd.path.mnt); | ||
2862 | /* | 2994 | /* |
2863 | * Do the final lookup. | 2995 | * Do the final lookup. |
2864 | */ | 2996 | */ |
2865 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 2997 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
2866 | dentry = lookup_hash(&nd); | 2998 | dentry = lookup_hash(&nd); |
2867 | if (IS_ERR(dentry)) | 2999 | if (IS_ERR(dentry)) |
2868 | goto fail; | 3000 | goto unlock; |
2869 | 3001 | ||
3002 | error = -EEXIST; | ||
2870 | if (dentry->d_inode) | 3003 | if (dentry->d_inode) |
2871 | goto eexist; | 3004 | goto fail; |
2872 | /* | 3005 | /* |
2873 | * Special case - lookup gave negative, but... we had foo/bar/ | 3006 | * Special case - lookup gave negative, but... we had foo/bar/ |
2874 | * From the vfs_mknod() POV we just have a negative dentry - | 3007 | * From the vfs_mknod() POV we just have a negative dentry - |
@@ -2876,23 +3009,37 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path | |||
2876 | * been asking for (non-existent) directory. -ENOENT for you. | 3009 | * been asking for (non-existent) directory. -ENOENT for you. |
2877 | */ | 3010 | */ |
2878 | if (unlikely(!is_dir && nd.last.name[nd.last.len])) { | 3011 | if (unlikely(!is_dir && nd.last.name[nd.last.len])) { |
2879 | dput(dentry); | 3012 | error = -ENOENT; |
2880 | dentry = ERR_PTR(-ENOENT); | 3013 | goto fail; |
3014 | } | ||
3015 | if (unlikely(err2)) { | ||
3016 | error = err2; | ||
2881 | goto fail; | 3017 | goto fail; |
2882 | } | 3018 | } |
2883 | *path = nd.path; | 3019 | *path = nd.path; |
2884 | return dentry; | 3020 | return dentry; |
2885 | eexist: | ||
2886 | dput(dentry); | ||
2887 | dentry = ERR_PTR(-EEXIST); | ||
2888 | fail: | 3021 | fail: |
3022 | dput(dentry); | ||
3023 | dentry = ERR_PTR(error); | ||
3024 | unlock: | ||
2889 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 3025 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
3026 | if (!err2) | ||
3027 | mnt_drop_write(nd.path.mnt); | ||
2890 | out: | 3028 | out: |
2891 | path_put(&nd.path); | 3029 | path_put(&nd.path); |
2892 | return dentry; | 3030 | return dentry; |
2893 | } | 3031 | } |
2894 | EXPORT_SYMBOL(kern_path_create); | 3032 | EXPORT_SYMBOL(kern_path_create); |
2895 | 3033 | ||
3034 | void done_path_create(struct path *path, struct dentry *dentry) | ||
3035 | { | ||
3036 | dput(dentry); | ||
3037 | mutex_unlock(&path->dentry->d_inode->i_mutex); | ||
3038 | mnt_drop_write(path->mnt); | ||
3039 | path_put(path); | ||
3040 | } | ||
3041 | EXPORT_SYMBOL(done_path_create); | ||
3042 | |||
2896 | struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) | 3043 | struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) |
2897 | { | 3044 | { |
2898 | char *tmp = getname(pathname); | 3045 | char *tmp = getname(pathname); |
@@ -2956,8 +3103,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, | |||
2956 | struct path path; | 3103 | struct path path; |
2957 | int error; | 3104 | int error; |
2958 | 3105 | ||
2959 | if (S_ISDIR(mode)) | 3106 | error = may_mknod(mode); |
2960 | return -EPERM; | 3107 | if (error) |
3108 | return error; | ||
2961 | 3109 | ||
2962 | dentry = user_path_create(dfd, filename, &path, 0); | 3110 | dentry = user_path_create(dfd, filename, &path, 0); |
2963 | if (IS_ERR(dentry)) | 3111 | if (IS_ERR(dentry)) |
@@ -2965,15 +3113,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, | |||
2965 | 3113 | ||
2966 | if (!IS_POSIXACL(path.dentry->d_inode)) | 3114 | if (!IS_POSIXACL(path.dentry->d_inode)) |
2967 | mode &= ~current_umask(); | 3115 | mode &= ~current_umask(); |
2968 | error = may_mknod(mode); | ||
2969 | if (error) | ||
2970 | goto out_dput; | ||
2971 | error = mnt_want_write(path.mnt); | ||
2972 | if (error) | ||
2973 | goto out_dput; | ||
2974 | error = security_path_mknod(&path, dentry, mode, dev); | 3116 | error = security_path_mknod(&path, dentry, mode, dev); |
2975 | if (error) | 3117 | if (error) |
2976 | goto out_drop_write; | 3118 | goto out; |
2977 | switch (mode & S_IFMT) { | 3119 | switch (mode & S_IFMT) { |
2978 | case 0: case S_IFREG: | 3120 | case 0: case S_IFREG: |
2979 | error = vfs_create(path.dentry->d_inode,dentry,mode,true); | 3121 | error = vfs_create(path.dentry->d_inode,dentry,mode,true); |
@@ -2986,13 +3128,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, | |||
2986 | error = vfs_mknod(path.dentry->d_inode,dentry,mode,0); | 3128 | error = vfs_mknod(path.dentry->d_inode,dentry,mode,0); |
2987 | break; | 3129 | break; |
2988 | } | 3130 | } |
2989 | out_drop_write: | 3131 | out: |
2990 | mnt_drop_write(path.mnt); | 3132 | done_path_create(&path, dentry); |
2991 | out_dput: | ||
2992 | dput(dentry); | ||
2993 | mutex_unlock(&path.dentry->d_inode->i_mutex); | ||
2994 | path_put(&path); | ||
2995 | |||
2996 | return error; | 3133 | return error; |
2997 | } | 3134 | } |
2998 | 3135 | ||
@@ -3038,19 +3175,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) | |||
3038 | 3175 | ||
3039 | if (!IS_POSIXACL(path.dentry->d_inode)) | 3176 | if (!IS_POSIXACL(path.dentry->d_inode)) |
3040 | mode &= ~current_umask(); | 3177 | mode &= ~current_umask(); |
3041 | error = mnt_want_write(path.mnt); | ||
3042 | if (error) | ||
3043 | goto out_dput; | ||
3044 | error = security_path_mkdir(&path, dentry, mode); | 3178 | error = security_path_mkdir(&path, dentry, mode); |
3045 | if (error) | 3179 | if (!error) |
3046 | goto out_drop_write; | 3180 | error = vfs_mkdir(path.dentry->d_inode, dentry, mode); |
3047 | error = vfs_mkdir(path.dentry->d_inode, dentry, mode); | 3181 | done_path_create(&path, dentry); |
3048 | out_drop_write: | ||
3049 | mnt_drop_write(path.mnt); | ||
3050 | out_dput: | ||
3051 | dput(dentry); | ||
3052 | mutex_unlock(&path.dentry->d_inode->i_mutex); | ||
3053 | path_put(&path); | ||
3054 | return error; | 3182 | return error; |
3055 | } | 3183 | } |
3056 | 3184 | ||
@@ -3144,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname) | |||
3144 | } | 3272 | } |
3145 | 3273 | ||
3146 | nd.flags &= ~LOOKUP_PARENT; | 3274 | nd.flags &= ~LOOKUP_PARENT; |
3275 | error = mnt_want_write(nd.path.mnt); | ||
3276 | if (error) | ||
3277 | goto exit1; | ||
3147 | 3278 | ||
3148 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 3279 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
3149 | dentry = lookup_hash(&nd); | 3280 | dentry = lookup_hash(&nd); |
@@ -3154,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname) | |||
3154 | error = -ENOENT; | 3285 | error = -ENOENT; |
3155 | goto exit3; | 3286 | goto exit3; |
3156 | } | 3287 | } |
3157 | error = mnt_want_write(nd.path.mnt); | ||
3158 | if (error) | ||
3159 | goto exit3; | ||
3160 | error = security_path_rmdir(&nd.path, dentry); | 3288 | error = security_path_rmdir(&nd.path, dentry); |
3161 | if (error) | 3289 | if (error) |
3162 | goto exit4; | 3290 | goto exit3; |
3163 | error = vfs_rmdir(nd.path.dentry->d_inode, dentry); | 3291 | error = vfs_rmdir(nd.path.dentry->d_inode, dentry); |
3164 | exit4: | ||
3165 | mnt_drop_write(nd.path.mnt); | ||
3166 | exit3: | 3292 | exit3: |
3167 | dput(dentry); | 3293 | dput(dentry); |
3168 | exit2: | 3294 | exit2: |
3169 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 3295 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
3296 | mnt_drop_write(nd.path.mnt); | ||
3170 | exit1: | 3297 | exit1: |
3171 | path_put(&nd.path); | 3298 | path_put(&nd.path); |
3172 | putname(name); | 3299 | putname(name); |
@@ -3233,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) | |||
3233 | goto exit1; | 3360 | goto exit1; |
3234 | 3361 | ||
3235 | nd.flags &= ~LOOKUP_PARENT; | 3362 | nd.flags &= ~LOOKUP_PARENT; |
3363 | error = mnt_want_write(nd.path.mnt); | ||
3364 | if (error) | ||
3365 | goto exit1; | ||
3236 | 3366 | ||
3237 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 3367 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
3238 | dentry = lookup_hash(&nd); | 3368 | dentry = lookup_hash(&nd); |
@@ -3245,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname) | |||
3245 | if (!inode) | 3375 | if (!inode) |
3246 | goto slashes; | 3376 | goto slashes; |
3247 | ihold(inode); | 3377 | ihold(inode); |
3248 | error = mnt_want_write(nd.path.mnt); | ||
3249 | if (error) | ||
3250 | goto exit2; | ||
3251 | error = security_path_unlink(&nd.path, dentry); | 3378 | error = security_path_unlink(&nd.path, dentry); |
3252 | if (error) | 3379 | if (error) |
3253 | goto exit3; | 3380 | goto exit2; |
3254 | error = vfs_unlink(nd.path.dentry->d_inode, dentry); | 3381 | error = vfs_unlink(nd.path.dentry->d_inode, dentry); |
3255 | exit3: | 3382 | exit2: |
3256 | mnt_drop_write(nd.path.mnt); | ||
3257 | exit2: | ||
3258 | dput(dentry); | 3383 | dput(dentry); |
3259 | } | 3384 | } |
3260 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 3385 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
3261 | if (inode) | 3386 | if (inode) |
3262 | iput(inode); /* truncate the inode here */ | 3387 | iput(inode); /* truncate the inode here */ |
3388 | mnt_drop_write(nd.path.mnt); | ||
3263 | exit1: | 3389 | exit1: |
3264 | path_put(&nd.path); | 3390 | path_put(&nd.path); |
3265 | putname(name); | 3391 | putname(name); |
@@ -3324,19 +3450,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, | |||
3324 | if (IS_ERR(dentry)) | 3450 | if (IS_ERR(dentry)) |
3325 | goto out_putname; | 3451 | goto out_putname; |
3326 | 3452 | ||
3327 | error = mnt_want_write(path.mnt); | ||
3328 | if (error) | ||
3329 | goto out_dput; | ||
3330 | error = security_path_symlink(&path, dentry, from); | 3453 | error = security_path_symlink(&path, dentry, from); |
3331 | if (error) | 3454 | if (!error) |
3332 | goto out_drop_write; | 3455 | error = vfs_symlink(path.dentry->d_inode, dentry, from); |
3333 | error = vfs_symlink(path.dentry->d_inode, dentry, from); | 3456 | done_path_create(&path, dentry); |
3334 | out_drop_write: | ||
3335 | mnt_drop_write(path.mnt); | ||
3336 | out_dput: | ||
3337 | dput(dentry); | ||
3338 | mutex_unlock(&path.dentry->d_inode->i_mutex); | ||
3339 | path_put(&path); | ||
3340 | out_putname: | 3457 | out_putname: |
3341 | putname(from); | 3458 | putname(from); |
3342 | return error; | 3459 | return error; |
@@ -3436,19 +3553,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, | |||
3436 | error = -EXDEV; | 3553 | error = -EXDEV; |
3437 | if (old_path.mnt != new_path.mnt) | 3554 | if (old_path.mnt != new_path.mnt) |
3438 | goto out_dput; | 3555 | goto out_dput; |
3439 | error = mnt_want_write(new_path.mnt); | 3556 | error = may_linkat(&old_path); |
3440 | if (error) | 3557 | if (unlikely(error)) |
3441 | goto out_dput; | 3558 | goto out_dput; |
3442 | error = security_path_link(old_path.dentry, &new_path, new_dentry); | 3559 | error = security_path_link(old_path.dentry, &new_path, new_dentry); |
3443 | if (error) | 3560 | if (error) |
3444 | goto out_drop_write; | 3561 | goto out_dput; |
3445 | error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); | 3562 | error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); |
3446 | out_drop_write: | ||
3447 | mnt_drop_write(new_path.mnt); | ||
3448 | out_dput: | 3563 | out_dput: |
3449 | dput(new_dentry); | 3564 | done_path_create(&new_path, new_dentry); |
3450 | mutex_unlock(&new_path.dentry->d_inode->i_mutex); | ||
3451 | path_put(&new_path); | ||
3452 | out: | 3565 | out: |
3453 | path_put(&old_path); | 3566 | path_put(&old_path); |
3454 | 3567 | ||
@@ -3644,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | |||
3644 | if (newnd.last_type != LAST_NORM) | 3757 | if (newnd.last_type != LAST_NORM) |
3645 | goto exit2; | 3758 | goto exit2; |
3646 | 3759 | ||
3760 | error = mnt_want_write(oldnd.path.mnt); | ||
3761 | if (error) | ||
3762 | goto exit2; | ||
3763 | |||
3647 | oldnd.flags &= ~LOOKUP_PARENT; | 3764 | oldnd.flags &= ~LOOKUP_PARENT; |
3648 | newnd.flags &= ~LOOKUP_PARENT; | 3765 | newnd.flags &= ~LOOKUP_PARENT; |
3649 | newnd.flags |= LOOKUP_RENAME_TARGET; | 3766 | newnd.flags |= LOOKUP_RENAME_TARGET; |
@@ -3679,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | |||
3679 | if (new_dentry == trap) | 3796 | if (new_dentry == trap) |
3680 | goto exit5; | 3797 | goto exit5; |
3681 | 3798 | ||
3682 | error = mnt_want_write(oldnd.path.mnt); | ||
3683 | if (error) | ||
3684 | goto exit5; | ||
3685 | error = security_path_rename(&oldnd.path, old_dentry, | 3799 | error = security_path_rename(&oldnd.path, old_dentry, |
3686 | &newnd.path, new_dentry); | 3800 | &newnd.path, new_dentry); |
3687 | if (error) | 3801 | if (error) |
3688 | goto exit6; | 3802 | goto exit5; |
3689 | error = vfs_rename(old_dir->d_inode, old_dentry, | 3803 | error = vfs_rename(old_dir->d_inode, old_dentry, |
3690 | new_dir->d_inode, new_dentry); | 3804 | new_dir->d_inode, new_dentry); |
3691 | exit6: | ||
3692 | mnt_drop_write(oldnd.path.mnt); | ||
3693 | exit5: | 3805 | exit5: |
3694 | dput(new_dentry); | 3806 | dput(new_dentry); |
3695 | exit4: | 3807 | exit4: |
3696 | dput(old_dentry); | 3808 | dput(old_dentry); |
3697 | exit3: | 3809 | exit3: |
3698 | unlock_rename(new_dir, old_dir); | 3810 | unlock_rename(new_dir, old_dir); |
3811 | mnt_drop_write(oldnd.path.mnt); | ||
3699 | exit2: | 3812 | exit2: |
3700 | path_put(&newnd.path); | 3813 | path_put(&newnd.path); |
3701 | putname(to); | 3814 | putname(to); |
diff --git a/fs/namespace.c b/fs/namespace.c index c53d3381b0d0..4d31f73e2561 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt) | |||
283 | } | 283 | } |
284 | 284 | ||
285 | /* | 285 | /* |
286 | * Most r/o checks on a fs are for operations that take | 286 | * Most r/o & frozen checks on a fs are for operations that take discrete |
287 | * discrete amounts of time, like a write() or unlink(). | 287 | * amounts of time, like a write() or unlink(). We must keep track of when |
288 | * We must keep track of when those operations start | 288 | * those operations start (for permission checks) and when they end, so that we |
289 | * (for permission checks) and when they end, so that | 289 | * can determine when writes are able to occur to a filesystem. |
290 | * we can determine when writes are able to occur to | ||
291 | * a filesystem. | ||
292 | */ | 290 | */ |
293 | /** | 291 | /** |
294 | * mnt_want_write - get write access to a mount | 292 | * __mnt_want_write - get write access to a mount without freeze protection |
295 | * @m: the mount on which to take a write | 293 | * @m: the mount on which to take a write |
296 | * | 294 | * |
297 | * This tells the low-level filesystem that a write is | 295 | * This tells the low-level filesystem that a write is about to be performed to |
298 | * about to be performed to it, and makes sure that | 296 | * it, and makes sure that writes are allowed (mount is read-write) before |
299 | * writes are allowed before returning success. When | 297 | * returning success. This operation does not protect against filesystem being |
300 | * the write operation is finished, mnt_drop_write() | 298 | * frozen. When the write operation is finished, __mnt_drop_write() must be |
301 | * must be called. This is effectively a refcount. | 299 | * called. This is effectively a refcount. |
302 | */ | 300 | */ |
303 | int mnt_want_write(struct vfsmount *m) | 301 | int __mnt_want_write(struct vfsmount *m) |
304 | { | 302 | { |
305 | struct mount *mnt = real_mount(m); | 303 | struct mount *mnt = real_mount(m); |
306 | int ret = 0; | 304 | int ret = 0; |
@@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m) | |||
326 | ret = -EROFS; | 324 | ret = -EROFS; |
327 | } | 325 | } |
328 | preempt_enable(); | 326 | preempt_enable(); |
327 | |||
328 | return ret; | ||
329 | } | ||
330 | |||
331 | /** | ||
332 | * mnt_want_write - get write access to a mount | ||
333 | * @m: the mount on which to take a write | ||
334 | * | ||
335 | * This tells the low-level filesystem that a write is about to be performed to | ||
336 | * it, and makes sure that writes are allowed (mount is read-write, filesystem | ||
337 | * is not frozen) before returning success. When the write operation is | ||
338 | * finished, mnt_drop_write() must be called. This is effectively a refcount. | ||
339 | */ | ||
340 | int mnt_want_write(struct vfsmount *m) | ||
341 | { | ||
342 | int ret; | ||
343 | |||
344 | sb_start_write(m->mnt_sb); | ||
345 | ret = __mnt_want_write(m); | ||
346 | if (ret) | ||
347 | sb_end_write(m->mnt_sb); | ||
329 | return ret; | 348 | return ret; |
330 | } | 349 | } |
331 | EXPORT_SYMBOL_GPL(mnt_want_write); | 350 | EXPORT_SYMBOL_GPL(mnt_want_write); |
@@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt) | |||
355 | EXPORT_SYMBOL_GPL(mnt_clone_write); | 374 | EXPORT_SYMBOL_GPL(mnt_clone_write); |
356 | 375 | ||
357 | /** | 376 | /** |
358 | * mnt_want_write_file - get write access to a file's mount | 377 | * __mnt_want_write_file - get write access to a file's mount |
359 | * @file: the file who's mount on which to take a write | 378 | * @file: the file who's mount on which to take a write |
360 | * | 379 | * |
361 | * This is like mnt_want_write, but it takes a file and can | 380 | * This is like __mnt_want_write, but it takes a file and can |
362 | * do some optimisations if the file is open for write already | 381 | * do some optimisations if the file is open for write already |
363 | */ | 382 | */ |
364 | int mnt_want_write_file(struct file *file) | 383 | int __mnt_want_write_file(struct file *file) |
365 | { | 384 | { |
366 | struct inode *inode = file->f_dentry->d_inode; | 385 | struct inode *inode = file->f_dentry->d_inode; |
386 | |||
367 | if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) | 387 | if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) |
368 | return mnt_want_write(file->f_path.mnt); | 388 | return __mnt_want_write(file->f_path.mnt); |
369 | else | 389 | else |
370 | return mnt_clone_write(file->f_path.mnt); | 390 | return mnt_clone_write(file->f_path.mnt); |
371 | } | 391 | } |
392 | |||
393 | /** | ||
394 | * mnt_want_write_file - get write access to a file's mount | ||
395 | * @file: the file who's mount on which to take a write | ||
396 | * | ||
397 | * This is like mnt_want_write, but it takes a file and can | ||
398 | * do some optimisations if the file is open for write already | ||
399 | */ | ||
400 | int mnt_want_write_file(struct file *file) | ||
401 | { | ||
402 | int ret; | ||
403 | |||
404 | sb_start_write(file->f_path.mnt->mnt_sb); | ||
405 | ret = __mnt_want_write_file(file); | ||
406 | if (ret) | ||
407 | sb_end_write(file->f_path.mnt->mnt_sb); | ||
408 | return ret; | ||
409 | } | ||
372 | EXPORT_SYMBOL_GPL(mnt_want_write_file); | 410 | EXPORT_SYMBOL_GPL(mnt_want_write_file); |
373 | 411 | ||
374 | /** | 412 | /** |
375 | * mnt_drop_write - give up write access to a mount | 413 | * __mnt_drop_write - give up write access to a mount |
376 | * @mnt: the mount on which to give up write access | 414 | * @mnt: the mount on which to give up write access |
377 | * | 415 | * |
378 | * Tells the low-level filesystem that we are done | 416 | * Tells the low-level filesystem that we are done |
379 | * performing writes to it. Must be matched with | 417 | * performing writes to it. Must be matched with |
380 | * mnt_want_write() call above. | 418 | * __mnt_want_write() call above. |
381 | */ | 419 | */ |
382 | void mnt_drop_write(struct vfsmount *mnt) | 420 | void __mnt_drop_write(struct vfsmount *mnt) |
383 | { | 421 | { |
384 | preempt_disable(); | 422 | preempt_disable(); |
385 | mnt_dec_writers(real_mount(mnt)); | 423 | mnt_dec_writers(real_mount(mnt)); |
386 | preempt_enable(); | 424 | preempt_enable(); |
387 | } | 425 | } |
426 | |||
427 | /** | ||
428 | * mnt_drop_write - give up write access to a mount | ||
429 | * @mnt: the mount on which to give up write access | ||
430 | * | ||
431 | * Tells the low-level filesystem that we are done performing writes to it and | ||
432 | * also allows filesystem to be frozen again. Must be matched with | ||
433 | * mnt_want_write() call above. | ||
434 | */ | ||
435 | void mnt_drop_write(struct vfsmount *mnt) | ||
436 | { | ||
437 | __mnt_drop_write(mnt); | ||
438 | sb_end_write(mnt->mnt_sb); | ||
439 | } | ||
388 | EXPORT_SYMBOL_GPL(mnt_drop_write); | 440 | EXPORT_SYMBOL_GPL(mnt_drop_write); |
389 | 441 | ||
442 | void __mnt_drop_write_file(struct file *file) | ||
443 | { | ||
444 | __mnt_drop_write(file->f_path.mnt); | ||
445 | } | ||
446 | |||
390 | void mnt_drop_write_file(struct file *file) | 447 | void mnt_drop_write_file(struct file *file) |
391 | { | 448 | { |
392 | mnt_drop_write(file->f_path.mnt); | 449 | mnt_drop_write(file->f_path.mnt); |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5ff0b7b9fc08..43295d45cc2b 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
154 | if (status < 0) | 154 | if (status < 0) |
155 | return; | 155 | return; |
156 | 156 | ||
157 | status = mnt_want_write_file(rec_file); | ||
158 | if (status) | ||
159 | return; | ||
160 | |||
157 | dir = rec_file->f_path.dentry; | 161 | dir = rec_file->f_path.dentry; |
158 | /* lock the parent */ | 162 | /* lock the parent */ |
159 | mutex_lock(&dir->d_inode->i_mutex); | 163 | mutex_lock(&dir->d_inode->i_mutex); |
@@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
173 | * as well be forgiving and just succeed silently. | 177 | * as well be forgiving and just succeed silently. |
174 | */ | 178 | */ |
175 | goto out_put; | 179 | goto out_put; |
176 | status = mnt_want_write_file(rec_file); | ||
177 | if (status) | ||
178 | goto out_put; | ||
179 | status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); | 180 | status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); |
180 | mnt_drop_write_file(rec_file); | ||
181 | out_put: | 181 | out_put: |
182 | dput(dentry); | 182 | dput(dentry); |
183 | out_unlock: | 183 | out_unlock: |
@@ -189,6 +189,7 @@ out_unlock: | |||
189 | " (err %d); please check that %s exists" | 189 | " (err %d); please check that %s exists" |
190 | " and is writeable", status, | 190 | " and is writeable", status, |
191 | user_recovery_dirname); | 191 | user_recovery_dirname); |
192 | mnt_drop_write_file(rec_file); | ||
192 | nfs4_reset_creds(original_cred); | 193 | nfs4_reset_creds(original_cred); |
193 | } | 194 | } |
194 | 195 | ||
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cc793005a87c..032af381b3aa 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp) | |||
635 | fhp->fh_post_saved = 0; | 635 | fhp->fh_post_saved = 0; |
636 | #endif | 636 | #endif |
637 | } | 637 | } |
638 | fh_drop_write(fhp); | ||
638 | if (exp) { | 639 | if (exp) { |
639 | exp_put(exp); | 640 | exp_put(exp); |
640 | fhp->fh_export = NULL; | 641 | fhp->fh_export = NULL; |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index e15dc45fc5ec..aad6d457b9e8 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
196 | struct dentry *dchild; | 196 | struct dentry *dchild; |
197 | int type, mode; | 197 | int type, mode; |
198 | __be32 nfserr; | 198 | __be32 nfserr; |
199 | int hosterr; | ||
199 | dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); | 200 | dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); |
200 | 201 | ||
201 | dprintk("nfsd: CREATE %s %.*s\n", | 202 | dprintk("nfsd: CREATE %s %.*s\n", |
@@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
214 | nfserr = nfserr_exist; | 215 | nfserr = nfserr_exist; |
215 | if (isdotent(argp->name, argp->len)) | 216 | if (isdotent(argp->name, argp->len)) |
216 | goto done; | 217 | goto done; |
218 | hosterr = fh_want_write(dirfhp); | ||
219 | if (hosterr) { | ||
220 | nfserr = nfserrno(hosterr); | ||
221 | goto done; | ||
222 | } | ||
223 | |||
217 | fh_lock_nested(dirfhp, I_MUTEX_PARENT); | 224 | fh_lock_nested(dirfhp, I_MUTEX_PARENT); |
218 | dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); | 225 | dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); |
219 | if (IS_ERR(dchild)) { | 226 | if (IS_ERR(dchild)) { |
@@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
330 | out_unlock: | 337 | out_unlock: |
331 | /* We don't really need to unlock, as fh_put does it. */ | 338 | /* We don't really need to unlock, as fh_put does it. */ |
332 | fh_unlock(dirfhp); | 339 | fh_unlock(dirfhp); |
333 | 340 | fh_drop_write(dirfhp); | |
334 | done: | 341 | done: |
335 | fh_put(dirfhp); | 342 | fh_put(dirfhp); |
336 | return nfsd_return_dirop(nfserr, resp); | 343 | return nfsd_return_dirop(nfserr, resp); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 702f64e820c3..a9269f142cc4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -1284,6 +1284,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1284 | * If it has, the parent directory should already be locked. | 1284 | * If it has, the parent directory should already be locked. |
1285 | */ | 1285 | */ |
1286 | if (!resfhp->fh_dentry) { | 1286 | if (!resfhp->fh_dentry) { |
1287 | host_err = fh_want_write(fhp); | ||
1288 | if (host_err) | ||
1289 | goto out_nfserr; | ||
1290 | |||
1287 | /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ | 1291 | /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ |
1288 | fh_lock_nested(fhp, I_MUTEX_PARENT); | 1292 | fh_lock_nested(fhp, I_MUTEX_PARENT); |
1289 | dchild = lookup_one_len(fname, dentry, flen); | 1293 | dchild = lookup_one_len(fname, dentry, flen); |
@@ -1327,14 +1331,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1327 | goto out; | 1331 | goto out; |
1328 | } | 1332 | } |
1329 | 1333 | ||
1330 | host_err = fh_want_write(fhp); | ||
1331 | if (host_err) | ||
1332 | goto out_nfserr; | ||
1333 | |||
1334 | /* | 1334 | /* |
1335 | * Get the dir op function pointer. | 1335 | * Get the dir op function pointer. |
1336 | */ | 1336 | */ |
1337 | err = 0; | 1337 | err = 0; |
1338 | host_err = 0; | ||
1338 | switch (type) { | 1339 | switch (type) { |
1339 | case S_IFREG: | 1340 | case S_IFREG: |
1340 | host_err = vfs_create(dirp, dchild, iap->ia_mode, true); | 1341 | host_err = vfs_create(dirp, dchild, iap->ia_mode, true); |
@@ -1351,10 +1352,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1351 | host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); | 1352 | host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); |
1352 | break; | 1353 | break; |
1353 | } | 1354 | } |
1354 | if (host_err < 0) { | 1355 | if (host_err < 0) |
1355 | fh_drop_write(fhp); | ||
1356 | goto out_nfserr; | 1356 | goto out_nfserr; |
1357 | } | ||
1358 | 1357 | ||
1359 | err = nfsd_create_setattr(rqstp, resfhp, iap); | 1358 | err = nfsd_create_setattr(rqstp, resfhp, iap); |
1360 | 1359 | ||
@@ -1366,7 +1365,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1366 | err2 = nfserrno(commit_metadata(fhp)); | 1365 | err2 = nfserrno(commit_metadata(fhp)); |
1367 | if (err2) | 1366 | if (err2) |
1368 | err = err2; | 1367 | err = err2; |
1369 | fh_drop_write(fhp); | ||
1370 | /* | 1368 | /* |
1371 | * Update the file handle to get the new inode info. | 1369 | * Update the file handle to get the new inode info. |
1372 | */ | 1370 | */ |
@@ -1425,6 +1423,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1425 | err = nfserr_notdir; | 1423 | err = nfserr_notdir; |
1426 | if (!dirp->i_op->lookup) | 1424 | if (!dirp->i_op->lookup) |
1427 | goto out; | 1425 | goto out; |
1426 | |||
1427 | host_err = fh_want_write(fhp); | ||
1428 | if (host_err) | ||
1429 | goto out_nfserr; | ||
1430 | |||
1428 | fh_lock_nested(fhp, I_MUTEX_PARENT); | 1431 | fh_lock_nested(fhp, I_MUTEX_PARENT); |
1429 | 1432 | ||
1430 | /* | 1433 | /* |
@@ -1457,9 +1460,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1457 | v_atime = verifier[1]&0x7fffffff; | 1460 | v_atime = verifier[1]&0x7fffffff; |
1458 | } | 1461 | } |
1459 | 1462 | ||
1460 | host_err = fh_want_write(fhp); | ||
1461 | if (host_err) | ||
1462 | goto out_nfserr; | ||
1463 | if (dchild->d_inode) { | 1463 | if (dchild->d_inode) { |
1464 | err = 0; | 1464 | err = 0; |
1465 | 1465 | ||
@@ -1530,7 +1530,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1530 | if (!err) | 1530 | if (!err) |
1531 | err = nfserrno(commit_metadata(fhp)); | 1531 | err = nfserrno(commit_metadata(fhp)); |
1532 | 1532 | ||
1533 | fh_drop_write(fhp); | ||
1534 | /* | 1533 | /* |
1535 | * Update the filehandle to get the new inode info. | 1534 | * Update the filehandle to get the new inode info. |
1536 | */ | 1535 | */ |
@@ -1541,6 +1540,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1541 | fh_unlock(fhp); | 1540 | fh_unlock(fhp); |
1542 | if (dchild && !IS_ERR(dchild)) | 1541 | if (dchild && !IS_ERR(dchild)) |
1543 | dput(dchild); | 1542 | dput(dchild); |
1543 | fh_drop_write(fhp); | ||
1544 | return err; | 1544 | return err; |
1545 | 1545 | ||
1546 | out_nfserr: | 1546 | out_nfserr: |
@@ -1621,6 +1621,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1621 | err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); | 1621 | err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); |
1622 | if (err) | 1622 | if (err) |
1623 | goto out; | 1623 | goto out; |
1624 | |||
1625 | host_err = fh_want_write(fhp); | ||
1626 | if (host_err) | ||
1627 | goto out_nfserr; | ||
1628 | |||
1624 | fh_lock(fhp); | 1629 | fh_lock(fhp); |
1625 | dentry = fhp->fh_dentry; | 1630 | dentry = fhp->fh_dentry; |
1626 | dnew = lookup_one_len(fname, dentry, flen); | 1631 | dnew = lookup_one_len(fname, dentry, flen); |
@@ -1628,10 +1633,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1628 | if (IS_ERR(dnew)) | 1633 | if (IS_ERR(dnew)) |
1629 | goto out_nfserr; | 1634 | goto out_nfserr; |
1630 | 1635 | ||
1631 | host_err = fh_want_write(fhp); | ||
1632 | if (host_err) | ||
1633 | goto out_nfserr; | ||
1634 | |||
1635 | if (unlikely(path[plen] != 0)) { | 1636 | if (unlikely(path[plen] != 0)) { |
1636 | char *path_alloced = kmalloc(plen+1, GFP_KERNEL); | 1637 | char *path_alloced = kmalloc(plen+1, GFP_KERNEL); |
1637 | if (path_alloced == NULL) | 1638 | if (path_alloced == NULL) |
@@ -1691,6 +1692,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1691 | if (isdotent(name, len)) | 1692 | if (isdotent(name, len)) |
1692 | goto out; | 1693 | goto out; |
1693 | 1694 | ||
1695 | host_err = fh_want_write(tfhp); | ||
1696 | if (host_err) { | ||
1697 | err = nfserrno(host_err); | ||
1698 | goto out; | ||
1699 | } | ||
1700 | |||
1694 | fh_lock_nested(ffhp, I_MUTEX_PARENT); | 1701 | fh_lock_nested(ffhp, I_MUTEX_PARENT); |
1695 | ddir = ffhp->fh_dentry; | 1702 | ddir = ffhp->fh_dentry; |
1696 | dirp = ddir->d_inode; | 1703 | dirp = ddir->d_inode; |
@@ -1702,18 +1709,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1702 | 1709 | ||
1703 | dold = tfhp->fh_dentry; | 1710 | dold = tfhp->fh_dentry; |
1704 | 1711 | ||
1705 | host_err = fh_want_write(tfhp); | ||
1706 | if (host_err) { | ||
1707 | err = nfserrno(host_err); | ||
1708 | goto out_dput; | ||
1709 | } | ||
1710 | err = nfserr_noent; | 1712 | err = nfserr_noent; |
1711 | if (!dold->d_inode) | 1713 | if (!dold->d_inode) |
1712 | goto out_drop_write; | 1714 | goto out_dput; |
1713 | host_err = nfsd_break_lease(dold->d_inode); | 1715 | host_err = nfsd_break_lease(dold->d_inode); |
1714 | if (host_err) { | 1716 | if (host_err) { |
1715 | err = nfserrno(host_err); | 1717 | err = nfserrno(host_err); |
1716 | goto out_drop_write; | 1718 | goto out_dput; |
1717 | } | 1719 | } |
1718 | host_err = vfs_link(dold, dirp, dnew); | 1720 | host_err = vfs_link(dold, dirp, dnew); |
1719 | if (!host_err) { | 1721 | if (!host_err) { |
@@ -1726,12 +1728,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1726 | else | 1728 | else |
1727 | err = nfserrno(host_err); | 1729 | err = nfserrno(host_err); |
1728 | } | 1730 | } |
1729 | out_drop_write: | ||
1730 | fh_drop_write(tfhp); | ||
1731 | out_dput: | 1731 | out_dput: |
1732 | dput(dnew); | 1732 | dput(dnew); |
1733 | out_unlock: | 1733 | out_unlock: |
1734 | fh_unlock(ffhp); | 1734 | fh_unlock(ffhp); |
1735 | fh_drop_write(tfhp); | ||
1735 | out: | 1736 | out: |
1736 | return err; | 1737 | return err; |
1737 | 1738 | ||
@@ -1774,6 +1775,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1774 | if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) | 1775 | if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) |
1775 | goto out; | 1776 | goto out; |
1776 | 1777 | ||
1778 | host_err = fh_want_write(ffhp); | ||
1779 | if (host_err) { | ||
1780 | err = nfserrno(host_err); | ||
1781 | goto out; | ||
1782 | } | ||
1783 | |||
1777 | /* cannot use fh_lock as we need deadlock protective ordering | 1784 | /* cannot use fh_lock as we need deadlock protective ordering |
1778 | * so do it by hand */ | 1785 | * so do it by hand */ |
1779 | trap = lock_rename(tdentry, fdentry); | 1786 | trap = lock_rename(tdentry, fdentry); |
@@ -1804,17 +1811,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1804 | host_err = -EXDEV; | 1811 | host_err = -EXDEV; |
1805 | if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) | 1812 | if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) |
1806 | goto out_dput_new; | 1813 | goto out_dput_new; |
1807 | host_err = fh_want_write(ffhp); | ||
1808 | if (host_err) | ||
1809 | goto out_dput_new; | ||
1810 | 1814 | ||
1811 | host_err = nfsd_break_lease(odentry->d_inode); | 1815 | host_err = nfsd_break_lease(odentry->d_inode); |
1812 | if (host_err) | 1816 | if (host_err) |
1813 | goto out_drop_write; | 1817 | goto out_dput_new; |
1814 | if (ndentry->d_inode) { | 1818 | if (ndentry->d_inode) { |
1815 | host_err = nfsd_break_lease(ndentry->d_inode); | 1819 | host_err = nfsd_break_lease(ndentry->d_inode); |
1816 | if (host_err) | 1820 | if (host_err) |
1817 | goto out_drop_write; | 1821 | goto out_dput_new; |
1818 | } | 1822 | } |
1819 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); | 1823 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); |
1820 | if (!host_err) { | 1824 | if (!host_err) { |
@@ -1822,8 +1826,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1822 | if (!host_err) | 1826 | if (!host_err) |
1823 | host_err = commit_metadata(ffhp); | 1827 | host_err = commit_metadata(ffhp); |
1824 | } | 1828 | } |
1825 | out_drop_write: | ||
1826 | fh_drop_write(ffhp); | ||
1827 | out_dput_new: | 1829 | out_dput_new: |
1828 | dput(ndentry); | 1830 | dput(ndentry); |
1829 | out_dput_old: | 1831 | out_dput_old: |
@@ -1839,6 +1841,7 @@ out_drop_write: | |||
1839 | fill_post_wcc(tfhp); | 1841 | fill_post_wcc(tfhp); |
1840 | unlock_rename(tdentry, fdentry); | 1842 | unlock_rename(tdentry, fdentry); |
1841 | ffhp->fh_locked = tfhp->fh_locked = 0; | 1843 | ffhp->fh_locked = tfhp->fh_locked = 0; |
1844 | fh_drop_write(ffhp); | ||
1842 | 1845 | ||
1843 | out: | 1846 | out: |
1844 | return err; | 1847 | return err; |
@@ -1864,6 +1867,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
1864 | if (err) | 1867 | if (err) |
1865 | goto out; | 1868 | goto out; |
1866 | 1869 | ||
1870 | host_err = fh_want_write(fhp); | ||
1871 | if (host_err) | ||
1872 | goto out_nfserr; | ||
1873 | |||
1867 | fh_lock_nested(fhp, I_MUTEX_PARENT); | 1874 | fh_lock_nested(fhp, I_MUTEX_PARENT); |
1868 | dentry = fhp->fh_dentry; | 1875 | dentry = fhp->fh_dentry; |
1869 | dirp = dentry->d_inode; | 1876 | dirp = dentry->d_inode; |
@@ -1882,21 +1889,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
1882 | if (!type) | 1889 | if (!type) |
1883 | type = rdentry->d_inode->i_mode & S_IFMT; | 1890 | type = rdentry->d_inode->i_mode & S_IFMT; |
1884 | 1891 | ||
1885 | host_err = fh_want_write(fhp); | ||
1886 | if (host_err) | ||
1887 | goto out_put; | ||
1888 | |||
1889 | host_err = nfsd_break_lease(rdentry->d_inode); | 1892 | host_err = nfsd_break_lease(rdentry->d_inode); |
1890 | if (host_err) | 1893 | if (host_err) |
1891 | goto out_drop_write; | 1894 | goto out_put; |
1892 | if (type != S_IFDIR) | 1895 | if (type != S_IFDIR) |
1893 | host_err = vfs_unlink(dirp, rdentry); | 1896 | host_err = vfs_unlink(dirp, rdentry); |
1894 | else | 1897 | else |
1895 | host_err = vfs_rmdir(dirp, rdentry); | 1898 | host_err = vfs_rmdir(dirp, rdentry); |
1896 | if (!host_err) | 1899 | if (!host_err) |
1897 | host_err = commit_metadata(fhp); | 1900 | host_err = commit_metadata(fhp); |
1898 | out_drop_write: | ||
1899 | fh_drop_write(fhp); | ||
1900 | out_put: | 1901 | out_put: |
1901 | dput(rdentry); | 1902 | dput(rdentry); |
1902 | 1903 | ||
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index ec0611b2b738..359594c393d2 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h | |||
@@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); | |||
110 | 110 | ||
111 | static inline int fh_want_write(struct svc_fh *fh) | 111 | static inline int fh_want_write(struct svc_fh *fh) |
112 | { | 112 | { |
113 | return mnt_want_write(fh->fh_export->ex_path.mnt); | 113 | int ret = mnt_want_write(fh->fh_export->ex_path.mnt); |
114 | |||
115 | if (!ret) | ||
116 | fh->fh_want_write = 1; | ||
117 | return ret; | ||
114 | } | 118 | } |
115 | 119 | ||
116 | static inline void fh_drop_write(struct svc_fh *fh) | 120 | static inline void fh_drop_write(struct svc_fh *fh) |
117 | { | 121 | { |
118 | mnt_drop_write(fh->fh_export->ex_path.mnt); | 122 | if (fh->fh_want_write) { |
123 | fh->fh_want_write = 0; | ||
124 | mnt_drop_write(fh->fh_export->ex_path.mnt); | ||
125 | } | ||
119 | } | 126 | } |
120 | 127 | ||
121 | #endif /* LINUX_NFSD_VFS_H */ | 128 | #endif /* LINUX_NFSD_VFS_H */ |
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 62cebc8e1a1f..a4d56ac02e6c 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c | |||
@@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
69 | struct page *page = vmf->page; | 69 | struct page *page = vmf->page; |
70 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | 70 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
71 | struct nilfs_transaction_info ti; | 71 | struct nilfs_transaction_info ti; |
72 | int ret; | 72 | int ret = 0; |
73 | 73 | ||
74 | if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) | 74 | if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) |
75 | return VM_FAULT_SIGBUS; /* -ENOSPC */ | 75 | return VM_FAULT_SIGBUS; /* -ENOSPC */ |
76 | 76 | ||
77 | sb_start_pagefault(inode->i_sb); | ||
77 | lock_page(page); | 78 | lock_page(page); |
78 | if (page->mapping != inode->i_mapping || | 79 | if (page->mapping != inode->i_mapping || |
79 | page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { | 80 | page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { |
80 | unlock_page(page); | 81 | unlock_page(page); |
81 | return VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 82 | ret = -EFAULT; /* make the VM retry the fault */ |
83 | goto out; | ||
82 | } | 84 | } |
83 | 85 | ||
84 | /* | 86 | /* |
@@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
112 | ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); | 114 | ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); |
113 | /* never returns -ENOMEM, but may return -ENOSPC */ | 115 | /* never returns -ENOMEM, but may return -ENOSPC */ |
114 | if (unlikely(ret)) | 116 | if (unlikely(ret)) |
115 | return VM_FAULT_SIGBUS; | 117 | goto out; |
116 | 118 | ||
117 | ret = block_page_mkwrite(vma, vmf, nilfs_get_block); | 119 | ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); |
118 | if (ret != VM_FAULT_LOCKED) { | 120 | if (ret) { |
119 | nilfs_transaction_abort(inode->i_sb); | 121 | nilfs_transaction_abort(inode->i_sb); |
120 | return ret; | 122 | goto out; |
121 | } | 123 | } |
122 | nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); | 124 | nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); |
123 | nilfs_transaction_commit(inode->i_sb); | 125 | nilfs_transaction_commit(inode->i_sb); |
124 | 126 | ||
125 | mapped: | 127 | mapped: |
126 | wait_on_page_writeback(page); | 128 | wait_on_page_writeback(page); |
127 | return VM_FAULT_LOCKED; | 129 | out: |
130 | sb_end_pagefault(inode->i_sb); | ||
131 | return block_page_mkwrite_return(ret); | ||
128 | } | 132 | } |
129 | 133 | ||
130 | static const struct vm_operations_struct nilfs_file_vm_ops = { | 134 | static const struct vm_operations_struct nilfs_file_vm_ops = { |
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 0b6387c67e6c..fdb180769485 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
@@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, | |||
660 | goto out_free; | 660 | goto out_free; |
661 | } | 661 | } |
662 | 662 | ||
663 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
664 | |||
665 | ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); | 663 | ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); |
666 | if (ret < 0) | 664 | if (ret < 0) |
667 | printk(KERN_ERR "NILFS: GC failed during preparation: " | 665 | printk(KERN_ERR "NILFS: GC failed during preparation: " |
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 88e11fb346b6..a5752a589932 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb, | |||
189 | if (ret > 0) | 189 | if (ret > 0) |
190 | return 0; | 190 | return 0; |
191 | 191 | ||
192 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | 192 | sb_start_intwrite(sb); |
193 | 193 | ||
194 | nilfs = sb->s_fs_info; | 194 | nilfs = sb->s_fs_info; |
195 | down_read(&nilfs->ns_segctor_sem); | 195 | down_read(&nilfs->ns_segctor_sem); |
@@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb, | |||
205 | current->journal_info = ti->ti_save; | 205 | current->journal_info = ti->ti_save; |
206 | if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) | 206 | if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) |
207 | kmem_cache_free(nilfs_transaction_cachep, ti); | 207 | kmem_cache_free(nilfs_transaction_cachep, ti); |
208 | sb_end_intwrite(sb); | ||
208 | return ret; | 209 | return ret; |
209 | } | 210 | } |
210 | 211 | ||
@@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb) | |||
246 | err = nilfs_construct_segment(sb); | 247 | err = nilfs_construct_segment(sb); |
247 | if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) | 248 | if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) |
248 | kmem_cache_free(nilfs_transaction_cachep, ti); | 249 | kmem_cache_free(nilfs_transaction_cachep, ti); |
250 | sb_end_intwrite(sb); | ||
249 | return err; | 251 | return err; |
250 | } | 252 | } |
251 | 253 | ||
@@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb) | |||
264 | current->journal_info = ti->ti_save; | 266 | current->journal_info = ti->ti_save; |
265 | if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) | 267 | if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) |
266 | kmem_cache_free(nilfs_transaction_cachep, ti); | 268 | kmem_cache_free(nilfs_transaction_cachep, ti); |
269 | sb_end_intwrite(sb); | ||
267 | } | 270 | } |
268 | 271 | ||
269 | void nilfs_relax_pressure_in_lock(struct super_block *sb) | 272 | void nilfs_relax_pressure_in_lock(struct super_block *sb) |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 7389d2d5e51d..1ecf46448f85 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, | |||
2084 | if (err) | 2084 | if (err) |
2085 | return err; | 2085 | return err; |
2086 | pos = *ppos; | 2086 | pos = *ppos; |
2087 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
2088 | /* We can write back this queue in page reclaim. */ | 2087 | /* We can write back this queue in page reclaim. */ |
2089 | current->backing_dev_info = mapping->backing_dev_info; | 2088 | current->backing_dev_info = mapping->backing_dev_info; |
2090 | written = 0; | 2089 | written = 0; |
@@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2119 | 2118 | ||
2120 | BUG_ON(iocb->ki_pos != pos); | 2119 | BUG_ON(iocb->ki_pos != pos); |
2121 | 2120 | ||
2121 | sb_start_write(inode->i_sb); | ||
2122 | mutex_lock(&inode->i_mutex); | 2122 | mutex_lock(&inode->i_mutex); |
2123 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); | 2123 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); |
2124 | mutex_unlock(&inode->i_mutex); | 2124 | mutex_unlock(&inode->i_mutex); |
@@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2127 | if (err < 0) | 2127 | if (err < 0) |
2128 | ret = err; | 2128 | ret = err; |
2129 | } | 2129 | } |
2130 | sb_end_write(inode->i_sb); | ||
2130 | return ret; | 2131 | return ret; |
2131 | } | 2132 | } |
2132 | 2133 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 7602783d7f41..46a1f6d75104 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, | |||
1971 | { | 1971 | { |
1972 | struct inode *inode = file->f_path.dentry->d_inode; | 1972 | struct inode *inode = file->f_path.dentry->d_inode; |
1973 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1973 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1974 | int ret; | ||
1974 | 1975 | ||
1975 | if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && | 1976 | if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && |
1976 | !ocfs2_writes_unwritten_extents(osb)) | 1977 | !ocfs2_writes_unwritten_extents(osb)) |
@@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, | |||
1985 | if (!(file->f_mode & FMODE_WRITE)) | 1986 | if (!(file->f_mode & FMODE_WRITE)) |
1986 | return -EBADF; | 1987 | return -EBADF; |
1987 | 1988 | ||
1988 | return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); | 1989 | ret = mnt_want_write_file(file); |
1990 | if (ret) | ||
1991 | return ret; | ||
1992 | ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); | ||
1993 | mnt_drop_write_file(file); | ||
1994 | return ret; | ||
1989 | } | 1995 | } |
1990 | 1996 | ||
1991 | static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, | 1997 | static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, |
@@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2261 | if (iocb->ki_left == 0) | 2267 | if (iocb->ki_left == 0) |
2262 | return 0; | 2268 | return 0; |
2263 | 2269 | ||
2264 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 2270 | sb_start_write(inode->i_sb); |
2265 | 2271 | ||
2266 | appending = file->f_flags & O_APPEND ? 1 : 0; | 2272 | appending = file->f_flags & O_APPEND ? 1 : 0; |
2267 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; | 2273 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; |
@@ -2436,6 +2442,7 @@ out_sems: | |||
2436 | ocfs2_iocb_clear_sem_locked(iocb); | 2442 | ocfs2_iocb_clear_sem_locked(iocb); |
2437 | 2443 | ||
2438 | mutex_unlock(&inode->i_mutex); | 2444 | mutex_unlock(&inode->i_mutex); |
2445 | sb_end_write(inode->i_sb); | ||
2439 | 2446 | ||
2440 | if (written) | 2447 | if (written) |
2441 | ret = written; | 2448 | ret = written; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index d96f7f81d8dd..f20edcbfe700 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
928 | if (get_user(new_clusters, (int __user *)arg)) | 928 | if (get_user(new_clusters, (int __user *)arg)) |
929 | return -EFAULT; | 929 | return -EFAULT; |
930 | 930 | ||
931 | return ocfs2_group_extend(inode, new_clusters); | 931 | status = mnt_want_write_file(filp); |
932 | if (status) | ||
933 | return status; | ||
934 | status = ocfs2_group_extend(inode, new_clusters); | ||
935 | mnt_drop_write_file(filp); | ||
936 | return status; | ||
932 | case OCFS2_IOC_GROUP_ADD: | 937 | case OCFS2_IOC_GROUP_ADD: |
933 | case OCFS2_IOC_GROUP_ADD64: | 938 | case OCFS2_IOC_GROUP_ADD64: |
934 | if (!capable(CAP_SYS_RESOURCE)) | 939 | if (!capable(CAP_SYS_RESOURCE)) |
@@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
937 | if (copy_from_user(&input, (int __user *) arg, sizeof(input))) | 942 | if (copy_from_user(&input, (int __user *) arg, sizeof(input))) |
938 | return -EFAULT; | 943 | return -EFAULT; |
939 | 944 | ||
940 | return ocfs2_group_add(inode, &input); | 945 | status = mnt_want_write_file(filp); |
946 | if (status) | ||
947 | return status; | ||
948 | status = ocfs2_group_add(inode, &input); | ||
949 | mnt_drop_write_file(filp); | ||
950 | return status; | ||
941 | case OCFS2_IOC_REFLINK: | 951 | case OCFS2_IOC_REFLINK: |
942 | if (copy_from_user(&args, argp, sizeof(args))) | 952 | if (copy_from_user(&args, argp, sizeof(args))) |
943 | return -EFAULT; | 953 | return -EFAULT; |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 0a42ae96dca7..2dd36af79e26 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | |||
355 | if (journal_current_handle()) | 355 | if (journal_current_handle()) |
356 | return jbd2_journal_start(journal, max_buffs); | 356 | return jbd2_journal_start(journal, max_buffs); |
357 | 357 | ||
358 | sb_start_intwrite(osb->sb); | ||
359 | |||
358 | down_read(&osb->journal->j_trans_barrier); | 360 | down_read(&osb->journal->j_trans_barrier); |
359 | 361 | ||
360 | handle = jbd2_journal_start(journal, max_buffs); | 362 | handle = jbd2_journal_start(journal, max_buffs); |
361 | if (IS_ERR(handle)) { | 363 | if (IS_ERR(handle)) { |
362 | up_read(&osb->journal->j_trans_barrier); | 364 | up_read(&osb->journal->j_trans_barrier); |
365 | sb_end_intwrite(osb->sb); | ||
363 | 366 | ||
364 | mlog_errno(PTR_ERR(handle)); | 367 | mlog_errno(PTR_ERR(handle)); |
365 | 368 | ||
@@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
388 | if (ret < 0) | 391 | if (ret < 0) |
389 | mlog_errno(ret); | 392 | mlog_errno(ret); |
390 | 393 | ||
391 | if (!nested) | 394 | if (!nested) { |
392 | up_read(&journal->j_trans_barrier); | 395 | up_read(&journal->j_trans_barrier); |
396 | sb_end_intwrite(osb->sb); | ||
397 | } | ||
393 | 398 | ||
394 | return ret; | 399 | return ret; |
395 | } | 400 | } |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 9cd41083e991..d150372fd81d 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
136 | sigset_t oldset; | 136 | sigset_t oldset; |
137 | int ret; | 137 | int ret; |
138 | 138 | ||
139 | sb_start_pagefault(inode->i_sb); | ||
139 | ocfs2_block_signals(&oldset); | 140 | ocfs2_block_signals(&oldset); |
140 | 141 | ||
141 | /* | 142 | /* |
@@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
165 | 166 | ||
166 | out: | 167 | out: |
167 | ocfs2_unblock_signals(&oldset); | 168 | ocfs2_unblock_signals(&oldset); |
169 | sb_end_pagefault(inode->i_sb); | ||
168 | return ret; | 170 | return ret; |
169 | } | 171 | } |
170 | 172 | ||
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9f32d7cbb7a3..30a055049e16 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -4466,20 +4466,11 @@ int ocfs2_reflink_ioctl(struct inode *inode, | |||
4466 | goto out_dput; | 4466 | goto out_dput; |
4467 | } | 4467 | } |
4468 | 4468 | ||
4469 | error = mnt_want_write(new_path.mnt); | ||
4470 | if (error) { | ||
4471 | mlog_errno(error); | ||
4472 | goto out_dput; | ||
4473 | } | ||
4474 | |||
4475 | error = ocfs2_vfs_reflink(old_path.dentry, | 4469 | error = ocfs2_vfs_reflink(old_path.dentry, |
4476 | new_path.dentry->d_inode, | 4470 | new_path.dentry->d_inode, |
4477 | new_dentry, preserve); | 4471 | new_dentry, preserve); |
4478 | mnt_drop_write(new_path.mnt); | ||
4479 | out_dput: | 4472 | out_dput: |
4480 | dput(new_dentry); | 4473 | done_path_create(&new_path, new_dentry); |
4481 | mutex_unlock(&new_path.dentry->d_inode->i_mutex); | ||
4482 | path_put(&new_path); | ||
4483 | out: | 4474 | out: |
4484 | path_put(&old_path); | 4475 | path_put(&old_path); |
4485 | 4476 | ||
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) | |||
164 | if (IS_APPEND(inode)) | 164 | if (IS_APPEND(inode)) |
165 | goto out_putf; | 165 | goto out_putf; |
166 | 166 | ||
167 | sb_start_write(inode->i_sb); | ||
167 | error = locks_verify_truncate(inode, file, length); | 168 | error = locks_verify_truncate(inode, file, length); |
168 | if (!error) | 169 | if (!error) |
169 | error = security_path_truncate(&file->f_path); | 170 | error = security_path_truncate(&file->f_path); |
170 | if (!error) | 171 | if (!error) |
171 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); | 172 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); |
173 | sb_end_write(inode->i_sb); | ||
172 | out_putf: | 174 | out_putf: |
173 | fput(file); | 175 | fput(file); |
174 | out: | 176 | out: |
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
266 | if (!file->f_op->fallocate) | 268 | if (!file->f_op->fallocate) |
267 | return -EOPNOTSUPP; | 269 | return -EOPNOTSUPP; |
268 | 270 | ||
269 | return file->f_op->fallocate(file, mode, offset, len); | 271 | sb_start_write(inode->i_sb); |
272 | ret = file->f_op->fallocate(file, mode, offset, len); | ||
273 | sb_end_write(inode->i_sb); | ||
274 | return ret; | ||
270 | } | 275 | } |
271 | 276 | ||
272 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) | 277 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) |
@@ -620,7 +625,7 @@ static inline int __get_file_write_access(struct inode *inode, | |||
620 | /* | 625 | /* |
621 | * Balanced in __fput() | 626 | * Balanced in __fput() |
622 | */ | 627 | */ |
623 | error = mnt_want_write(mnt); | 628 | error = __mnt_want_write(mnt); |
624 | if (error) | 629 | if (error) |
625 | put_write_access(inode); | 630 | put_write_access(inode); |
626 | } | 631 | } |
@@ -654,6 +659,7 @@ static int do_dentry_open(struct file *f, | |||
654 | if (unlikely(f->f_flags & O_PATH)) | 659 | if (unlikely(f->f_flags & O_PATH)) |
655 | f->f_mode = FMODE_PATH; | 660 | f->f_mode = FMODE_PATH; |
656 | 661 | ||
662 | path_get(&f->f_path); | ||
657 | inode = f->f_path.dentry->d_inode; | 663 | inode = f->f_path.dentry->d_inode; |
658 | if (f->f_mode & FMODE_WRITE) { | 664 | if (f->f_mode & FMODE_WRITE) { |
659 | error = __get_file_write_access(inode, f->f_path.mnt); | 665 | error = __get_file_write_access(inode, f->f_path.mnt); |
@@ -739,9 +745,7 @@ int finish_open(struct file *file, struct dentry *dentry, | |||
739 | int error; | 745 | int error; |
740 | BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ | 746 | BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ |
741 | 747 | ||
742 | mntget(file->f_path.mnt); | 748 | file->f_path.dentry = dentry; |
743 | file->f_path.dentry = dget(dentry); | ||
744 | |||
745 | error = do_dentry_open(file, open, current_cred()); | 749 | error = do_dentry_open(file, open, current_cred()); |
746 | if (!error) | 750 | if (!error) |
747 | *opened |= FILE_OPENED; | 751 | *opened |= FILE_OPENED; |
@@ -784,7 +788,6 @@ struct file *dentry_open(const struct path *path, int flags, | |||
784 | 788 | ||
785 | f->f_flags = flags; | 789 | f->f_flags = flags; |
786 | f->f_path = *path; | 790 | f->f_path = *path; |
787 | path_get(&f->f_path); | ||
788 | error = do_dentry_open(f, NULL, cred); | 791 | error = do_dentry_open(f, NULL, cred); |
789 | if (!error) { | 792 | if (!error) { |
790 | error = open_check_o_direct(f); | 793 | error = open_check_o_direct(f); |
@@ -1016,18 +1016,16 @@ fail_inode: | |||
1016 | return NULL; | 1016 | return NULL; |
1017 | } | 1017 | } |
1018 | 1018 | ||
1019 | struct file *create_write_pipe(int flags) | 1019 | int create_pipe_files(struct file **res, int flags) |
1020 | { | 1020 | { |
1021 | int err; | 1021 | int err; |
1022 | struct inode *inode; | 1022 | struct inode *inode = get_pipe_inode(); |
1023 | struct file *f; | 1023 | struct file *f; |
1024 | struct path path; | 1024 | struct path path; |
1025 | struct qstr name = { .name = "" }; | 1025 | static struct qstr name = { .name = "" }; |
1026 | 1026 | ||
1027 | err = -ENFILE; | ||
1028 | inode = get_pipe_inode(); | ||
1029 | if (!inode) | 1027 | if (!inode) |
1030 | goto err; | 1028 | return -ENFILE; |
1031 | 1029 | ||
1032 | err = -ENOMEM; | 1030 | err = -ENOMEM; |
1033 | path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); | 1031 | path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); |
@@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags) | |||
1041 | f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); | 1039 | f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); |
1042 | if (!f) | 1040 | if (!f) |
1043 | goto err_dentry; | 1041 | goto err_dentry; |
1044 | f->f_mapping = inode->i_mapping; | ||
1045 | 1042 | ||
1046 | f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); | 1043 | f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); |
1047 | f->f_version = 0; | ||
1048 | 1044 | ||
1049 | return f; | 1045 | res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); |
1046 | if (!res[0]) | ||
1047 | goto err_file; | ||
1048 | |||
1049 | path_get(&path); | ||
1050 | res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); | ||
1051 | res[1] = f; | ||
1052 | return 0; | ||
1050 | 1053 | ||
1051 | err_dentry: | 1054 | err_file: |
1055 | put_filp(f); | ||
1056 | err_dentry: | ||
1052 | free_pipe_info(inode); | 1057 | free_pipe_info(inode); |
1053 | path_put(&path); | 1058 | path_put(&path); |
1054 | return ERR_PTR(err); | 1059 | return err; |
1055 | 1060 | ||
1056 | err_inode: | 1061 | err_inode: |
1057 | free_pipe_info(inode); | 1062 | free_pipe_info(inode); |
1058 | iput(inode); | 1063 | iput(inode); |
1059 | err: | 1064 | return err; |
1060 | return ERR_PTR(err); | ||
1061 | } | ||
1062 | |||
1063 | void free_write_pipe(struct file *f) | ||
1064 | { | ||
1065 | free_pipe_info(f->f_dentry->d_inode); | ||
1066 | path_put(&f->f_path); | ||
1067 | put_filp(f); | ||
1068 | } | ||
1069 | |||
1070 | struct file *create_read_pipe(struct file *wrf, int flags) | ||
1071 | { | ||
1072 | /* Grab pipe from the writer */ | ||
1073 | struct file *f = alloc_file(&wrf->f_path, FMODE_READ, | ||
1074 | &read_pipefifo_fops); | ||
1075 | if (!f) | ||
1076 | return ERR_PTR(-ENFILE); | ||
1077 | |||
1078 | path_get(&wrf->f_path); | ||
1079 | f->f_flags = O_RDONLY | (flags & O_NONBLOCK); | ||
1080 | |||
1081 | return f; | ||
1082 | } | 1065 | } |
1083 | 1066 | ||
1084 | int do_pipe_flags(int *fd, int flags) | 1067 | int do_pipe_flags(int *fd, int flags) |
1085 | { | 1068 | { |
1086 | struct file *fw, *fr; | 1069 | struct file *files[2]; |
1087 | int error; | 1070 | int error; |
1088 | int fdw, fdr; | 1071 | int fdw, fdr; |
1089 | 1072 | ||
1090 | if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) | 1073 | if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) |
1091 | return -EINVAL; | 1074 | return -EINVAL; |
1092 | 1075 | ||
1093 | fw = create_write_pipe(flags); | 1076 | error = create_pipe_files(files, flags); |
1094 | if (IS_ERR(fw)) | 1077 | if (error) |
1095 | return PTR_ERR(fw); | 1078 | return error; |
1096 | fr = create_read_pipe(fw, flags); | ||
1097 | error = PTR_ERR(fr); | ||
1098 | if (IS_ERR(fr)) | ||
1099 | goto err_write_pipe; | ||
1100 | 1079 | ||
1101 | error = get_unused_fd_flags(flags); | 1080 | error = get_unused_fd_flags(flags); |
1102 | if (error < 0) | 1081 | if (error < 0) |
@@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags) | |||
1109 | fdw = error; | 1088 | fdw = error; |
1110 | 1089 | ||
1111 | audit_fd_pair(fdr, fdw); | 1090 | audit_fd_pair(fdr, fdw); |
1112 | fd_install(fdr, fr); | 1091 | fd_install(fdr, files[0]); |
1113 | fd_install(fdw, fw); | 1092 | fd_install(fdw, files[1]); |
1114 | fd[0] = fdr; | 1093 | fd[0] = fdr; |
1115 | fd[1] = fdw; | 1094 | fd[1] = fdw; |
1116 | 1095 | ||
@@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags) | |||
1119 | err_fdr: | 1098 | err_fdr: |
1120 | put_unused_fd(fdr); | 1099 | put_unused_fd(fdr); |
1121 | err_read_pipe: | 1100 | err_read_pipe: |
1122 | path_put(&fr->f_path); | 1101 | fput(files[0]); |
1123 | put_filp(fr); | 1102 | fput(files[1]); |
1124 | err_write_pipe: | ||
1125 | free_write_pipe(fw); | ||
1126 | return error; | 1103 | return error; |
1127 | } | 1104 | } |
1128 | 1105 | ||
diff --git a/fs/splice.c b/fs/splice.c index 7bf08fa22ec9..41514dd89462 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
996 | }; | 996 | }; |
997 | ssize_t ret; | 997 | ssize_t ret; |
998 | 998 | ||
999 | sb_start_write(inode->i_sb); | ||
1000 | |||
999 | pipe_lock(pipe); | 1001 | pipe_lock(pipe); |
1000 | 1002 | ||
1001 | splice_from_pipe_begin(&sd); | 1003 | splice_from_pipe_begin(&sd); |
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1034 | *ppos += ret; | 1036 | *ppos += ret; |
1035 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 1037 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); |
1036 | } | 1038 | } |
1039 | sb_end_write(inode->i_sb); | ||
1037 | 1040 | ||
1038 | return ret; | 1041 | return ret; |
1039 | } | 1042 | } |
diff --git a/fs/super.c b/fs/super.c index 4bf714459a4b..b05cf47463d0 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -33,12 +33,19 @@ | |||
33 | #include <linux/rculist_bl.h> | 33 | #include <linux/rculist_bl.h> |
34 | #include <linux/cleancache.h> | 34 | #include <linux/cleancache.h> |
35 | #include <linux/fsnotify.h> | 35 | #include <linux/fsnotify.h> |
36 | #include <linux/lockdep.h> | ||
36 | #include "internal.h" | 37 | #include "internal.h" |
37 | 38 | ||
38 | 39 | ||
39 | LIST_HEAD(super_blocks); | 40 | LIST_HEAD(super_blocks); |
40 | DEFINE_SPINLOCK(sb_lock); | 41 | DEFINE_SPINLOCK(sb_lock); |
41 | 42 | ||
43 | static char *sb_writers_name[SB_FREEZE_LEVELS] = { | ||
44 | "sb_writers", | ||
45 | "sb_pagefaults", | ||
46 | "sb_internal", | ||
47 | }; | ||
48 | |||
42 | /* | 49 | /* |
43 | * One thing we have to be careful of with a per-sb shrinker is that we don't | 50 | * One thing we have to be careful of with a per-sb shrinker is that we don't |
44 | * drop the last active reference to the superblock from within the shrinker. | 51 | * drop the last active reference to the superblock from within the shrinker. |
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) | |||
102 | return total_objects; | 109 | return total_objects; |
103 | } | 110 | } |
104 | 111 | ||
112 | static int init_sb_writers(struct super_block *s, struct file_system_type *type) | ||
113 | { | ||
114 | int err; | ||
115 | int i; | ||
116 | |||
117 | for (i = 0; i < SB_FREEZE_LEVELS; i++) { | ||
118 | err = percpu_counter_init(&s->s_writers.counter[i], 0); | ||
119 | if (err < 0) | ||
120 | goto err_out; | ||
121 | lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], | ||
122 | &type->s_writers_key[i], 0); | ||
123 | } | ||
124 | init_waitqueue_head(&s->s_writers.wait); | ||
125 | init_waitqueue_head(&s->s_writers.wait_unfrozen); | ||
126 | return 0; | ||
127 | err_out: | ||
128 | while (--i >= 0) | ||
129 | percpu_counter_destroy(&s->s_writers.counter[i]); | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | static void destroy_sb_writers(struct super_block *s) | ||
134 | { | ||
135 | int i; | ||
136 | |||
137 | for (i = 0; i < SB_FREEZE_LEVELS; i++) | ||
138 | percpu_counter_destroy(&s->s_writers.counter[i]); | ||
139 | } | ||
140 | |||
105 | /** | 141 | /** |
106 | * alloc_super - create new superblock | 142 | * alloc_super - create new superblock |
107 | * @type: filesystem type superblock should belong to | 143 | * @type: filesystem type superblock should belong to |
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
117 | 153 | ||
118 | if (s) { | 154 | if (s) { |
119 | if (security_sb_alloc(s)) { | 155 | if (security_sb_alloc(s)) { |
156 | /* | ||
157 | * We cannot call security_sb_free() without | ||
158 | * security_sb_alloc() succeeding. So bail out manually | ||
159 | */ | ||
120 | kfree(s); | 160 | kfree(s); |
121 | s = NULL; | 161 | s = NULL; |
122 | goto out; | 162 | goto out; |
123 | } | 163 | } |
124 | #ifdef CONFIG_SMP | 164 | #ifdef CONFIG_SMP |
125 | s->s_files = alloc_percpu(struct list_head); | 165 | s->s_files = alloc_percpu(struct list_head); |
126 | if (!s->s_files) { | 166 | if (!s->s_files) |
127 | security_sb_free(s); | 167 | goto err_out; |
128 | kfree(s); | 168 | else { |
129 | s = NULL; | ||
130 | goto out; | ||
131 | } else { | ||
132 | int i; | 169 | int i; |
133 | 170 | ||
134 | for_each_possible_cpu(i) | 171 | for_each_possible_cpu(i) |
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
137 | #else | 174 | #else |
138 | INIT_LIST_HEAD(&s->s_files); | 175 | INIT_LIST_HEAD(&s->s_files); |
139 | #endif | 176 | #endif |
177 | if (init_sb_writers(s, type)) | ||
178 | goto err_out; | ||
140 | s->s_flags = flags; | 179 | s->s_flags = flags; |
141 | s->s_bdi = &default_backing_dev_info; | 180 | s->s_bdi = &default_backing_dev_info; |
142 | INIT_HLIST_NODE(&s->s_instances); | 181 | INIT_HLIST_NODE(&s->s_instances); |
@@ -178,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
178 | mutex_init(&s->s_dquot.dqio_mutex); | 217 | mutex_init(&s->s_dquot.dqio_mutex); |
179 | mutex_init(&s->s_dquot.dqonoff_mutex); | 218 | mutex_init(&s->s_dquot.dqonoff_mutex); |
180 | init_rwsem(&s->s_dquot.dqptr_sem); | 219 | init_rwsem(&s->s_dquot.dqptr_sem); |
181 | init_waitqueue_head(&s->s_wait_unfrozen); | ||
182 | s->s_maxbytes = MAX_NON_LFS; | 220 | s->s_maxbytes = MAX_NON_LFS; |
183 | s->s_op = &default_op; | 221 | s->s_op = &default_op; |
184 | s->s_time_gran = 1000000000; | 222 | s->s_time_gran = 1000000000; |
@@ -190,6 +228,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
190 | } | 228 | } |
191 | out: | 229 | out: |
192 | return s; | 230 | return s; |
231 | err_out: | ||
232 | security_sb_free(s); | ||
233 | #ifdef CONFIG_SMP | ||
234 | if (s->s_files) | ||
235 | free_percpu(s->s_files); | ||
236 | #endif | ||
237 | destroy_sb_writers(s); | ||
238 | kfree(s); | ||
239 | s = NULL; | ||
240 | goto out; | ||
193 | } | 241 | } |
194 | 242 | ||
195 | /** | 243 | /** |
@@ -203,6 +251,7 @@ static inline void destroy_super(struct super_block *s) | |||
203 | #ifdef CONFIG_SMP | 251 | #ifdef CONFIG_SMP |
204 | free_percpu(s->s_files); | 252 | free_percpu(s->s_files); |
205 | #endif | 253 | #endif |
254 | destroy_sb_writers(s); | ||
206 | security_sb_free(s); | 255 | security_sb_free(s); |
207 | WARN_ON(!list_empty(&s->s_mounts)); | 256 | WARN_ON(!list_empty(&s->s_mounts)); |
208 | kfree(s->s_subtype); | 257 | kfree(s->s_subtype); |
@@ -651,10 +700,11 @@ struct super_block *get_super_thawed(struct block_device *bdev) | |||
651 | { | 700 | { |
652 | while (1) { | 701 | while (1) { |
653 | struct super_block *s = get_super(bdev); | 702 | struct super_block *s = get_super(bdev); |
654 | if (!s || s->s_frozen == SB_UNFROZEN) | 703 | if (!s || s->s_writers.frozen == SB_UNFROZEN) |
655 | return s; | 704 | return s; |
656 | up_read(&s->s_umount); | 705 | up_read(&s->s_umount); |
657 | vfs_check_frozen(s, SB_FREEZE_WRITE); | 706 | wait_event(s->s_writers.wait_unfrozen, |
707 | s->s_writers.frozen == SB_UNFROZEN); | ||
658 | put_super(s); | 708 | put_super(s); |
659 | } | 709 | } |
660 | } | 710 | } |
@@ -732,7 +782,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
732 | int retval; | 782 | int retval; |
733 | int remount_ro; | 783 | int remount_ro; |
734 | 784 | ||
735 | if (sb->s_frozen != SB_UNFROZEN) | 785 | if (sb->s_writers.frozen != SB_UNFROZEN) |
736 | return -EBUSY; | 786 | return -EBUSY; |
737 | 787 | ||
738 | #ifdef CONFIG_BLOCK | 788 | #ifdef CONFIG_BLOCK |
@@ -1163,6 +1213,120 @@ out: | |||
1163 | return ERR_PTR(error); | 1213 | return ERR_PTR(error); |
1164 | } | 1214 | } |
1165 | 1215 | ||
1216 | /* | ||
1217 | * This is an internal function, please use sb_end_{write,pagefault,intwrite} | ||
1218 | * instead. | ||
1219 | */ | ||
1220 | void __sb_end_write(struct super_block *sb, int level) | ||
1221 | { | ||
1222 | percpu_counter_dec(&sb->s_writers.counter[level-1]); | ||
1223 | /* | ||
1224 | * Make sure s_writers are updated before we wake up waiters in | ||
1225 | * freeze_super(). | ||
1226 | */ | ||
1227 | smp_mb(); | ||
1228 | if (waitqueue_active(&sb->s_writers.wait)) | ||
1229 | wake_up(&sb->s_writers.wait); | ||
1230 | rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_); | ||
1231 | } | ||
1232 | EXPORT_SYMBOL(__sb_end_write); | ||
1233 | |||
1234 | #ifdef CONFIG_LOCKDEP | ||
1235 | /* | ||
1236 | * We want lockdep to tell us about possible deadlocks with freezing but | ||
1237 | * it's a bit tricky to properly instrument it. Getting a freeze protection | | |
1238 | * works as getting a read lock but there are subtle problems. XFS for example | ||
1239 | * gets freeze protection on internal level twice in some cases, which is OK | ||
1240 | * only because we already hold a freeze protection also on higher level. Due | ||
1241 | * to these cases we have to tell lockdep we are doing trylock when we | ||
1242 | * already hold a freeze protection for a higher freeze level. | ||
1243 | */ | ||
1244 | static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock, | ||
1245 | unsigned long ip) | ||
1246 | { | ||
1247 | int i; | ||
1248 | |||
1249 | if (!trylock) { | ||
1250 | for (i = 0; i < level - 1; i++) | ||
1251 | if (lock_is_held(&sb->s_writers.lock_map[i])) { | ||
1252 | trylock = true; | ||
1253 | break; | ||
1254 | } | ||
1255 | } | ||
1256 | rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip); | ||
1257 | } | ||
1258 | #endif | ||
1259 | |||
1260 | /* | ||
1261 | * This is an internal function, please use sb_start_{write,pagefault,intwrite} | ||
1262 | * instead. | ||
1263 | */ | ||
1264 | int __sb_start_write(struct super_block *sb, int level, bool wait) | ||
1265 | { | ||
1266 | retry: | ||
1267 | if (unlikely(sb->s_writers.frozen >= level)) { | ||
1268 | if (!wait) | ||
1269 | return 0; | ||
1270 | wait_event(sb->s_writers.wait_unfrozen, | ||
1271 | sb->s_writers.frozen < level); | ||
1272 | } | ||
1273 | |||
1274 | #ifdef CONFIG_LOCKDEP | ||
1275 | acquire_freeze_lock(sb, level, !wait, _RET_IP_); | ||
1276 | #endif | ||
1277 | percpu_counter_inc(&sb->s_writers.counter[level-1]); | ||
1278 | /* | ||
1279 | * Make sure counter is updated before we check for frozen. | ||
1280 | * freeze_super() first sets frozen and then checks the counter. | ||
1281 | */ | ||
1282 | smp_mb(); | ||
1283 | if (unlikely(sb->s_writers.frozen >= level)) { | ||
1284 | __sb_end_write(sb, level); | ||
1285 | goto retry; | ||
1286 | } | ||
1287 | return 1; | ||
1288 | } | ||
1289 | EXPORT_SYMBOL(__sb_start_write); | ||
1290 | |||
1291 | /** | ||
1292 | * sb_wait_write - wait until all writers to given file system finish | ||
1293 | * @sb: the super for which we wait | ||
1294 | * @level: type of writers we wait for (normal, page fault or internal) | | |
1295 | * | ||
1296 | * This function waits until there are no writers of given type to given file | ||
1297 | * system. Caller of this function should make sure there can be no new writers | ||
1298 | * of type @level before calling this function. Otherwise this function can | ||
1299 | * livelock. | ||
1300 | */ | ||
1301 | static void sb_wait_write(struct super_block *sb, int level) | ||
1302 | { | ||
1303 | s64 writers; | ||
1304 | |||
1305 | /* | ||
1306 | * We just cycle-through lockdep here so that it does not complain | ||
1307 | * about returning with lock to userspace | ||
1308 | */ | ||
1309 | rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_); | ||
1310 | rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_); | ||
1311 | |||
1312 | do { | ||
1313 | DEFINE_WAIT(wait); | ||
1314 | |||
1315 | /* | ||
1316 | * We use a barrier in prepare_to_wait() to separate setting | ||
1317 | * of frozen and checking of the counter | ||
1318 | */ | ||
1319 | prepare_to_wait(&sb->s_writers.wait, &wait, | ||
1320 | TASK_UNINTERRUPTIBLE); | ||
1321 | |||
1322 | writers = percpu_counter_sum(&sb->s_writers.counter[level-1]); | ||
1323 | if (writers) | ||
1324 | schedule(); | ||
1325 | |||
1326 | finish_wait(&sb->s_writers.wait, &wait); | ||
1327 | } while (writers); | ||
1328 | } | ||
1329 | |||
1166 | /** | 1330 | /** |
1167 | * freeze_super - lock the filesystem and force it into a consistent state | 1331 | * freeze_super - lock the filesystem and force it into a consistent state |
1168 | * @sb: the super to lock | 1332 | * @sb: the super to lock |
@@ -1170,6 +1334,31 @@ out: | |||
1170 | * Syncs the super to make sure the filesystem is consistent and calls the fs's | 1334 | * Syncs the super to make sure the filesystem is consistent and calls the fs's |
1171 | * freeze_fs. Subsequent calls to this without first thawing the fs will return | 1335 | * freeze_fs. Subsequent calls to this without first thawing the fs will return |
1172 | * -EBUSY. | 1336 | * -EBUSY. |
1337 | * | ||
1338 | * During this function, sb->s_writers.frozen goes through these values: | ||
1339 | * | ||
1340 | * SB_UNFROZEN: File system is normal, all writes progress as usual. | ||
1341 | * | ||
1342 | * SB_FREEZE_WRITE: The file system is in the process of being frozen. New | ||
1343 | * writes should be blocked, though page faults are still allowed. We wait for | ||
1344 | * all writes to complete and then proceed to the next stage. | ||
1345 | * | ||
1346 | * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked | ||
1347 | * but internal fs threads can still modify the filesystem (although they | ||
1348 | * should not dirty new pages or inodes), writeback can run etc. After waiting | ||
1349 | * for all running page faults we sync the filesystem which will clean all | ||
1350 | * dirty pages and inodes (no new dirty pages or inodes can be created when | ||
1351 | * sync is running). | ||
1352 | * | ||
1353 | * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs | ||
1354 | * modification are blocked (e.g. XFS preallocation truncation on inode | ||
1355 | * reclaim). This is usually implemented by blocking new transactions for | ||
1356 | * filesystems that have them and need this additional guard. After all | ||
1357 | * internal writers are finished we call ->freeze_fs() to finish filesystem | ||
1358 | * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is | ||
1359 | * mostly auxiliary for filesystems to verify they do not modify frozen fs. | ||
1360 | * | ||
1361 | * sb->s_writers.frozen is protected by sb->s_umount. | ||
1173 | */ | 1362 | */ |
1174 | int freeze_super(struct super_block *sb) | 1363 | int freeze_super(struct super_block *sb) |
1175 | { | 1364 | { |
@@ -1177,7 +1366,7 @@ int freeze_super(struct super_block *sb) | |||
1177 | 1366 | ||
1178 | atomic_inc(&sb->s_active); | 1367 | atomic_inc(&sb->s_active); |
1179 | down_write(&sb->s_umount); | 1368 | down_write(&sb->s_umount); |
1180 | if (sb->s_frozen) { | 1369 | if (sb->s_writers.frozen != SB_UNFROZEN) { |
1181 | deactivate_locked_super(sb); | 1370 | deactivate_locked_super(sb); |
1182 | return -EBUSY; | 1371 | return -EBUSY; |
1183 | } | 1372 | } |
@@ -1188,33 +1377,53 @@ int freeze_super(struct super_block *sb) | |||
1188 | } | 1377 | } |
1189 | 1378 | ||
1190 | if (sb->s_flags & MS_RDONLY) { | 1379 | if (sb->s_flags & MS_RDONLY) { |
1191 | sb->s_frozen = SB_FREEZE_TRANS; | 1380 | /* Nothing to do really... */ |
1192 | smp_wmb(); | 1381 | sb->s_writers.frozen = SB_FREEZE_COMPLETE; |
1193 | up_write(&sb->s_umount); | 1382 | up_write(&sb->s_umount); |
1194 | return 0; | 1383 | return 0; |
1195 | } | 1384 | } |
1196 | 1385 | ||
1197 | sb->s_frozen = SB_FREEZE_WRITE; | 1386 | /* From now on, no new normal writers can start */ |
1387 | sb->s_writers.frozen = SB_FREEZE_WRITE; | ||
1388 | smp_wmb(); | ||
1389 | |||
1390 | /* Release s_umount to preserve sb_start_write -> s_umount ordering */ | ||
1391 | up_write(&sb->s_umount); | ||
1392 | |||
1393 | sb_wait_write(sb, SB_FREEZE_WRITE); | ||
1394 | |||
1395 | /* Now we go and block page faults... */ | ||
1396 | down_write(&sb->s_umount); | ||
1397 | sb->s_writers.frozen = SB_FREEZE_PAGEFAULT; | ||
1198 | smp_wmb(); | 1398 | smp_wmb(); |
1199 | 1399 | ||
1400 | sb_wait_write(sb, SB_FREEZE_PAGEFAULT); | ||
1401 | |||
1402 | /* All writers are done so after syncing there won't be dirty data */ | ||
1200 | sync_filesystem(sb); | 1403 | sync_filesystem(sb); |
1201 | 1404 | ||
1202 | sb->s_frozen = SB_FREEZE_TRANS; | 1405 | /* Now wait for internal filesystem counter */ |
1406 | sb->s_writers.frozen = SB_FREEZE_FS; | ||
1203 | smp_wmb(); | 1407 | smp_wmb(); |
1408 | sb_wait_write(sb, SB_FREEZE_FS); | ||
1204 | 1409 | ||
1205 | sync_blockdev(sb->s_bdev); | ||
1206 | if (sb->s_op->freeze_fs) { | 1410 | if (sb->s_op->freeze_fs) { |
1207 | ret = sb->s_op->freeze_fs(sb); | 1411 | ret = sb->s_op->freeze_fs(sb); |
1208 | if (ret) { | 1412 | if (ret) { |
1209 | printk(KERN_ERR | 1413 | printk(KERN_ERR |
1210 | "VFS:Filesystem freeze failed\n"); | 1414 | "VFS:Filesystem freeze failed\n"); |
1211 | sb->s_frozen = SB_UNFROZEN; | 1415 | sb->s_writers.frozen = SB_UNFROZEN; |
1212 | smp_wmb(); | 1416 | smp_wmb(); |
1213 | wake_up(&sb->s_wait_unfrozen); | 1417 | wake_up(&sb->s_writers.wait_unfrozen); |
1214 | deactivate_locked_super(sb); | 1418 | deactivate_locked_super(sb); |
1215 | return ret; | 1419 | return ret; |
1216 | } | 1420 | } |
1217 | } | 1421 | } |
1422 | /* | ||
1423 | * This is just for debugging purposes so that fs can warn if it | ||
1424 | * sees write activity when frozen is set to SB_FREEZE_COMPLETE. | ||
1425 | */ | ||
1426 | sb->s_writers.frozen = SB_FREEZE_COMPLETE; | ||
1218 | up_write(&sb->s_umount); | 1427 | up_write(&sb->s_umount); |
1219 | return 0; | 1428 | return 0; |
1220 | } | 1429 | } |
@@ -1231,7 +1440,7 @@ int thaw_super(struct super_block *sb) | |||
1231 | int error; | 1440 | int error; |
1232 | 1441 | ||
1233 | down_write(&sb->s_umount); | 1442 | down_write(&sb->s_umount); |
1234 | if (sb->s_frozen == SB_UNFROZEN) { | 1443 | if (sb->s_writers.frozen == SB_UNFROZEN) { |
1235 | up_write(&sb->s_umount); | 1444 | up_write(&sb->s_umount); |
1236 | return -EINVAL; | 1445 | return -EINVAL; |
1237 | } | 1446 | } |
@@ -1244,16 +1453,15 @@ int thaw_super(struct super_block *sb) | |||
1244 | if (error) { | 1453 | if (error) { |
1245 | printk(KERN_ERR | 1454 | printk(KERN_ERR |
1246 | "VFS:Filesystem thaw failed\n"); | 1455 | "VFS:Filesystem thaw failed\n"); |
1247 | sb->s_frozen = SB_FREEZE_TRANS; | ||
1248 | up_write(&sb->s_umount); | 1456 | up_write(&sb->s_umount); |
1249 | return error; | 1457 | return error; |
1250 | } | 1458 | } |
1251 | } | 1459 | } |
1252 | 1460 | ||
1253 | out: | 1461 | out: |
1254 | sb->s_frozen = SB_UNFROZEN; | 1462 | sb->s_writers.frozen = SB_UNFROZEN; |
1255 | smp_wmb(); | 1463 | smp_wmb(); |
1256 | wake_up(&sb->s_wait_unfrozen); | 1464 | wake_up(&sb->s_writers.wait_unfrozen); |
1257 | deactivate_locked_super(sb); | 1465 | deactivate_locked_super(sb); |
1258 | 1466 | ||
1259 | return 0; | 1467 | return 0; |
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index a4759833d62d..614b2b544880 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
228 | ret = 0; | 228 | ret = 0; |
229 | if (bb->vm_ops->page_mkwrite) | 229 | if (bb->vm_ops->page_mkwrite) |
230 | ret = bb->vm_ops->page_mkwrite(vma, vmf); | 230 | ret = bb->vm_ops->page_mkwrite(vma, vmf); |
231 | else | ||
232 | file_update_time(file); | ||
231 | 233 | ||
232 | sysfs_put_active(attr_sd); | 234 | sysfs_put_active(attr_sd); |
233 | return ret; | 235 | return ret; |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 15052ff916ec..e562dd43f41f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -124,6 +124,12 @@ xfs_setfilesize_trans_alloc( | |||
124 | ioend->io_append_trans = tp; | 124 | ioend->io_append_trans = tp; |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * We will pass freeze protection with a transaction. So tell lockdep | ||
128 | * we released it. | ||
129 | */ | ||
130 | rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
131 | 1, _THIS_IP_); | ||
132 | /* | ||
127 | * We hand off the transaction to the completion thread now, so | 133 | * We hand off the transaction to the completion thread now, so |
128 | * clear the flag here. | 134 | * clear the flag here. |
129 | */ | 135 | */ |
@@ -199,6 +205,15 @@ xfs_end_io( | |||
199 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 205 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
200 | int error = 0; | 206 | int error = 0; |
201 | 207 | ||
208 | if (ioend->io_append_trans) { | ||
209 | /* | ||
210 | * We've got freeze protection passed with the transaction. | ||
211 | * Tell lockdep about it. | ||
212 | */ | ||
213 | rwsem_acquire_read( | ||
214 | &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
215 | 0, 1, _THIS_IP_); | ||
216 | } | ||
202 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 217 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
203 | ioend->io_error = -EIO; | 218 | ioend->io_error = -EIO; |
204 | goto done; | 219 | goto done; |
@@ -1425,6 +1440,9 @@ out_trans_cancel: | |||
1425 | if (ioend->io_append_trans) { | 1440 | if (ioend->io_append_trans) { |
1426 | current_set_flags_nested(&ioend->io_append_trans->t_pflags, | 1441 | current_set_flags_nested(&ioend->io_append_trans->t_pflags, |
1427 | PF_FSTRANS); | 1442 | PF_FSTRANS); |
1443 | rwsem_acquire_read( | ||
1444 | &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
1445 | 0, 1, _THIS_IP_); | ||
1428 | xfs_trans_cancel(ioend->io_append_trans, 0); | 1446 | xfs_trans_cancel(ioend->io_append_trans, 0); |
1429 | } | 1447 | } |
1430 | out_destroy_ioend: | 1448 | out_destroy_ioend: |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c4559c6e6f2c..56afcdb2377d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -770,10 +770,12 @@ xfs_file_aio_write( | |||
770 | if (ocount == 0) | 770 | if (ocount == 0) |
771 | return 0; | 771 | return 0; |
772 | 772 | ||
773 | xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); | 773 | sb_start_write(inode->i_sb); |
774 | 774 | ||
775 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 775 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
776 | return -EIO; | 776 | ret = -EIO; |
777 | goto out; | ||
778 | } | ||
777 | 779 | ||
778 | if (unlikely(file->f_flags & O_DIRECT)) | 780 | if (unlikely(file->f_flags & O_DIRECT)) |
779 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); | 781 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); |
@@ -792,6 +794,8 @@ xfs_file_aio_write( | |||
792 | ret = err; | 794 | ret = err; |
793 | } | 795 | } |
794 | 796 | ||
797 | out: | ||
798 | sb_end_write(inode->i_sb); | ||
795 | return ret; | 799 | return ret; |
796 | } | 800 | } |
797 | 801 | ||
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f1535d25a9b..0e0232c3b6d9 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -364,9 +364,15 @@ xfs_fssetdm_by_handle( | |||
364 | if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) | 364 | if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) |
365 | return -XFS_ERROR(EFAULT); | 365 | return -XFS_ERROR(EFAULT); |
366 | 366 | ||
367 | error = mnt_want_write_file(parfilp); | ||
368 | if (error) | ||
369 | return error; | ||
370 | |||
367 | dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); | 371 | dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); |
368 | if (IS_ERR(dentry)) | 372 | if (IS_ERR(dentry)) { |
373 | mnt_drop_write_file(parfilp); | ||
369 | return PTR_ERR(dentry); | 374 | return PTR_ERR(dentry); |
375 | } | ||
370 | 376 | ||
371 | if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { | 377 | if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { |
372 | error = -XFS_ERROR(EPERM); | 378 | error = -XFS_ERROR(EPERM); |
@@ -382,6 +388,7 @@ xfs_fssetdm_by_handle( | |||
382 | fsd.fsd_dmstate); | 388 | fsd.fsd_dmstate); |
383 | 389 | ||
384 | out: | 390 | out: |
391 | mnt_drop_write_file(parfilp); | ||
385 | dput(dentry); | 392 | dput(dentry); |
386 | return error; | 393 | return error; |
387 | } | 394 | } |
@@ -634,7 +641,11 @@ xfs_ioc_space( | |||
634 | if (ioflags & IO_INVIS) | 641 | if (ioflags & IO_INVIS) |
635 | attr_flags |= XFS_ATTR_DMI; | 642 | attr_flags |= XFS_ATTR_DMI; |
636 | 643 | ||
644 | error = mnt_want_write_file(filp); | ||
645 | if (error) | ||
646 | return error; | ||
637 | error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); | 647 | error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); |
648 | mnt_drop_write_file(filp); | ||
638 | return -error; | 649 | return -error; |
639 | } | 650 | } |
640 | 651 | ||
@@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr( | |||
1163 | { | 1174 | { |
1164 | struct fsxattr fa; | 1175 | struct fsxattr fa; |
1165 | unsigned int mask; | 1176 | unsigned int mask; |
1177 | int error; | ||
1166 | 1178 | ||
1167 | if (copy_from_user(&fa, arg, sizeof(fa))) | 1179 | if (copy_from_user(&fa, arg, sizeof(fa))) |
1168 | return -EFAULT; | 1180 | return -EFAULT; |
@@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr( | |||
1171 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 1183 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
1172 | mask |= FSX_NONBLOCK; | 1184 | mask |= FSX_NONBLOCK; |
1173 | 1185 | ||
1174 | return -xfs_ioctl_setattr(ip, &fa, mask); | 1186 | error = mnt_want_write_file(filp); |
1187 | if (error) | ||
1188 | return error; | ||
1189 | error = xfs_ioctl_setattr(ip, &fa, mask); | ||
1190 | mnt_drop_write_file(filp); | ||
1191 | return -error; | ||
1175 | } | 1192 | } |
1176 | 1193 | ||
1177 | STATIC int | 1194 | STATIC int |
@@ -1196,6 +1213,7 @@ xfs_ioc_setxflags( | |||
1196 | struct fsxattr fa; | 1213 | struct fsxattr fa; |
1197 | unsigned int flags; | 1214 | unsigned int flags; |
1198 | unsigned int mask; | 1215 | unsigned int mask; |
1216 | int error; | ||
1199 | 1217 | ||
1200 | if (copy_from_user(&flags, arg, sizeof(flags))) | 1218 | if (copy_from_user(&flags, arg, sizeof(flags))) |
1201 | return -EFAULT; | 1219 | return -EFAULT; |
@@ -1210,7 +1228,12 @@ xfs_ioc_setxflags( | |||
1210 | mask |= FSX_NONBLOCK; | 1228 | mask |= FSX_NONBLOCK; |
1211 | fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); | 1229 | fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); |
1212 | 1230 | ||
1213 | return -xfs_ioctl_setattr(ip, &fa, mask); | 1231 | error = mnt_want_write_file(filp); |
1232 | if (error) | ||
1233 | return error; | ||
1234 | error = xfs_ioctl_setattr(ip, &fa, mask); | ||
1235 | mnt_drop_write_file(filp); | ||
1236 | return -error; | ||
1214 | } | 1237 | } |
1215 | 1238 | ||
1216 | STATIC int | 1239 | STATIC int |
@@ -1385,8 +1408,13 @@ xfs_file_ioctl( | |||
1385 | if (copy_from_user(&dmi, arg, sizeof(dmi))) | 1408 | if (copy_from_user(&dmi, arg, sizeof(dmi))) |
1386 | return -XFS_ERROR(EFAULT); | 1409 | return -XFS_ERROR(EFAULT); |
1387 | 1410 | ||
1411 | error = mnt_want_write_file(filp); | ||
1412 | if (error) | ||
1413 | return error; | ||
1414 | |||
1388 | error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, | 1415 | error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, |
1389 | dmi.fsd_dmstate); | 1416 | dmi.fsd_dmstate); |
1417 | mnt_drop_write_file(filp); | ||
1390 | return -error; | 1418 | return -error; |
1391 | } | 1419 | } |
1392 | 1420 | ||
@@ -1434,7 +1462,11 @@ xfs_file_ioctl( | |||
1434 | 1462 | ||
1435 | if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) | 1463 | if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) |
1436 | return -XFS_ERROR(EFAULT); | 1464 | return -XFS_ERROR(EFAULT); |
1465 | error = mnt_want_write_file(filp); | ||
1466 | if (error) | ||
1467 | return error; | ||
1437 | error = xfs_swapext(&sxp); | 1468 | error = xfs_swapext(&sxp); |
1469 | mnt_drop_write_file(filp); | ||
1438 | return -error; | 1470 | return -error; |
1439 | } | 1471 | } |
1440 | 1472 | ||
@@ -1463,9 +1495,14 @@ xfs_file_ioctl( | |||
1463 | if (copy_from_user(&inout, arg, sizeof(inout))) | 1495 | if (copy_from_user(&inout, arg, sizeof(inout))) |
1464 | return -XFS_ERROR(EFAULT); | 1496 | return -XFS_ERROR(EFAULT); |
1465 | 1497 | ||
1498 | error = mnt_want_write_file(filp); | ||
1499 | if (error) | ||
1500 | return error; | ||
1501 | |||
1466 | /* input parameter is passed in resblks field of structure */ | 1502 | /* input parameter is passed in resblks field of structure */ |
1467 | in = inout.resblks; | 1503 | in = inout.resblks; |
1468 | error = xfs_reserve_blocks(mp, &in, &inout); | 1504 | error = xfs_reserve_blocks(mp, &in, &inout); |
1505 | mnt_drop_write_file(filp); | ||
1469 | if (error) | 1506 | if (error) |
1470 | return -error; | 1507 | return -error; |
1471 | 1508 | ||
@@ -1496,7 +1533,11 @@ xfs_file_ioctl( | |||
1496 | if (copy_from_user(&in, arg, sizeof(in))) | 1533 | if (copy_from_user(&in, arg, sizeof(in))) |
1497 | return -XFS_ERROR(EFAULT); | 1534 | return -XFS_ERROR(EFAULT); |
1498 | 1535 | ||
1536 | error = mnt_want_write_file(filp); | ||
1537 | if (error) | ||
1538 | return error; | ||
1499 | error = xfs_growfs_data(mp, &in); | 1539 | error = xfs_growfs_data(mp, &in); |
1540 | mnt_drop_write_file(filp); | ||
1500 | return -error; | 1541 | return -error; |
1501 | } | 1542 | } |
1502 | 1543 | ||
@@ -1506,7 +1547,11 @@ xfs_file_ioctl( | |||
1506 | if (copy_from_user(&in, arg, sizeof(in))) | 1547 | if (copy_from_user(&in, arg, sizeof(in))) |
1507 | return -XFS_ERROR(EFAULT); | 1548 | return -XFS_ERROR(EFAULT); |
1508 | 1549 | ||
1550 | error = mnt_want_write_file(filp); | ||
1551 | if (error) | ||
1552 | return error; | ||
1509 | error = xfs_growfs_log(mp, &in); | 1553 | error = xfs_growfs_log(mp, &in); |
1554 | mnt_drop_write_file(filp); | ||
1510 | return -error; | 1555 | return -error; |
1511 | } | 1556 | } |
1512 | 1557 | ||
@@ -1516,7 +1561,11 @@ xfs_file_ioctl( | |||
1516 | if (copy_from_user(&in, arg, sizeof(in))) | 1561 | if (copy_from_user(&in, arg, sizeof(in))) |
1517 | return -XFS_ERROR(EFAULT); | 1562 | return -XFS_ERROR(EFAULT); |
1518 | 1563 | ||
1564 | error = mnt_want_write_file(filp); | ||
1565 | if (error) | ||
1566 | return error; | ||
1519 | error = xfs_growfs_rt(mp, &in); | 1567 | error = xfs_growfs_rt(mp, &in); |
1568 | mnt_drop_write_file(filp); | ||
1520 | return -error; | 1569 | return -error; |
1521 | } | 1570 | } |
1522 | 1571 | ||
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index c4f2da0d2bf5..1244274a5674 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -600,7 +600,11 @@ xfs_file_compat_ioctl( | |||
600 | 600 | ||
601 | if (xfs_compat_growfs_data_copyin(&in, arg)) | 601 | if (xfs_compat_growfs_data_copyin(&in, arg)) |
602 | return -XFS_ERROR(EFAULT); | 602 | return -XFS_ERROR(EFAULT); |
603 | error = mnt_want_write_file(filp); | ||
604 | if (error) | ||
605 | return error; | ||
603 | error = xfs_growfs_data(mp, &in); | 606 | error = xfs_growfs_data(mp, &in); |
607 | mnt_drop_write_file(filp); | ||
604 | return -error; | 608 | return -error; |
605 | } | 609 | } |
606 | case XFS_IOC_FSGROWFSRT_32: { | 610 | case XFS_IOC_FSGROWFSRT_32: { |
@@ -608,7 +612,11 @@ xfs_file_compat_ioctl( | |||
608 | 612 | ||
609 | if (xfs_compat_growfs_rt_copyin(&in, arg)) | 613 | if (xfs_compat_growfs_rt_copyin(&in, arg)) |
610 | return -XFS_ERROR(EFAULT); | 614 | return -XFS_ERROR(EFAULT); |
615 | error = mnt_want_write_file(filp); | ||
616 | if (error) | ||
617 | return error; | ||
611 | error = xfs_growfs_rt(mp, &in); | 618 | error = xfs_growfs_rt(mp, &in); |
619 | mnt_drop_write_file(filp); | ||
612 | return -error; | 620 | return -error; |
613 | } | 621 | } |
614 | #endif | 622 | #endif |
@@ -627,7 +635,11 @@ xfs_file_compat_ioctl( | |||
627 | offsetof(struct xfs_swapext, sx_stat)) || | 635 | offsetof(struct xfs_swapext, sx_stat)) || |
628 | xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) | 636 | xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) |
629 | return -XFS_ERROR(EFAULT); | 637 | return -XFS_ERROR(EFAULT); |
638 | error = mnt_want_write_file(filp); | ||
639 | if (error) | ||
640 | return error; | ||
630 | error = xfs_swapext(&sxp); | 641 | error = xfs_swapext(&sxp); |
642 | mnt_drop_write_file(filp); | ||
631 | return -error; | 643 | return -error; |
632 | } | 644 | } |
633 | case XFS_IOC_FSBULKSTAT_32: | 645 | case XFS_IOC_FSBULKSTAT_32: |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 915edf6639f0..973dff6ad935 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -680,9 +680,9 @@ xfs_iomap_write_unwritten( | |||
680 | * the same inode that we complete here and might deadlock | 680 | * the same inode that we complete here and might deadlock |
681 | * on the iolock. | 681 | * on the iolock. |
682 | */ | 682 | */ |
683 | xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); | 683 | sb_start_intwrite(mp->m_super); |
684 | tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); | 684 | tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); |
685 | tp->t_flags |= XFS_TRANS_RESERVE; | 685 | tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; |
686 | error = xfs_trans_reserve(tp, resblks, | 686 | error = xfs_trans_reserve(tp, resblks, |
687 | XFS_WRITE_LOG_RES(mp), 0, | 687 | XFS_WRITE_LOG_RES(mp), 0, |
688 | XFS_TRANS_PERM_LOG_RES, | 688 | XFS_TRANS_PERM_LOG_RES, |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 711ca51ca3d7..29c2f83d4147 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1551,7 +1551,7 @@ xfs_unmountfs( | |||
1551 | int | 1551 | int |
1552 | xfs_fs_writable(xfs_mount_t *mp) | 1552 | xfs_fs_writable(xfs_mount_t *mp) |
1553 | { | 1553 | { |
1554 | return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) || | 1554 | return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) || |
1555 | (mp->m_flags & XFS_MOUNT_RDONLY)); | 1555 | (mp->m_flags & XFS_MOUNT_RDONLY)); |
1556 | } | 1556 | } |
1557 | 1557 | ||
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8724336a9a08..05a05a7b6119 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -311,9 +311,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, | |||
311 | #define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ | 311 | #define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ |
312 | #define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ | 312 | #define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ |
313 | 313 | ||
314 | #define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) | ||
315 | #define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) | ||
316 | |||
317 | /* | 314 | /* |
318 | * Flags for xfs_mountfs | 315 | * Flags for xfs_mountfs |
319 | */ | 316 | */ |
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 97304f10e78a..96548176db80 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -403,7 +403,7 @@ xfs_sync_worker( | |||
403 | if (!(mp->m_super->s_flags & MS_ACTIVE) && | 403 | if (!(mp->m_super->s_flags & MS_ACTIVE) && |
404 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { | 404 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
405 | /* dgc: errors ignored here */ | 405 | /* dgc: errors ignored here */ |
406 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 406 | if (mp->m_super->s_writers.frozen == SB_UNFROZEN && |
407 | xfs_log_need_covered(mp)) | 407 | xfs_log_need_covered(mp)) |
408 | error = xfs_fs_log_dummy(mp); | 408 | error = xfs_fs_log_dummy(mp); |
409 | else | 409 | else |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fdf324508c5e..06ed520a767f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -576,8 +576,12 @@ xfs_trans_alloc( | |||
576 | xfs_mount_t *mp, | 576 | xfs_mount_t *mp, |
577 | uint type) | 577 | uint type) |
578 | { | 578 | { |
579 | xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); | 579 | xfs_trans_t *tp; |
580 | return _xfs_trans_alloc(mp, type, KM_SLEEP); | 580 | |
581 | sb_start_intwrite(mp->m_super); | ||
582 | tp = _xfs_trans_alloc(mp, type, KM_SLEEP); | ||
583 | tp->t_flags |= XFS_TRANS_FREEZE_PROT; | ||
584 | return tp; | ||
581 | } | 585 | } |
582 | 586 | ||
583 | xfs_trans_t * | 587 | xfs_trans_t * |
@@ -588,6 +592,7 @@ _xfs_trans_alloc( | |||
588 | { | 592 | { |
589 | xfs_trans_t *tp; | 593 | xfs_trans_t *tp; |
590 | 594 | ||
595 | WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); | ||
591 | atomic_inc(&mp->m_active_trans); | 596 | atomic_inc(&mp->m_active_trans); |
592 | 597 | ||
593 | tp = kmem_zone_zalloc(xfs_trans_zone, memflags); | 598 | tp = kmem_zone_zalloc(xfs_trans_zone, memflags); |
@@ -611,6 +616,8 @@ xfs_trans_free( | |||
611 | xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); | 616 | xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); |
612 | 617 | ||
613 | atomic_dec(&tp->t_mountp->m_active_trans); | 618 | atomic_dec(&tp->t_mountp->m_active_trans); |
619 | if (tp->t_flags & XFS_TRANS_FREEZE_PROT) | ||
620 | sb_end_intwrite(tp->t_mountp->m_super); | ||
614 | xfs_trans_free_dqinfo(tp); | 621 | xfs_trans_free_dqinfo(tp); |
615 | kmem_zone_free(xfs_trans_zone, tp); | 622 | kmem_zone_free(xfs_trans_zone, tp); |
616 | } | 623 | } |
@@ -643,7 +650,11 @@ xfs_trans_dup( | |||
643 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 650 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
644 | ASSERT(tp->t_ticket != NULL); | 651 | ASSERT(tp->t_ticket != NULL); |
645 | 652 | ||
646 | ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); | 653 | ntp->t_flags = XFS_TRANS_PERM_LOG_RES | |
654 | (tp->t_flags & XFS_TRANS_RESERVE) | | ||
655 | (tp->t_flags & XFS_TRANS_FREEZE_PROT); | ||
656 | /* We gave our writer reference to the new transaction */ | ||
657 | tp->t_flags &= ~XFS_TRANS_FREEZE_PROT; | ||
647 | ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); | 658 | ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); |
648 | ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; | 659 | ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; |
649 | tp->t_blk_res = tp->t_blk_res_used; | 660 | tp->t_blk_res = tp->t_blk_res_used; |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index bc2afd52a0b7..db056544cbb5 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -179,6 +179,8 @@ struct xfs_log_item_desc { | |||
179 | #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ | 179 | #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ |
180 | #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ | 180 | #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ |
181 | #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ | 181 | #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ |
182 | #define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer | ||
183 | count in superblock */ | ||
182 | 184 | ||
183 | /* | 185 | /* |
184 | * Values for call flags parameter. | 186 | * Values for call flags parameter. |