aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c1
-rw-r--r--fs/bio.c158
-rw-r--r--fs/cramfs/inode.c1
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/ext2/ioctl.c57
-rw-r--r--fs/ext3/ialloc.c2
-rw-r--r--fs/ext3/inode.c6
-rw-r--r--fs/ext3/ioctl.c103
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c6
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/fat/file.c12
-rw-r--r--fs/file_table.c42
-rw-r--r--fs/hfsplus/ioctl.c40
-rw-r--r--fs/inode.c51
-rw-r--r--fs/jffs2/jffs2_fs_i.h2
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jfs/ioctl.c33
-rw-r--r--fs/locks.c1
-rw-r--r--fs/namei.c275
-rw-r--r--fs/namespace.c316
-rw-r--r--fs/ncpfs/ioctl.c54
-rw-r--r--fs/nfs/dir.c3
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/vfs.c72
-rw-r--r--fs/ocfs2/ioctl.c11
-rw-r--r--fs/open.c149
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/reiserfs/ioctl.c63
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/xattr.c1
-rw-r--r--fs/select.c2
-rw-r--r--fs/super.c24
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/sysfs/file.c6
-rw-r--r--fs/utimes.c18
-rw-r--r--fs/xattr.c40
-rw-r--r--fs/xfs/linux-2.6/sema.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c9
45 files changed, 1301 insertions, 411 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index dfebdbe7440e..3031e3233dd6 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -26,7 +26,6 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/idr.h> 28#include <linux/idr.h>
29#include <asm/semaphore.h>
30#include <net/9p/9p.h> 29#include <net/9p/9p.h>
31#include <net/9p/client.h> 30#include <net/9p/client.h>
32 31
diff --git a/fs/bio.c b/fs/bio.c
index 553b5b7960ad..6e0b6f66df03 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -444,22 +444,27 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
444 444
445struct bio_map_data { 445struct bio_map_data {
446 struct bio_vec *iovecs; 446 struct bio_vec *iovecs;
447 void __user *userptr; 447 int nr_sgvecs;
448 struct sg_iovec *sgvecs;
448}; 449};
449 450
450static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio) 451static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
452 struct sg_iovec *iov, int iov_count)
451{ 453{
452 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); 454 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
455 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
456 bmd->nr_sgvecs = iov_count;
453 bio->bi_private = bmd; 457 bio->bi_private = bmd;
454} 458}
455 459
456static void bio_free_map_data(struct bio_map_data *bmd) 460static void bio_free_map_data(struct bio_map_data *bmd)
457{ 461{
458 kfree(bmd->iovecs); 462 kfree(bmd->iovecs);
463 kfree(bmd->sgvecs);
459 kfree(bmd); 464 kfree(bmd);
460} 465}
461 466
462static struct bio_map_data *bio_alloc_map_data(int nr_segs) 467static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
463{ 468{
464 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); 469 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL);
465 470
@@ -467,13 +472,71 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs)
467 return NULL; 472 return NULL;
468 473
469 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); 474 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL);
470 if (bmd->iovecs) 475 if (!bmd->iovecs) {
476 kfree(bmd);
477 return NULL;
478 }
479
480 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL);
481 if (bmd->sgvecs)
471 return bmd; 482 return bmd;
472 483
484 kfree(bmd->iovecs);
473 kfree(bmd); 485 kfree(bmd);
474 return NULL; 486 return NULL;
475} 487}
476 488
489static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
490 int uncopy)
491{
492 int ret = 0, i;
493 struct bio_vec *bvec;
494 int iov_idx = 0;
495 unsigned int iov_off = 0;
496 int read = bio_data_dir(bio) == READ;
497
498 __bio_for_each_segment(bvec, bio, i, 0) {
499 char *bv_addr = page_address(bvec->bv_page);
500 unsigned int bv_len = bvec->bv_len;
501
502 while (bv_len && iov_idx < iov_count) {
503 unsigned int bytes;
504 char *iov_addr;
505
506 bytes = min_t(unsigned int,
507 iov[iov_idx].iov_len - iov_off, bv_len);
508 iov_addr = iov[iov_idx].iov_base + iov_off;
509
510 if (!ret) {
511 if (!read && !uncopy)
512 ret = copy_from_user(bv_addr, iov_addr,
513 bytes);
514 if (read && uncopy)
515 ret = copy_to_user(iov_addr, bv_addr,
516 bytes);
517
518 if (ret)
519 ret = -EFAULT;
520 }
521
522 bv_len -= bytes;
523 bv_addr += bytes;
524 iov_addr += bytes;
525 iov_off += bytes;
526
527 if (iov[iov_idx].iov_len == iov_off) {
528 iov_idx++;
529 iov_off = 0;
530 }
531 }
532
533 if (uncopy)
534 __free_page(bvec->bv_page);
535 }
536
537 return ret;
538}
539
477/** 540/**
478 * bio_uncopy_user - finish previously mapped bio 541 * bio_uncopy_user - finish previously mapped bio
479 * @bio: bio being terminated 542 * @bio: bio being terminated
@@ -484,55 +547,56 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs)
484int bio_uncopy_user(struct bio *bio) 547int bio_uncopy_user(struct bio *bio)
485{ 548{
486 struct bio_map_data *bmd = bio->bi_private; 549 struct bio_map_data *bmd = bio->bi_private;
487 const int read = bio_data_dir(bio) == READ; 550 int ret;
488 struct bio_vec *bvec;
489 int i, ret = 0;
490 551
491 __bio_for_each_segment(bvec, bio, i, 0) { 552 ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1);
492 char *addr = page_address(bvec->bv_page);
493 unsigned int len = bmd->iovecs[i].bv_len;
494 553
495 if (read && !ret && copy_to_user(bmd->userptr, addr, len))
496 ret = -EFAULT;
497
498 __free_page(bvec->bv_page);
499 bmd->userptr += len;
500 }
501 bio_free_map_data(bmd); 554 bio_free_map_data(bmd);
502 bio_put(bio); 555 bio_put(bio);
503 return ret; 556 return ret;
504} 557}
505 558
506/** 559/**
507 * bio_copy_user - copy user data to bio 560 * bio_copy_user_iov - copy user data to bio
508 * @q: destination block queue 561 * @q: destination block queue
509 * @uaddr: start of user address 562 * @iov: the iovec.
510 * @len: length in bytes 563 * @iov_count: number of elements in the iovec
511 * @write_to_vm: bool indicating writing to pages or not 564 * @write_to_vm: bool indicating writing to pages or not
512 * 565 *
513 * Prepares and returns a bio for indirect user io, bouncing data 566 * Prepares and returns a bio for indirect user io, bouncing data
514 * to/from kernel pages as necessary. Must be paired with 567 * to/from kernel pages as necessary. Must be paired with
515 * call bio_uncopy_user() on io completion. 568 * call bio_uncopy_user() on io completion.
516 */ 569 */
517struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, 570struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
518 unsigned int len, int write_to_vm) 571 int iov_count, int write_to_vm)
519{ 572{
520 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
521 unsigned long start = uaddr >> PAGE_SHIFT;
522 struct bio_map_data *bmd; 573 struct bio_map_data *bmd;
523 struct bio_vec *bvec; 574 struct bio_vec *bvec;
524 struct page *page; 575 struct page *page;
525 struct bio *bio; 576 struct bio *bio;
526 int i, ret; 577 int i, ret;
578 int nr_pages = 0;
579 unsigned int len = 0;
527 580
528 bmd = bio_alloc_map_data(end - start); 581 for (i = 0; i < iov_count; i++) {
582 unsigned long uaddr;
583 unsigned long end;
584 unsigned long start;
585
586 uaddr = (unsigned long)iov[i].iov_base;
587 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
588 start = uaddr >> PAGE_SHIFT;
589
590 nr_pages += end - start;
591 len += iov[i].iov_len;
592 }
593
594 bmd = bio_alloc_map_data(nr_pages, iov_count);
529 if (!bmd) 595 if (!bmd)
530 return ERR_PTR(-ENOMEM); 596 return ERR_PTR(-ENOMEM);
531 597
532 bmd->userptr = (void __user *) uaddr;
533
534 ret = -ENOMEM; 598 ret = -ENOMEM;
535 bio = bio_alloc(GFP_KERNEL, end - start); 599 bio = bio_alloc(GFP_KERNEL, nr_pages);
536 if (!bio) 600 if (!bio)
537 goto out_bmd; 601 goto out_bmd;
538 602
@@ -564,22 +628,12 @@ struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
564 * success 628 * success
565 */ 629 */
566 if (!write_to_vm) { 630 if (!write_to_vm) {
567 char __user *p = (char __user *) uaddr; 631 ret = __bio_copy_iov(bio, iov, iov_count, 0);
568 632 if (ret)
569 /* 633 goto cleanup;
570 * for a write, copy in data to kernel pages
571 */
572 ret = -EFAULT;
573 bio_for_each_segment(bvec, bio, i) {
574 char *addr = page_address(bvec->bv_page);
575
576 if (copy_from_user(addr, p, bvec->bv_len))
577 goto cleanup;
578 p += bvec->bv_len;
579 }
580 } 634 }
581 635
582 bio_set_map_data(bmd, bio); 636 bio_set_map_data(bmd, bio, iov, iov_count);
583 return bio; 637 return bio;
584cleanup: 638cleanup:
585 bio_for_each_segment(bvec, bio, i) 639 bio_for_each_segment(bvec, bio, i)
@@ -591,6 +645,28 @@ out_bmd:
591 return ERR_PTR(ret); 645 return ERR_PTR(ret);
592} 646}
593 647
648/**
649 * bio_copy_user - copy user data to bio
650 * @q: destination block queue
651 * @uaddr: start of user address
652 * @len: length in bytes
653 * @write_to_vm: bool indicating writing to pages or not
654 *
655 * Prepares and returns a bio for indirect user io, bouncing data
656 * to/from kernel pages as necessary. Must be paired with
657 * call bio_uncopy_user() on io completion.
658 */
659struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
660 unsigned int len, int write_to_vm)
661{
662 struct sg_iovec iov;
663
664 iov.iov_base = (void __user *)uaddr;
665 iov.iov_len = len;
666
667 return bio_copy_user_iov(q, &iov, 1, write_to_vm);
668}
669
594static struct bio *__bio_map_user_iov(struct request_queue *q, 670static struct bio *__bio_map_user_iov(struct request_queue *q,
595 struct block_device *bdev, 671 struct block_device *bdev,
596 struct sg_iovec *iov, int iov_count, 672 struct sg_iovec *iov, int iov_count,
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 350680fd7da7..0c3b618c15b3 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -23,7 +23,6 @@
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/vfs.h> 24#include <linux/vfs.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <asm/semaphore.h>
27 26
28#include <asm/uaccess.h> 27#include <asm/uaccess.h>
29 28
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index d30ea8b433a2..7a8824f475f2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -37,7 +37,6 @@
37#include <linux/jhash.h> 37#include <linux/jhash.h>
38#include <linux/miscdevice.h> 38#include <linux/miscdevice.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <asm/semaphore.h>
41#include <asm/uaccess.h> 40#include <asm/uaccess.h>
42 41
43#include <linux/dlm.h> 42#include <linux/dlm.h>
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 5deb8b74e649..08f647d8188d 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -253,7 +253,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
253 * it has too few free inodes left (min_inodes) or 253 * it has too few free inodes left (min_inodes) or
254 * it has too few free blocks left (min_blocks) or 254 * it has too few free blocks left (min_blocks) or
255 * it's already running too large debt (max_debt). 255 * it's already running too large debt (max_debt).
256 * Parent's group is prefered, if it doesn't satisfy these 256 * Parent's group is preferred, if it doesn't satisfy these
257 * conditions we search cyclically through the rest. If none 257 * conditions we search cyclically through the rest. If none
258 * of the groups look good we just look for a group with more 258 * of the groups look good we just look for a group with more
259 * free inodes than average (starting at parent's group). 259 * free inodes than average (starting at parent's group).
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c62006805427..b8a2990bab83 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -239,7 +239,7 @@ no_block:
239 * @inode: owner 239 * @inode: owner
240 * @ind: descriptor of indirect block. 240 * @ind: descriptor of indirect block.
241 * 241 *
242 * This function returns the prefered place for block allocation. 242 * This function returns the preferred place for block allocation.
243 * It is used when heuristic for sequential allocation fails. 243 * It is used when heuristic for sequential allocation fails.
244 * Rules are: 244 * Rules are:
245 * + if there is a block to the left of our position - allocate near it. 245 * + if there is a block to the left of our position - allocate near it.
@@ -283,7 +283,7 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
283} 283}
284 284
285/** 285/**
286 * ext2_find_goal - find a prefered place for allocation. 286 * ext2_find_goal - find a preferred place for allocation.
287 * @inode: owner 287 * @inode: owner
288 * @block: block we want 288 * @block: block we want
289 * @partial: pointer to the last triple within a chain 289 * @partial: pointer to the last triple within a chain
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index b8ea11fee5c6..de876fa793e1 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/mount.h>
15#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
16#include <asm/current.h> 17#include <asm/current.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
@@ -23,6 +24,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
23 struct ext2_inode_info *ei = EXT2_I(inode); 24 struct ext2_inode_info *ei = EXT2_I(inode);
24 unsigned int flags; 25 unsigned int flags;
25 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
27 int ret;
26 28
27 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); 29 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
28 30
@@ -34,14 +36,19 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
34 case EXT2_IOC_SETFLAGS: { 36 case EXT2_IOC_SETFLAGS: {
35 unsigned int oldflags; 37 unsigned int oldflags;
36 38
37 if (IS_RDONLY(inode)) 39 ret = mnt_want_write(filp->f_path.mnt);
38 return -EROFS; 40 if (ret)
41 return ret;
39 42
40 if (!is_owner_or_cap(inode)) 43 if (!is_owner_or_cap(inode)) {
41 return -EACCES; 44 ret = -EACCES;
45 goto setflags_out;
46 }
42 47
43 if (get_user(flags, (int __user *) arg)) 48 if (get_user(flags, (int __user *) arg)) {
44 return -EFAULT; 49 ret = -EFAULT;
50 goto setflags_out;
51 }
45 52
46 if (!S_ISDIR(inode->i_mode)) 53 if (!S_ISDIR(inode->i_mode))
47 flags &= ~EXT2_DIRSYNC_FL; 54 flags &= ~EXT2_DIRSYNC_FL;
@@ -50,7 +57,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
50 /* Is it quota file? Do not allow user to mess with it */ 57 /* Is it quota file? Do not allow user to mess with it */
51 if (IS_NOQUOTA(inode)) { 58 if (IS_NOQUOTA(inode)) {
52 mutex_unlock(&inode->i_mutex); 59 mutex_unlock(&inode->i_mutex);
53 return -EPERM; 60 ret = -EPERM;
61 goto setflags_out;
54 } 62 }
55 oldflags = ei->i_flags; 63 oldflags = ei->i_flags;
56 64
@@ -63,7 +71,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
63 if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { 71 if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
64 if (!capable(CAP_LINUX_IMMUTABLE)) { 72 if (!capable(CAP_LINUX_IMMUTABLE)) {
65 mutex_unlock(&inode->i_mutex); 73 mutex_unlock(&inode->i_mutex);
66 return -EPERM; 74 ret = -EPERM;
75 goto setflags_out;
67 } 76 }
68 } 77 }
69 78
@@ -75,20 +84,26 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
75 ext2_set_inode_flags(inode); 84 ext2_set_inode_flags(inode);
76 inode->i_ctime = CURRENT_TIME_SEC; 85 inode->i_ctime = CURRENT_TIME_SEC;
77 mark_inode_dirty(inode); 86 mark_inode_dirty(inode);
78 return 0; 87setflags_out:
88 mnt_drop_write(filp->f_path.mnt);
89 return ret;
79 } 90 }
80 case EXT2_IOC_GETVERSION: 91 case EXT2_IOC_GETVERSION:
81 return put_user(inode->i_generation, (int __user *) arg); 92 return put_user(inode->i_generation, (int __user *) arg);
82 case EXT2_IOC_SETVERSION: 93 case EXT2_IOC_SETVERSION:
83 if (!is_owner_or_cap(inode)) 94 if (!is_owner_or_cap(inode))
84 return -EPERM; 95 return -EPERM;
85 if (IS_RDONLY(inode)) 96 ret = mnt_want_write(filp->f_path.mnt);
86 return -EROFS; 97 if (ret)
87 if (get_user(inode->i_generation, (int __user *) arg)) 98 return ret;
88 return -EFAULT; 99 if (get_user(inode->i_generation, (int __user *) arg)) {
89 inode->i_ctime = CURRENT_TIME_SEC; 100 ret = -EFAULT;
90 mark_inode_dirty(inode); 101 } else {
91 return 0; 102 inode->i_ctime = CURRENT_TIME_SEC;
103 mark_inode_dirty(inode);
104 }
105 mnt_drop_write(filp->f_path.mnt);
106 return ret;
92 case EXT2_IOC_GETRSVSZ: 107 case EXT2_IOC_GETRSVSZ:
93 if (test_opt(inode->i_sb, RESERVATION) 108 if (test_opt(inode->i_sb, RESERVATION)
94 && S_ISREG(inode->i_mode) 109 && S_ISREG(inode->i_mode)
@@ -102,15 +117,16 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
102 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 117 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
103 return -ENOTTY; 118 return -ENOTTY;
104 119
105 if (IS_RDONLY(inode)) 120 if (!is_owner_or_cap(inode))
106 return -EROFS;
107
108 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
109 return -EACCES; 121 return -EACCES;
110 122
111 if (get_user(rsv_window_size, (int __user *)arg)) 123 if (get_user(rsv_window_size, (int __user *)arg))
112 return -EFAULT; 124 return -EFAULT;
113 125
126 ret = mnt_want_write(filp->f_path.mnt);
127 if (ret)
128 return ret;
129
114 if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) 130 if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS)
115 rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; 131 rsv_window_size = EXT2_MAX_RESERVE_BLOCKS;
116 132
@@ -131,6 +147,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
131 rsv->rsv_goal_size = rsv_window_size; 147 rsv->rsv_goal_size = rsv_window_size;
132 } 148 }
133 mutex_unlock(&ei->truncate_mutex); 149 mutex_unlock(&ei->truncate_mutex);
150 mnt_drop_write(filp->f_path.mnt);
134 return 0; 151 return 0;
135 } 152 }
136 default: 153 default:
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 4f4020c54683..96dd5573e49b 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -239,7 +239,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
239 * it has too few free inodes left (min_inodes) or 239 * it has too few free inodes left (min_inodes) or
240 * it has too few free blocks left (min_blocks) or 240 * it has too few free blocks left (min_blocks) or
241 * it's already running too large debt (max_debt). 241 * it's already running too large debt (max_debt).
242 * Parent's group is prefered, if it doesn't satisfy these 242 * Parent's group is preferred, if it doesn't satisfy these
243 * conditions we search cyclically through the rest. If none 243 * conditions we search cyclically through the rest. If none
244 * of the groups look good we just look for a group with more 244 * of the groups look good we just look for a group with more
245 * free inodes than average (starting at parent's group). 245 * free inodes than average (starting at parent's group).
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index eb95670a27eb..c683609b0e3a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -392,7 +392,7 @@ no_block:
392 * @inode: owner 392 * @inode: owner
393 * @ind: descriptor of indirect block. 393 * @ind: descriptor of indirect block.
394 * 394 *
395 * This function returns the prefered place for block allocation. 395 * This function returns the preferred place for block allocation.
396 * It is used when heuristic for sequential allocation fails. 396 * It is used when heuristic for sequential allocation fails.
397 * Rules are: 397 * Rules are:
398 * + if there is a block to the left of our position - allocate near it. 398 * + if there is a block to the left of our position - allocate near it.
@@ -436,12 +436,12 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
436} 436}
437 437
438/** 438/**
439 * ext3_find_goal - find a prefered place for allocation. 439 * ext3_find_goal - find a preferred place for allocation.
440 * @inode: owner 440 * @inode: owner
441 * @block: block we want 441 * @block: block we want
442 * @partial: pointer to the last triple within a chain 442 * @partial: pointer to the last triple within a chain
443 * 443 *
444 * Normally this function find the prefered place for block allocation, 444 * Normally this function find the preferred place for block allocation,
445 * returns it. 445 * returns it.
446 */ 446 */
447 447
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 023a070f55f1..0d0c70151642 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include <linux/ext3_jbd.h> 14#include <linux/ext3_jbd.h>
15#include <linux/mount.h>
15#include <linux/time.h> 16#include <linux/time.h>
16#include <linux/compat.h> 17#include <linux/compat.h>
17#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
@@ -38,14 +39,19 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
38 unsigned int oldflags; 39 unsigned int oldflags;
39 unsigned int jflag; 40 unsigned int jflag;
40 41
41 if (IS_RDONLY(inode)) 42 err = mnt_want_write(filp->f_path.mnt);
42 return -EROFS; 43 if (err)
44 return err;
43 45
44 if (!is_owner_or_cap(inode)) 46 if (!is_owner_or_cap(inode)) {
45 return -EACCES; 47 err = -EACCES;
48 goto flags_out;
49 }
46 50
47 if (get_user(flags, (int __user *) arg)) 51 if (get_user(flags, (int __user *) arg)) {
48 return -EFAULT; 52 err = -EFAULT;
53 goto flags_out;
54 }
49 55
50 if (!S_ISDIR(inode->i_mode)) 56 if (!S_ISDIR(inode->i_mode))
51 flags &= ~EXT3_DIRSYNC_FL; 57 flags &= ~EXT3_DIRSYNC_FL;
@@ -54,7 +60,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
54 /* Is it quota file? Do not allow user to mess with it */ 60 /* Is it quota file? Do not allow user to mess with it */
55 if (IS_NOQUOTA(inode)) { 61 if (IS_NOQUOTA(inode)) {
56 mutex_unlock(&inode->i_mutex); 62 mutex_unlock(&inode->i_mutex);
57 return -EPERM; 63 err = -EPERM;
64 goto flags_out;
58 } 65 }
59 oldflags = ei->i_flags; 66 oldflags = ei->i_flags;
60 67
@@ -70,7 +77,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
70 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { 77 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
71 if (!capable(CAP_LINUX_IMMUTABLE)) { 78 if (!capable(CAP_LINUX_IMMUTABLE)) {
72 mutex_unlock(&inode->i_mutex); 79 mutex_unlock(&inode->i_mutex);
73 return -EPERM; 80 err = -EPERM;
81 goto flags_out;
74 } 82 }
75 } 83 }
76 84
@@ -81,7 +89,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
81 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { 89 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
82 if (!capable(CAP_SYS_RESOURCE)) { 90 if (!capable(CAP_SYS_RESOURCE)) {
83 mutex_unlock(&inode->i_mutex); 91 mutex_unlock(&inode->i_mutex);
84 return -EPERM; 92 err = -EPERM;
93 goto flags_out;
85 } 94 }
86 } 95 }
87 96
@@ -89,7 +98,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
89 handle = ext3_journal_start(inode, 1); 98 handle = ext3_journal_start(inode, 1);
90 if (IS_ERR(handle)) { 99 if (IS_ERR(handle)) {
91 mutex_unlock(&inode->i_mutex); 100 mutex_unlock(&inode->i_mutex);
92 return PTR_ERR(handle); 101 err = PTR_ERR(handle);
102 goto flags_out;
93 } 103 }
94 if (IS_SYNC(inode)) 104 if (IS_SYNC(inode))
95 handle->h_sync = 1; 105 handle->h_sync = 1;
@@ -115,6 +125,8 @@ flags_err:
115 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) 125 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
116 err = ext3_change_inode_journal_flag(inode, jflag); 126 err = ext3_change_inode_journal_flag(inode, jflag);
117 mutex_unlock(&inode->i_mutex); 127 mutex_unlock(&inode->i_mutex);
128flags_out:
129 mnt_drop_write(filp->f_path.mnt);
118 return err; 130 return err;
119 } 131 }
120 case EXT3_IOC_GETVERSION: 132 case EXT3_IOC_GETVERSION:
@@ -129,14 +141,18 @@ flags_err:
129 141
130 if (!is_owner_or_cap(inode)) 142 if (!is_owner_or_cap(inode))
131 return -EPERM; 143 return -EPERM;
132 if (IS_RDONLY(inode)) 144 err = mnt_want_write(filp->f_path.mnt);
133 return -EROFS; 145 if (err)
134 if (get_user(generation, (int __user *) arg)) 146 return err;
135 return -EFAULT; 147 if (get_user(generation, (int __user *) arg)) {
136 148 err = -EFAULT;
149 goto setversion_out;
150 }
137 handle = ext3_journal_start(inode, 1); 151 handle = ext3_journal_start(inode, 1);
138 if (IS_ERR(handle)) 152 if (IS_ERR(handle)) {
139 return PTR_ERR(handle); 153 err = PTR_ERR(handle);
154 goto setversion_out;
155 }
140 err = ext3_reserve_inode_write(handle, inode, &iloc); 156 err = ext3_reserve_inode_write(handle, inode, &iloc);
141 if (err == 0) { 157 if (err == 0) {
142 inode->i_ctime = CURRENT_TIME_SEC; 158 inode->i_ctime = CURRENT_TIME_SEC;
@@ -144,6 +160,8 @@ flags_err:
144 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 160 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
145 } 161 }
146 ext3_journal_stop(handle); 162 ext3_journal_stop(handle);
163setversion_out:
164 mnt_drop_write(filp->f_path.mnt);
147 return err; 165 return err;
148 } 166 }
149#ifdef CONFIG_JBD_DEBUG 167#ifdef CONFIG_JBD_DEBUG
@@ -179,18 +197,24 @@ flags_err:
179 } 197 }
180 return -ENOTTY; 198 return -ENOTTY;
181 case EXT3_IOC_SETRSVSZ: { 199 case EXT3_IOC_SETRSVSZ: {
200 int err;
182 201
183 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 202 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
184 return -ENOTTY; 203 return -ENOTTY;
185 204
186 if (IS_RDONLY(inode)) 205 err = mnt_want_write(filp->f_path.mnt);
187 return -EROFS; 206 if (err)
207 return err;
188 208
189 if (!is_owner_or_cap(inode)) 209 if (!is_owner_or_cap(inode)) {
190 return -EACCES; 210 err = -EACCES;
211 goto setrsvsz_out;
212 }
191 213
192 if (get_user(rsv_window_size, (int __user *)arg)) 214 if (get_user(rsv_window_size, (int __user *)arg)) {
193 return -EFAULT; 215 err = -EFAULT;
216 goto setrsvsz_out;
217 }
194 218
195 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) 219 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
196 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; 220 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
@@ -208,7 +232,9 @@ flags_err:
208 rsv->rsv_goal_size = rsv_window_size; 232 rsv->rsv_goal_size = rsv_window_size;
209 } 233 }
210 mutex_unlock(&ei->truncate_mutex); 234 mutex_unlock(&ei->truncate_mutex);
211 return 0; 235setrsvsz_out:
236 mnt_drop_write(filp->f_path.mnt);
237 return err;
212 } 238 }
213 case EXT3_IOC_GROUP_EXTEND: { 239 case EXT3_IOC_GROUP_EXTEND: {
214 ext3_fsblk_t n_blocks_count; 240 ext3_fsblk_t n_blocks_count;
@@ -218,17 +244,20 @@ flags_err:
218 if (!capable(CAP_SYS_RESOURCE)) 244 if (!capable(CAP_SYS_RESOURCE))
219 return -EPERM; 245 return -EPERM;
220 246
221 if (IS_RDONLY(inode)) 247 err = mnt_want_write(filp->f_path.mnt);
222 return -EROFS; 248 if (err)
223 249 return err;
224 if (get_user(n_blocks_count, (__u32 __user *)arg))
225 return -EFAULT;
226 250
251 if (get_user(n_blocks_count, (__u32 __user *)arg)) {
252 err = -EFAULT;
253 goto group_extend_out;
254 }
227 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); 255 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
228 journal_lock_updates(EXT3_SB(sb)->s_journal); 256 journal_lock_updates(EXT3_SB(sb)->s_journal);
229 journal_flush(EXT3_SB(sb)->s_journal); 257 journal_flush(EXT3_SB(sb)->s_journal);
230 journal_unlock_updates(EXT3_SB(sb)->s_journal); 258 journal_unlock_updates(EXT3_SB(sb)->s_journal);
231 259group_extend_out:
260 mnt_drop_write(filp->f_path.mnt);
232 return err; 261 return err;
233 } 262 }
234 case EXT3_IOC_GROUP_ADD: { 263 case EXT3_IOC_GROUP_ADD: {
@@ -239,18 +268,22 @@ flags_err:
239 if (!capable(CAP_SYS_RESOURCE)) 268 if (!capable(CAP_SYS_RESOURCE))
240 return -EPERM; 269 return -EPERM;
241 270
242 if (IS_RDONLY(inode)) 271 err = mnt_want_write(filp->f_path.mnt);
243 return -EROFS; 272 if (err)
273 return err;
244 274
245 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, 275 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
246 sizeof(input))) 276 sizeof(input))) {
247 return -EFAULT; 277 err = -EFAULT;
278 goto group_add_out;
279 }
248 280
249 err = ext3_group_add(sb, &input); 281 err = ext3_group_add(sb, &input);
250 journal_lock_updates(EXT3_SB(sb)->s_journal); 282 journal_lock_updates(EXT3_SB(sb)->s_journal);
251 journal_flush(EXT3_SB(sb)->s_journal); 283 journal_flush(EXT3_SB(sb)->s_journal);
252 journal_unlock_updates(EXT3_SB(sb)->s_journal); 284 journal_unlock_updates(EXT3_SB(sb)->s_journal);
253 285group_add_out:
286 mnt_drop_write(filp->f_path.mnt);
254 return err; 287 return err;
255 } 288 }
256 289
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8036b9b5376b..486e46a3918d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -305,7 +305,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
305 * it has too few free inodes left (min_inodes) or 305 * it has too few free inodes left (min_inodes) or
306 * it has too few free blocks left (min_blocks) or 306 * it has too few free blocks left (min_blocks) or
307 * it's already running too large debt (max_debt). 307 * it's already running too large debt (max_debt).
308 * Parent's group is prefered, if it doesn't satisfy these 308 * Parent's group is preferred, if it doesn't satisfy these
309 * conditions we search cyclically through the rest. If none 309 * conditions we search cyclically through the rest. If none
310 * of the groups look good we just look for a group with more 310 * of the groups look good we just look for a group with more
311 * free inodes than average (starting at parent's group). 311 * free inodes than average (starting at parent's group).
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 945cbf6cb1fc..8fab233cb05f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -382,7 +382,7 @@ no_block:
382 * @inode: owner 382 * @inode: owner
383 * @ind: descriptor of indirect block. 383 * @ind: descriptor of indirect block.
384 * 384 *
385 * This function returns the prefered place for block allocation. 385 * This function returns the preferred place for block allocation.
386 * It is used when heuristic for sequential allocation fails. 386 * It is used when heuristic for sequential allocation fails.
387 * Rules are: 387 * Rules are:
388 * + if there is a block to the left of our position - allocate near it. 388 * + if there is a block to the left of our position - allocate near it.
@@ -432,12 +432,12 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
432} 432}
433 433
434/** 434/**
435 * ext4_find_goal - find a prefered place for allocation. 435 * ext4_find_goal - find a preferred place for allocation.
436 * @inode: owner 436 * @inode: owner
437 * @block: block we want 437 * @block: block we want
438 * @partial: pointer to the last triple within a chain 438 * @partial: pointer to the last triple within a chain
439 * 439 *
440 * Normally this function find the prefered place for block allocation, 440 * Normally this function find the preferred place for block allocation,
441 * returns it. 441 * returns it.
442 */ 442 */
443static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, 443static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2ed7c37f897e..25b13ede8086 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -15,6 +15,7 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/smp_lock.h> 17#include <linux/smp_lock.h>
18#include <linux/mount.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19 20
20int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
@@ -38,24 +39,25 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
38 unsigned int oldflags; 39 unsigned int oldflags;
39 unsigned int jflag; 40 unsigned int jflag;
40 41
41 if (IS_RDONLY(inode))
42 return -EROFS;
43
44 if (!is_owner_or_cap(inode)) 42 if (!is_owner_or_cap(inode))
45 return -EACCES; 43 return -EACCES;
46 44
47 if (get_user(flags, (int __user *) arg)) 45 if (get_user(flags, (int __user *) arg))
48 return -EFAULT; 46 return -EFAULT;
49 47
48 err = mnt_want_write(filp->f_path.mnt);
49 if (err)
50 return err;
51
50 if (!S_ISDIR(inode->i_mode)) 52 if (!S_ISDIR(inode->i_mode))
51 flags &= ~EXT4_DIRSYNC_FL; 53 flags &= ~EXT4_DIRSYNC_FL;
52 54
55 err = -EPERM;
53 mutex_lock(&inode->i_mutex); 56 mutex_lock(&inode->i_mutex);
54 /* Is it quota file? Do not allow user to mess with it */ 57 /* Is it quota file? Do not allow user to mess with it */
55 if (IS_NOQUOTA(inode)) { 58 if (IS_NOQUOTA(inode))
56 mutex_unlock(&inode->i_mutex); 59 goto flags_out;
57 return -EPERM; 60
58 }
59 oldflags = ei->i_flags; 61 oldflags = ei->i_flags;
60 62
61 /* The JOURNAL_DATA flag is modifiable only by root */ 63 /* The JOURNAL_DATA flag is modifiable only by root */
@@ -68,10 +70,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
68 * This test looks nicer. Thanks to Pauline Middelink 70 * This test looks nicer. Thanks to Pauline Middelink
69 */ 71 */
70 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { 72 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
71 if (!capable(CAP_LINUX_IMMUTABLE)) { 73 if (!capable(CAP_LINUX_IMMUTABLE))
72 mutex_unlock(&inode->i_mutex); 74 goto flags_out;
73 return -EPERM;
74 }
75 } 75 }
76 76
77 /* 77 /*
@@ -79,17 +79,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
79 * the relevant capability. 79 * the relevant capability.
80 */ 80 */
81 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { 81 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
82 if (!capable(CAP_SYS_RESOURCE)) { 82 if (!capable(CAP_SYS_RESOURCE))
83 mutex_unlock(&inode->i_mutex); 83 goto flags_out;
84 return -EPERM;
85 }
86 } 84 }
87 85
88
89 handle = ext4_journal_start(inode, 1); 86 handle = ext4_journal_start(inode, 1);
90 if (IS_ERR(handle)) { 87 if (IS_ERR(handle)) {
91 mutex_unlock(&inode->i_mutex); 88 err = PTR_ERR(handle);
92 return PTR_ERR(handle); 89 goto flags_out;
93 } 90 }
94 if (IS_SYNC(inode)) 91 if (IS_SYNC(inode))
95 handle->h_sync = 1; 92 handle->h_sync = 1;
@@ -107,14 +104,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
107 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 104 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
108flags_err: 105flags_err:
109 ext4_journal_stop(handle); 106 ext4_journal_stop(handle);
110 if (err) { 107 if (err)
111 mutex_unlock(&inode->i_mutex); 108 goto flags_out;
112 return err;
113 }
114 109
115 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) 110 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
116 err = ext4_change_inode_journal_flag(inode, jflag); 111 err = ext4_change_inode_journal_flag(inode, jflag);
112flags_out:
117 mutex_unlock(&inode->i_mutex); 113 mutex_unlock(&inode->i_mutex);
114 mnt_drop_write(filp->f_path.mnt);
118 return err; 115 return err;
119 } 116 }
120 case EXT4_IOC_GETVERSION: 117 case EXT4_IOC_GETVERSION:
@@ -129,14 +126,20 @@ flags_err:
129 126
130 if (!is_owner_or_cap(inode)) 127 if (!is_owner_or_cap(inode))
131 return -EPERM; 128 return -EPERM;
132 if (IS_RDONLY(inode)) 129
133 return -EROFS; 130 err = mnt_want_write(filp->f_path.mnt);
134 if (get_user(generation, (int __user *) arg)) 131 if (err)
135 return -EFAULT; 132 return err;
133 if (get_user(generation, (int __user *) arg)) {
134 err = -EFAULT;
135 goto setversion_out;
136 }
136 137
137 handle = ext4_journal_start(inode, 1); 138 handle = ext4_journal_start(inode, 1);
138 if (IS_ERR(handle)) 139 if (IS_ERR(handle)) {
139 return PTR_ERR(handle); 140 err = PTR_ERR(handle);
141 goto setversion_out;
142 }
140 err = ext4_reserve_inode_write(handle, inode, &iloc); 143 err = ext4_reserve_inode_write(handle, inode, &iloc);
141 if (err == 0) { 144 if (err == 0) {
142 inode->i_ctime = ext4_current_time(inode); 145 inode->i_ctime = ext4_current_time(inode);
@@ -144,6 +147,8 @@ flags_err:
144 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 147 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
145 } 148 }
146 ext4_journal_stop(handle); 149 ext4_journal_stop(handle);
150setversion_out:
151 mnt_drop_write(filp->f_path.mnt);
147 return err; 152 return err;
148 } 153 }
149#ifdef CONFIG_JBD2_DEBUG 154#ifdef CONFIG_JBD2_DEBUG
@@ -179,19 +184,21 @@ flags_err:
179 } 184 }
180 return -ENOTTY; 185 return -ENOTTY;
181 case EXT4_IOC_SETRSVSZ: { 186 case EXT4_IOC_SETRSVSZ: {
187 int err;
182 188
183 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 189 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
184 return -ENOTTY; 190 return -ENOTTY;
185 191
186 if (IS_RDONLY(inode))
187 return -EROFS;
188
189 if (!is_owner_or_cap(inode)) 192 if (!is_owner_or_cap(inode))
190 return -EACCES; 193 return -EACCES;
191 194
192 if (get_user(rsv_window_size, (int __user *)arg)) 195 if (get_user(rsv_window_size, (int __user *)arg))
193 return -EFAULT; 196 return -EFAULT;
194 197
198 err = mnt_want_write(filp->f_path.mnt);
199 if (err)
200 return err;
201
195 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) 202 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
196 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; 203 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
197 204
@@ -208,6 +215,7 @@ flags_err:
208 rsv->rsv_goal_size = rsv_window_size; 215 rsv->rsv_goal_size = rsv_window_size;
209 } 216 }
210 up_write(&ei->i_data_sem); 217 up_write(&ei->i_data_sem);
218 mnt_drop_write(filp->f_path.mnt);
211 return 0; 219 return 0;
212 } 220 }
213 case EXT4_IOC_GROUP_EXTEND: { 221 case EXT4_IOC_GROUP_EXTEND: {
@@ -218,16 +226,18 @@ flags_err:
218 if (!capable(CAP_SYS_RESOURCE)) 226 if (!capable(CAP_SYS_RESOURCE))
219 return -EPERM; 227 return -EPERM;
220 228
221 if (IS_RDONLY(inode))
222 return -EROFS;
223
224 if (get_user(n_blocks_count, (__u32 __user *)arg)) 229 if (get_user(n_blocks_count, (__u32 __user *)arg))
225 return -EFAULT; 230 return -EFAULT;
226 231
232 err = mnt_want_write(filp->f_path.mnt);
233 if (err)
234 return err;
235
227 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 236 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
228 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 237 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
229 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 238 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
230 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 239 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
240 mnt_drop_write(filp->f_path.mnt);
231 241
232 return err; 242 return err;
233 } 243 }
@@ -239,17 +249,19 @@ flags_err:
239 if (!capable(CAP_SYS_RESOURCE)) 249 if (!capable(CAP_SYS_RESOURCE))
240 return -EPERM; 250 return -EPERM;
241 251
242 if (IS_RDONLY(inode))
243 return -EROFS;
244
245 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 252 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
246 sizeof(input))) 253 sizeof(input)))
247 return -EFAULT; 254 return -EFAULT;
248 255
256 err = mnt_want_write(filp->f_path.mnt);
257 if (err)
258 return err;
259
249 err = ext4_group_add(sb, &input); 260 err = ext4_group_add(sb, &input);
250 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 261 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
251 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 262 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
252 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 263 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
264 mnt_drop_write(filp->f_path.mnt);
253 265
254 return err; 266 return err;
255 } 267 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c614175876e0..2a3bed967041 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/capability.h> 9#include <linux/capability.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mount.h>
11#include <linux/time.h> 12#include <linux/time.h>
12#include <linux/msdos_fs.h> 13#include <linux/msdos_fs.h>
13#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
@@ -46,10 +47,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
46 47
47 mutex_lock(&inode->i_mutex); 48 mutex_lock(&inode->i_mutex);
48 49
49 if (IS_RDONLY(inode)) { 50 err = mnt_want_write(filp->f_path.mnt);
50 err = -EROFS; 51 if (err)
51 goto up; 52 goto up_no_drop_write;
52 }
53 53
54 /* 54 /*
55 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also 55 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -105,7 +105,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
105 105
106 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; 106 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
107 mark_inode_dirty(inode); 107 mark_inode_dirty(inode);
108 up: 108up:
109 mnt_drop_write(filp->f_path.mnt);
110up_no_drop_write:
109 mutex_unlock(&inode->i_mutex); 111 mutex_unlock(&inode->i_mutex);
110 return err; 112 return err;
111 } 113 }
diff --git a/fs/file_table.c b/fs/file_table.c
index 986ff4ed0a7c..7a0a9b872251 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head)
42static inline void file_free(struct file *f) 42static inline void file_free(struct file *f)
43{ 43{
44 percpu_counter_dec(&nr_files); 44 percpu_counter_dec(&nr_files);
45 file_check_state(f);
45 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); 46 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
46} 47}
47 48
@@ -199,6 +200,18 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
199 file->f_mapping = dentry->d_inode->i_mapping; 200 file->f_mapping = dentry->d_inode->i_mapping;
200 file->f_mode = mode; 201 file->f_mode = mode;
201 file->f_op = fop; 202 file->f_op = fop;
203
204 /*
205 * These mounts don't really matter in practice
206 * for r/o bind mounts. They aren't userspace-
207 * visible. We do this for consistency, and so
208 * that we can do debugging checks at __fput()
209 */
210 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
211 file_take_write(file);
212 error = mnt_want_write(mnt);
213 WARN_ON(error);
214 }
202 return error; 215 return error;
203} 216}
204EXPORT_SYMBOL(init_file); 217EXPORT_SYMBOL(init_file);
@@ -211,6 +224,31 @@ void fput(struct file *file)
211 224
212EXPORT_SYMBOL(fput); 225EXPORT_SYMBOL(fput);
213 226
227/**
228 * drop_file_write_access - give up ability to write to a file
229 * @file: the file to which we will stop writing
230 *
231 * This is a central place which will give up the ability
232 * to write to @file, along with access to write through
233 * its vfsmount.
234 */
235void drop_file_write_access(struct file *file)
236{
237 struct vfsmount *mnt = file->f_path.mnt;
238 struct dentry *dentry = file->f_path.dentry;
239 struct inode *inode = dentry->d_inode;
240
241 put_write_access(inode);
242
243 if (special_file(inode->i_mode))
244 return;
245 if (file_check_writeable(file) != 0)
246 return;
247 mnt_drop_write(mnt);
248 file_release_write(file);
249}
250EXPORT_SYMBOL_GPL(drop_file_write_access);
251
214/* __fput is called from task context when aio completion releases the last 252/* __fput is called from task context when aio completion releases the last
215 * last use of a struct file *. Do not use otherwise. 253 * last use of a struct file *. Do not use otherwise.
216 */ 254 */
@@ -236,10 +274,10 @@ void __fput(struct file *file)
236 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 274 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
237 cdev_put(inode->i_cdev); 275 cdev_put(inode->i_cdev);
238 fops_put(file->f_op); 276 fops_put(file->f_op);
239 if (file->f_mode & FMODE_WRITE)
240 put_write_access(inode);
241 put_pid(file->f_owner.pid); 277 put_pid(file->f_owner.pid);
242 file_kill(file); 278 file_kill(file);
279 if (file->f_mode & FMODE_WRITE)
280 drop_file_write_access(file);
243 file->f_path.dentry = NULL; 281 file->f_path.dentry = NULL;
244 file->f_path.mnt = NULL; 282 file->f_path.mnt = NULL;
245 file_free(file); 283 file_free(file);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index b60c0affbec5..f457d2ca51ab 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/mount.h>
17#include <linux/sched.h> 18#include <linux/sched.h>
18#include <linux/xattr.h> 19#include <linux/xattr.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
@@ -35,25 +36,32 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
35 flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ 36 flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
36 return put_user(flags, (int __user *)arg); 37 return put_user(flags, (int __user *)arg);
37 case HFSPLUS_IOC_EXT2_SETFLAGS: { 38 case HFSPLUS_IOC_EXT2_SETFLAGS: {
38 if (IS_RDONLY(inode)) 39 int err = 0;
39 return -EROFS; 40 err = mnt_want_write(filp->f_path.mnt);
40 41 if (err)
41 if (!is_owner_or_cap(inode)) 42 return err;
42 return -EACCES; 43
43 44 if (!is_owner_or_cap(inode)) {
44 if (get_user(flags, (int __user *)arg)) 45 err = -EACCES;
45 return -EFAULT; 46 goto setflags_out;
46 47 }
48 if (get_user(flags, (int __user *)arg)) {
49 err = -EFAULT;
50 goto setflags_out;
51 }
47 if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || 52 if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
48 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { 53 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
49 if (!capable(CAP_LINUX_IMMUTABLE)) 54 if (!capable(CAP_LINUX_IMMUTABLE)) {
50 return -EPERM; 55 err = -EPERM;
56 goto setflags_out;
57 }
51 } 58 }
52 59
53 /* don't silently ignore unsupported ext2 flags */ 60 /* don't silently ignore unsupported ext2 flags */
54 if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) 61 if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
55 return -EOPNOTSUPP; 62 err = -EOPNOTSUPP;
56 63 goto setflags_out;
64 }
57 if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ 65 if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
58 inode->i_flags |= S_IMMUTABLE; 66 inode->i_flags |= S_IMMUTABLE;
59 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; 67 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
@@ -75,7 +83,9 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
75 83
76 inode->i_ctime = CURRENT_TIME_SEC; 84 inode->i_ctime = CURRENT_TIME_SEC;
77 mark_inode_dirty(inode); 85 mark_inode_dirty(inode);
78 return 0; 86setflags_out:
87 mnt_drop_write(filp->f_path.mnt);
88 return err;
79 } 89 }
80 default: 90 default:
81 return -ENOTTY; 91 return -ENOTTY;
diff --git a/fs/inode.c b/fs/inode.c
index 53245ffcf93d..27ee1af50d02 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1199,42 +1199,37 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1199 struct inode *inode = dentry->d_inode; 1199 struct inode *inode = dentry->d_inode;
1200 struct timespec now; 1200 struct timespec now;
1201 1201
1202 if (inode->i_flags & S_NOATIME) 1202 if (mnt_want_write(mnt))
1203 return; 1203 return;
1204 if (inode->i_flags & S_NOATIME)
1205 goto out;
1204 if (IS_NOATIME(inode)) 1206 if (IS_NOATIME(inode))
1205 return; 1207 goto out;
1206 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1208 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1207 return; 1209 goto out;
1208 1210
1209 /* 1211 if (mnt->mnt_flags & MNT_NOATIME)
1210 * We may have a NULL vfsmount when coming from NFSD 1212 goto out;
1211 */ 1213 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1212 if (mnt) { 1214 goto out;
1213 if (mnt->mnt_flags & MNT_NOATIME) 1215 if (mnt->mnt_flags & MNT_RELATIME) {
1214 return; 1216 /*
1215 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1217 * With relative atime, only update atime if the previous
1216 return; 1218 * atime is earlier than either the ctime or mtime.
1217 1219 */
1218 if (mnt->mnt_flags & MNT_RELATIME) { 1220 if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
1219 /* 1221 timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
1220 * With relative atime, only update atime if the 1222 goto out;
1221 * previous atime is earlier than either the ctime or
1222 * mtime.
1223 */
1224 if (timespec_compare(&inode->i_mtime,
1225 &inode->i_atime) < 0 &&
1226 timespec_compare(&inode->i_ctime,
1227 &inode->i_atime) < 0)
1228 return;
1229 }
1230 } 1223 }
1231 1224
1232 now = current_fs_time(inode->i_sb); 1225 now = current_fs_time(inode->i_sb);
1233 if (timespec_equal(&inode->i_atime, &now)) 1226 if (timespec_equal(&inode->i_atime, &now))
1234 return; 1227 goto out;
1235 1228
1236 inode->i_atime = now; 1229 inode->i_atime = now;
1237 mark_inode_dirty_sync(inode); 1230 mark_inode_dirty_sync(inode);
1231out:
1232 mnt_drop_write(mnt);
1238} 1233}
1239EXPORT_SYMBOL(touch_atime); 1234EXPORT_SYMBOL(touch_atime);
1240 1235
@@ -1255,10 +1250,13 @@ void file_update_time(struct file *file)
1255 struct inode *inode = file->f_path.dentry->d_inode; 1250 struct inode *inode = file->f_path.dentry->d_inode;
1256 struct timespec now; 1251 struct timespec now;
1257 int sync_it = 0; 1252 int sync_it = 0;
1253 int err;
1258 1254
1259 if (IS_NOCMTIME(inode)) 1255 if (IS_NOCMTIME(inode))
1260 return; 1256 return;
1261 if (IS_RDONLY(inode)) 1257
1258 err = mnt_want_write(file->f_path.mnt);
1259 if (err)
1262 return; 1260 return;
1263 1261
1264 now = current_fs_time(inode->i_sb); 1262 now = current_fs_time(inode->i_sb);
@@ -1279,6 +1277,7 @@ void file_update_time(struct file *file)
1279 1277
1280 if (sync_it) 1278 if (sync_it)
1281 mark_inode_dirty_sync(inode); 1279 mark_inode_dirty_sync(inode);
1280 mnt_drop_write(file->f_path.mnt);
1282} 1281}
1283 1282
1284EXPORT_SYMBOL(file_update_time); 1283EXPORT_SYMBOL(file_update_time);
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 0b78fdc9773b..a841f4973a74 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -15,7 +15,7 @@
15#include <linux/version.h> 15#include <linux/version.h>
16#include <linux/rbtree.h> 16#include <linux/rbtree.h>
17#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
18#include <asm/semaphore.h> 18#include <linux/semaphore.h>
19 19
20struct jffs2_inode_info { 20struct jffs2_inode_info {
21 /* We need an internal mutex similar to inode->i_mutex. 21 /* We need an internal mutex similar to inode->i_mutex.
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 3a2197f3c812..18fca2b9e531 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -16,7 +16,7 @@
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/workqueue.h> 17#include <linux/workqueue.h>
18#include <linux/completion.h> 18#include <linux/completion.h>
19#include <asm/semaphore.h> 19#include <linux/semaphore.h>
20#include <linux/timer.h> 20#include <linux/timer.h>
21#include <linux/wait.h> 21#include <linux/wait.h>
22#include <linux/list.h> 22#include <linux/list.h>
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index a1f8e375ad21..afe222bf300f 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -8,6 +8,7 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ctype.h> 9#include <linux/ctype.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/mount.h>
11#include <linux/time.h> 12#include <linux/time.h>
12#include <linux/sched.h> 13#include <linux/sched.h>
13#include <asm/current.h> 14#include <asm/current.h>
@@ -65,23 +66,30 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
65 return put_user(flags, (int __user *) arg); 66 return put_user(flags, (int __user *) arg);
66 case JFS_IOC_SETFLAGS: { 67 case JFS_IOC_SETFLAGS: {
67 unsigned int oldflags; 68 unsigned int oldflags;
69 int err;
68 70
69 if (IS_RDONLY(inode)) 71 err = mnt_want_write(filp->f_path.mnt);
70 return -EROFS; 72 if (err)
73 return err;
71 74
72 if (!is_owner_or_cap(inode)) 75 if (!is_owner_or_cap(inode)) {
73 return -EACCES; 76 err = -EACCES;
74 77 goto setflags_out;
75 if (get_user(flags, (int __user *) arg)) 78 }
76 return -EFAULT; 79 if (get_user(flags, (int __user *) arg)) {
80 err = -EFAULT;
81 goto setflags_out;
82 }
77 83
78 flags = jfs_map_ext2(flags, 1); 84 flags = jfs_map_ext2(flags, 1);
79 if (!S_ISDIR(inode->i_mode)) 85 if (!S_ISDIR(inode->i_mode))
80 flags &= ~JFS_DIRSYNC_FL; 86 flags &= ~JFS_DIRSYNC_FL;
81 87
82 /* Is it quota file? Do not allow user to mess with it */ 88 /* Is it quota file? Do not allow user to mess with it */
83 if (IS_NOQUOTA(inode)) 89 if (IS_NOQUOTA(inode)) {
84 return -EPERM; 90 err = -EPERM;
91 goto setflags_out;
92 }
85 93
86 /* Lock against other parallel changes of flags */ 94 /* Lock against other parallel changes of flags */
87 mutex_lock(&inode->i_mutex); 95 mutex_lock(&inode->i_mutex);
@@ -98,7 +106,8 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
98 (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { 106 (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
99 if (!capable(CAP_LINUX_IMMUTABLE)) { 107 if (!capable(CAP_LINUX_IMMUTABLE)) {
100 mutex_unlock(&inode->i_mutex); 108 mutex_unlock(&inode->i_mutex);
101 return -EPERM; 109 err = -EPERM;
110 goto setflags_out;
102 } 111 }
103 } 112 }
104 113
@@ -110,7 +119,9 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
110 mutex_unlock(&inode->i_mutex); 119 mutex_unlock(&inode->i_mutex);
111 inode->i_ctime = CURRENT_TIME_SEC; 120 inode->i_ctime = CURRENT_TIME_SEC;
112 mark_inode_dirty(inode); 121 mark_inode_dirty(inode);
113 return 0; 122setflags_out:
123 mnt_drop_write(filp->f_path.mnt);
124 return err;
114 } 125 }
115 default: 126 default:
116 return -ENOTTY; 127 return -ENOTTY;
diff --git a/fs/locks.c b/fs/locks.c
index 43c0af21a0c5..592faadbcec1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -127,7 +127,6 @@
127#include <linux/rcupdate.h> 127#include <linux/rcupdate.h>
128#include <linux/pid_namespace.h> 128#include <linux/pid_namespace.h>
129 129
130#include <asm/semaphore.h>
131#include <asm/uaccess.h> 130#include <asm/uaccess.h>
132 131
133#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 132#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
diff --git a/fs/namei.c b/fs/namei.c
index 8cf9bb9c2fc0..e179f71bfcb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1623 return -EACCES; 1623 return -EACCES;
1624 1624
1625 flag &= ~O_TRUNC; 1625 flag &= ~O_TRUNC;
1626 } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) 1626 }
1627 return -EROFS;
1628 1627
1629 error = vfs_permission(nd, acc_mode); 1628 error = vfs_permission(nd, acc_mode);
1630 if (error) 1629 if (error)
@@ -1677,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1677 return 0; 1676 return 0;
1678} 1677}
1679 1678
1680static int open_namei_create(struct nameidata *nd, struct path *path, 1679/*
1680 * Be careful about ever adding any more callers of this
1681 * function. Its flags must be in the namei format, not
1682 * what get passed to sys_open().
1683 */
1684static int __open_namei_create(struct nameidata *nd, struct path *path,
1681 int flag, int mode) 1685 int flag, int mode)
1682{ 1686{
1683 int error; 1687 int error;
@@ -1696,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path,
1696} 1700}
1697 1701
1698/* 1702/*
1699 * open_namei() 1703 * Note that while the flag value (low two bits) for sys_open means:
1704 * 00 - read-only
1705 * 01 - write-only
1706 * 10 - read-write
1707 * 11 - special
1708 * it is changed into
1709 * 00 - no permissions needed
1710 * 01 - read-permission
1711 * 10 - write-permission
1712 * 11 - read-write
1713 * for the internal routines (ie open_namei()/follow_link() etc)
1714 * This is more logical, and also allows the 00 "no perm needed"
1715 * to be used for symlinks (where the permissions are checked
1716 * later).
1700 * 1717 *
1701 * namei for open - this is in fact almost the whole open-routine. 1718*/
1702 * 1719static inline int open_to_namei_flags(int flag)
1703 * Note that the low bits of "flag" aren't the same as in the open 1720{
1704 * system call - they are 00 - no permissions needed 1721 if ((flag+1) & O_ACCMODE)
1705 * 01 - read permission needed 1722 flag++;
1706 * 10 - write permission needed 1723 return flag;
1707 * 11 - read/write permissions needed 1724}
1708 * which is a lot more logical, and also allows the "no perm" needed 1725
1709 * for symlinks (where the permissions are checked later). 1726static int open_will_write_to_fs(int flag, struct inode *inode)
1710 * SMP-safe 1727{
1728 /*
1729 * We'll never write to the fs underlying
1730 * a device file.
1731 */
1732 if (special_file(inode->i_mode))
1733 return 0;
1734 return (flag & O_TRUNC);
1735}
1736
1737/*
1738 * Note that the low bits of the passed in "open_flag"
1739 * are not the same as in the local variable "flag". See
1740 * open_to_namei_flags() for more details.
1711 */ 1741 */
1712int open_namei(int dfd, const char *pathname, int flag, 1742struct file *do_filp_open(int dfd, const char *pathname,
1713 int mode, struct nameidata *nd) 1743 int open_flag, int mode)
1714{ 1744{
1745 struct file *filp;
1746 struct nameidata nd;
1715 int acc_mode, error; 1747 int acc_mode, error;
1716 struct path path; 1748 struct path path;
1717 struct dentry *dir; 1749 struct dentry *dir;
1718 int count = 0; 1750 int count = 0;
1751 int will_write;
1752 int flag = open_to_namei_flags(open_flag);
1719 1753
1720 acc_mode = ACC_MODE(flag); 1754 acc_mode = ACC_MODE(flag);
1721 1755
@@ -1733,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag,
1733 */ 1767 */
1734 if (!(flag & O_CREAT)) { 1768 if (!(flag & O_CREAT)) {
1735 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1769 error = path_lookup_open(dfd, pathname, lookup_flags(flag),
1736 nd, flag); 1770 &nd, flag);
1737 if (error) 1771 if (error)
1738 return error; 1772 return ERR_PTR(error);
1739 goto ok; 1773 goto ok;
1740 } 1774 }
1741 1775
1742 /* 1776 /*
1743 * Create - we need to know the parent. 1777 * Create - we need to know the parent.
1744 */ 1778 */
1745 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); 1779 error = path_lookup_create(dfd, pathname, LOOKUP_PARENT,
1780 &nd, flag, mode);
1746 if (error) 1781 if (error)
1747 return error; 1782 return ERR_PTR(error);
1748 1783
1749 /* 1784 /*
1750 * We have the parent and last component. First of all, check 1785 * We have the parent and last component. First of all, check
@@ -1752,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag,
1752 * will not do. 1787 * will not do.
1753 */ 1788 */
1754 error = -EISDIR; 1789 error = -EISDIR;
1755 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) 1790 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
1756 goto exit; 1791 goto exit;
1757 1792
1758 dir = nd->path.dentry; 1793 dir = nd.path.dentry;
1759 nd->flags &= ~LOOKUP_PARENT; 1794 nd.flags &= ~LOOKUP_PARENT;
1760 mutex_lock(&dir->d_inode->i_mutex); 1795 mutex_lock(&dir->d_inode->i_mutex);
1761 path.dentry = lookup_hash(nd); 1796 path.dentry = lookup_hash(&nd);
1762 path.mnt = nd->path.mnt; 1797 path.mnt = nd.path.mnt;
1763 1798
1764do_last: 1799do_last:
1765 error = PTR_ERR(path.dentry); 1800 error = PTR_ERR(path.dentry);
@@ -1768,18 +1803,31 @@ do_last:
1768 goto exit; 1803 goto exit;
1769 } 1804 }
1770 1805
1771 if (IS_ERR(nd->intent.open.file)) { 1806 if (IS_ERR(nd.intent.open.file)) {
1772 mutex_unlock(&dir->d_inode->i_mutex); 1807 error = PTR_ERR(nd.intent.open.file);
1773 error = PTR_ERR(nd->intent.open.file); 1808 goto exit_mutex_unlock;
1774 goto exit_dput;
1775 } 1809 }
1776 1810
1777 /* Negative dentry, just create the file */ 1811 /* Negative dentry, just create the file */
1778 if (!path.dentry->d_inode) { 1812 if (!path.dentry->d_inode) {
1779 error = open_namei_create(nd, &path, flag, mode); 1813 /*
1814 * This write is needed to ensure that a
1815 * ro->rw transition does not occur between
1816 * the time when the file is created and when
1817 * a permanent write count is taken through
1818 * the 'struct file' in nameidata_to_filp().
1819 */
1820 error = mnt_want_write(nd.path.mnt);
1780 if (error) 1821 if (error)
1822 goto exit_mutex_unlock;
1823 error = __open_namei_create(&nd, &path, flag, mode);
1824 if (error) {
1825 mnt_drop_write(nd.path.mnt);
1781 goto exit; 1826 goto exit;
1782 return 0; 1827 }
1828 filp = nameidata_to_filp(&nd, open_flag);
1829 mnt_drop_write(nd.path.mnt);
1830 return filp;
1783 } 1831 }
1784 1832
1785 /* 1833 /*
@@ -1804,23 +1852,52 @@ do_last:
1804 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1852 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
1805 goto do_link; 1853 goto do_link;
1806 1854
1807 path_to_nameidata(&path, nd); 1855 path_to_nameidata(&path, &nd);
1808 error = -EISDIR; 1856 error = -EISDIR;
1809 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1857 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
1810 goto exit; 1858 goto exit;
1811ok: 1859ok:
1812 error = may_open(nd, acc_mode, flag); 1860 /*
1813 if (error) 1861 * Consider:
1862 * 1. may_open() truncates a file
1863 * 2. a rw->ro mount transition occurs
1864 * 3. nameidata_to_filp() fails due to
1865 * the ro mount.
1866 * That would be inconsistent, and should
1867 * be avoided. Taking this mnt write here
1868 * ensures that (2) can not occur.
1869 */
1870 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
1871 if (will_write) {
1872 error = mnt_want_write(nd.path.mnt);
1873 if (error)
1874 goto exit;
1875 }
1876 error = may_open(&nd, acc_mode, flag);
1877 if (error) {
1878 if (will_write)
1879 mnt_drop_write(nd.path.mnt);
1814 goto exit; 1880 goto exit;
1815 return 0; 1881 }
1882 filp = nameidata_to_filp(&nd, open_flag);
1883 /*
1884 * It is now safe to drop the mnt write
1885 * because the filp has had a write taken
1886 * on its behalf.
1887 */
1888 if (will_write)
1889 mnt_drop_write(nd.path.mnt);
1890 return filp;
1816 1891
1892exit_mutex_unlock:
1893 mutex_unlock(&dir->d_inode->i_mutex);
1817exit_dput: 1894exit_dput:
1818 path_put_conditional(&path, nd); 1895 path_put_conditional(&path, &nd);
1819exit: 1896exit:
1820 if (!IS_ERR(nd->intent.open.file)) 1897 if (!IS_ERR(nd.intent.open.file))
1821 release_open_intent(nd); 1898 release_open_intent(&nd);
1822 path_put(&nd->path); 1899 path_put(&nd.path);
1823 return error; 1900 return ERR_PTR(error);
1824 1901
1825do_link: 1902do_link:
1826 error = -ELOOP; 1903 error = -ELOOP;
@@ -1836,43 +1913,60 @@ do_link:
1836 * stored in nd->last.name and we will have to putname() it when we 1913 * stored in nd->last.name and we will have to putname() it when we
1837 * are done. Procfs-like symlinks just set LAST_BIND. 1914 * are done. Procfs-like symlinks just set LAST_BIND.
1838 */ 1915 */
1839 nd->flags |= LOOKUP_PARENT; 1916 nd.flags |= LOOKUP_PARENT;
1840 error = security_inode_follow_link(path.dentry, nd); 1917 error = security_inode_follow_link(path.dentry, &nd);
1841 if (error) 1918 if (error)
1842 goto exit_dput; 1919 goto exit_dput;
1843 error = __do_follow_link(&path, nd); 1920 error = __do_follow_link(&path, &nd);
1844 if (error) { 1921 if (error) {
1845 /* Does someone understand code flow here? Or it is only 1922 /* Does someone understand code flow here? Or it is only
1846 * me so stupid? Anathema to whoever designed this non-sense 1923 * me so stupid? Anathema to whoever designed this non-sense
1847 * with "intent.open". 1924 * with "intent.open".
1848 */ 1925 */
1849 release_open_intent(nd); 1926 release_open_intent(&nd);
1850 return error; 1927 return ERR_PTR(error);
1851 } 1928 }
1852 nd->flags &= ~LOOKUP_PARENT; 1929 nd.flags &= ~LOOKUP_PARENT;
1853 if (nd->last_type == LAST_BIND) 1930 if (nd.last_type == LAST_BIND)
1854 goto ok; 1931 goto ok;
1855 error = -EISDIR; 1932 error = -EISDIR;
1856 if (nd->last_type != LAST_NORM) 1933 if (nd.last_type != LAST_NORM)
1857 goto exit; 1934 goto exit;
1858 if (nd->last.name[nd->last.len]) { 1935 if (nd.last.name[nd.last.len]) {
1859 __putname(nd->last.name); 1936 __putname(nd.last.name);
1860 goto exit; 1937 goto exit;
1861 } 1938 }
1862 error = -ELOOP; 1939 error = -ELOOP;
1863 if (count++==32) { 1940 if (count++==32) {
1864 __putname(nd->last.name); 1941 __putname(nd.last.name);
1865 goto exit; 1942 goto exit;
1866 } 1943 }
1867 dir = nd->path.dentry; 1944 dir = nd.path.dentry;
1868 mutex_lock(&dir->d_inode->i_mutex); 1945 mutex_lock(&dir->d_inode->i_mutex);
1869 path.dentry = lookup_hash(nd); 1946 path.dentry = lookup_hash(&nd);
1870 path.mnt = nd->path.mnt; 1947 path.mnt = nd.path.mnt;
1871 __putname(nd->last.name); 1948 __putname(nd.last.name);
1872 goto do_last; 1949 goto do_last;
1873} 1950}
1874 1951
1875/** 1952/**
1953 * filp_open - open file and return file pointer
1954 *
1955 * @filename: path to open
1956 * @flags: open flags as per the open(2) second argument
1957 * @mode: mode for the new file if O_CREAT is set, else ignored
1958 *
1959 * This is the helper to open a file from kernelspace if you really
1960 * have to. But in generally you should not do this, so please move
1961 * along, nothing to see here..
1962 */
1963struct file *filp_open(const char *filename, int flags, int mode)
1964{
1965 return do_filp_open(AT_FDCWD, filename, flags, mode);
1966}
1967EXPORT_SYMBOL(filp_open);
1968
1969/**
1876 * lookup_create - lookup a dentry, creating it if it doesn't exist 1970 * lookup_create - lookup a dentry, creating it if it doesn't exist
1877 * @nd: nameidata info 1971 * @nd: nameidata info
1878 * @is_dir: directory flag 1972 * @is_dir: directory flag
@@ -1945,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1945 return error; 2039 return error;
1946} 2040}
1947 2041
2042static int may_mknod(mode_t mode)
2043{
2044 switch (mode & S_IFMT) {
2045 case S_IFREG:
2046 case S_IFCHR:
2047 case S_IFBLK:
2048 case S_IFIFO:
2049 case S_IFSOCK:
2050 case 0: /* zero mode translates to S_IFREG */
2051 return 0;
2052 case S_IFDIR:
2053 return -EPERM;
2054 default:
2055 return -EINVAL;
2056 }
2057}
2058
1948asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, 2059asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1949 unsigned dev) 2060 unsigned dev)
1950{ 2061{
@@ -1963,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1963 if (error) 2074 if (error)
1964 goto out; 2075 goto out;
1965 dentry = lookup_create(&nd, 0); 2076 dentry = lookup_create(&nd, 0);
1966 error = PTR_ERR(dentry); 2077 if (IS_ERR(dentry)) {
1967 2078 error = PTR_ERR(dentry);
2079 goto out_unlock;
2080 }
1968 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2081 if (!IS_POSIXACL(nd.path.dentry->d_inode))
1969 mode &= ~current->fs->umask; 2082 mode &= ~current->fs->umask;
1970 if (!IS_ERR(dentry)) { 2083 error = may_mknod(mode);
1971 switch (mode & S_IFMT) { 2084 if (error)
2085 goto out_dput;
2086 error = mnt_want_write(nd.path.mnt);
2087 if (error)
2088 goto out_dput;
2089 switch (mode & S_IFMT) {
1972 case 0: case S_IFREG: 2090 case 0: case S_IFREG:
1973 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2091 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
1974 break; 2092 break;
@@ -1979,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1979 case S_IFIFO: case S_IFSOCK: 2097 case S_IFIFO: case S_IFSOCK:
1980 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2098 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
1981 break; 2099 break;
1982 case S_IFDIR:
1983 error = -EPERM;
1984 break;
1985 default:
1986 error = -EINVAL;
1987 }
1988 dput(dentry);
1989 } 2100 }
2101 mnt_drop_write(nd.path.mnt);
2102out_dput:
2103 dput(dentry);
2104out_unlock:
1990 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2105 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1991 path_put(&nd.path); 2106 path_put(&nd.path);
1992out: 2107out:
@@ -2044,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
2044 2159
2045 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2160 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2046 mode &= ~current->fs->umask; 2161 mode &= ~current->fs->umask;
2162 error = mnt_want_write(nd.path.mnt);
2163 if (error)
2164 goto out_dput;
2047 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2165 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
2166 mnt_drop_write(nd.path.mnt);
2167out_dput:
2048 dput(dentry); 2168 dput(dentry);
2049out_unlock: 2169out_unlock:
2050 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2170 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2151,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname)
2151 error = PTR_ERR(dentry); 2271 error = PTR_ERR(dentry);
2152 if (IS_ERR(dentry)) 2272 if (IS_ERR(dentry))
2153 goto exit2; 2273 goto exit2;
2274 error = mnt_want_write(nd.path.mnt);
2275 if (error)
2276 goto exit3;
2154 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2277 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
2278 mnt_drop_write(nd.path.mnt);
2279exit3:
2155 dput(dentry); 2280 dput(dentry);
2156exit2: 2281exit2:
2157 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2282 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2232,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2232 inode = dentry->d_inode; 2357 inode = dentry->d_inode;
2233 if (inode) 2358 if (inode)
2234 atomic_inc(&inode->i_count); 2359 atomic_inc(&inode->i_count);
2360 error = mnt_want_write(nd.path.mnt);
2361 if (error)
2362 goto exit2;
2235 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2363 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
2364 mnt_drop_write(nd.path.mnt);
2236 exit2: 2365 exit2:
2237 dput(dentry); 2366 dput(dentry);
2238 } 2367 }
@@ -2313,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
2313 if (IS_ERR(dentry)) 2442 if (IS_ERR(dentry))
2314 goto out_unlock; 2443 goto out_unlock;
2315 2444
2445 error = mnt_want_write(nd.path.mnt);
2446 if (error)
2447 goto out_dput;
2316 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); 2448 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
2449 mnt_drop_write(nd.path.mnt);
2450out_dput:
2317 dput(dentry); 2451 dput(dentry);
2318out_unlock: 2452out_unlock:
2319 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2453 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2408,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2408 error = PTR_ERR(new_dentry); 2542 error = PTR_ERR(new_dentry);
2409 if (IS_ERR(new_dentry)) 2543 if (IS_ERR(new_dentry))
2410 goto out_unlock; 2544 goto out_unlock;
2545 error = mnt_want_write(nd.path.mnt);
2546 if (error)
2547 goto out_dput;
2411 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); 2548 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
2549 mnt_drop_write(nd.path.mnt);
2550out_dput:
2412 dput(new_dentry); 2551 dput(new_dentry);
2413out_unlock: 2552out_unlock:
2414 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2553 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2634,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname,
2634 if (new_dentry == trap) 2773 if (new_dentry == trap)
2635 goto exit5; 2774 goto exit5;
2636 2775
2776 error = mnt_want_write(oldnd.path.mnt);
2777 if (error)
2778 goto exit5;
2637 error = vfs_rename(old_dir->d_inode, old_dentry, 2779 error = vfs_rename(old_dir->d_inode, old_dentry,
2638 new_dir->d_inode, new_dentry); 2780 new_dir->d_inode, new_dentry);
2781 mnt_drop_write(oldnd.path.mnt);
2639exit5: 2782exit5:
2640 dput(new_dentry); 2783 dput(new_dentry);
2641exit4: 2784exit4:
diff --git a/fs/namespace.c b/fs/namespace.c
index 94f026ec990a..678f7ce060f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -17,6 +17,7 @@
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/acct.h> 18#include <linux/acct.h>
19#include <linux/capability.h> 19#include <linux/capability.h>
20#include <linux/cpumask.h>
20#include <linux/module.h> 21#include <linux/module.h>
21#include <linux/sysfs.h> 22#include <linux/sysfs.h>
22#include <linux/seq_file.h> 23#include <linux/seq_file.h>
@@ -55,6 +56,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
55 return tmp & (HASH_SIZE - 1); 56 return tmp & (HASH_SIZE - 1);
56} 57}
57 58
59#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
60
58struct vfsmount *alloc_vfsmnt(const char *name) 61struct vfsmount *alloc_vfsmnt(const char *name)
59{ 62{
60 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 63 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -68,6 +71,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
68 INIT_LIST_HEAD(&mnt->mnt_share); 71 INIT_LIST_HEAD(&mnt->mnt_share);
69 INIT_LIST_HEAD(&mnt->mnt_slave_list); 72 INIT_LIST_HEAD(&mnt->mnt_slave_list);
70 INIT_LIST_HEAD(&mnt->mnt_slave); 73 INIT_LIST_HEAD(&mnt->mnt_slave);
74 atomic_set(&mnt->__mnt_writers, 0);
71 if (name) { 75 if (name) {
72 int size = strlen(name) + 1; 76 int size = strlen(name) + 1;
73 char *newname = kmalloc(size, GFP_KERNEL); 77 char *newname = kmalloc(size, GFP_KERNEL);
@@ -80,6 +84,263 @@ struct vfsmount *alloc_vfsmnt(const char *name)
80 return mnt; 84 return mnt;
81} 85}
82 86
87/*
88 * Most r/o checks on a fs are for operations that take
89 * discrete amounts of time, like a write() or unlink().
90 * We must keep track of when those operations start
91 * (for permission checks) and when they end, so that
92 * we can determine when writes are able to occur to
93 * a filesystem.
94 */
95/*
96 * __mnt_is_readonly: check whether a mount is read-only
97 * @mnt: the mount to check for its write status
98 *
99 * This shouldn't be used directly ouside of the VFS.
100 * It does not guarantee that the filesystem will stay
101 * r/w, just that it is right *now*. This can not and
102 * should not be used in place of IS_RDONLY(inode).
103 * mnt_want/drop_write() will _keep_ the filesystem
104 * r/w.
105 */
106int __mnt_is_readonly(struct vfsmount *mnt)
107{
108 if (mnt->mnt_flags & MNT_READONLY)
109 return 1;
110 if (mnt->mnt_sb->s_flags & MS_RDONLY)
111 return 1;
112 return 0;
113}
114EXPORT_SYMBOL_GPL(__mnt_is_readonly);
115
116struct mnt_writer {
117 /*
118 * If holding multiple instances of this lock, they
119 * must be ordered by cpu number.
120 */
121 spinlock_t lock;
122 struct lock_class_key lock_class; /* compiles out with !lockdep */
123 unsigned long count;
124 struct vfsmount *mnt;
125} ____cacheline_aligned_in_smp;
126static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
127
128static int __init init_mnt_writers(void)
129{
130 int cpu;
131 for_each_possible_cpu(cpu) {
132 struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
133 spin_lock_init(&writer->lock);
134 lockdep_set_class(&writer->lock, &writer->lock_class);
135 writer->count = 0;
136 }
137 return 0;
138}
139fs_initcall(init_mnt_writers);
140
141static void unlock_mnt_writers(void)
142{
143 int cpu;
144 struct mnt_writer *cpu_writer;
145
146 for_each_possible_cpu(cpu) {
147 cpu_writer = &per_cpu(mnt_writers, cpu);
148 spin_unlock(&cpu_writer->lock);
149 }
150}
151
152static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
153{
154 if (!cpu_writer->mnt)
155 return;
156 /*
157 * This is in case anyone ever leaves an invalid,
158 * old ->mnt and a count of 0.
159 */
160 if (!cpu_writer->count)
161 return;
162 atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
163 cpu_writer->count = 0;
164}
165 /*
166 * must hold cpu_writer->lock
167 */
168static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
169 struct vfsmount *mnt)
170{
171 if (cpu_writer->mnt == mnt)
172 return;
173 __clear_mnt_count(cpu_writer);
174 cpu_writer->mnt = mnt;
175}
176
177/*
178 * Most r/o checks on a fs are for operations that take
179 * discrete amounts of time, like a write() or unlink().
180 * We must keep track of when those operations start
181 * (for permission checks) and when they end, so that
182 * we can determine when writes are able to occur to
183 * a filesystem.
184 */
185/**
186 * mnt_want_write - get write access to a mount
187 * @mnt: the mount on which to take a write
188 *
189 * This tells the low-level filesystem that a write is
190 * about to be performed to it, and makes sure that
191 * writes are allowed before returning success. When
192 * the write operation is finished, mnt_drop_write()
193 * must be called. This is effectively a refcount.
194 */
195int mnt_want_write(struct vfsmount *mnt)
196{
197 int ret = 0;
198 struct mnt_writer *cpu_writer;
199
200 cpu_writer = &get_cpu_var(mnt_writers);
201 spin_lock(&cpu_writer->lock);
202 if (__mnt_is_readonly(mnt)) {
203 ret = -EROFS;
204 goto out;
205 }
206 use_cpu_writer_for_mount(cpu_writer, mnt);
207 cpu_writer->count++;
208out:
209 spin_unlock(&cpu_writer->lock);
210 put_cpu_var(mnt_writers);
211 return ret;
212}
213EXPORT_SYMBOL_GPL(mnt_want_write);
214
215static void lock_mnt_writers(void)
216{
217 int cpu;
218 struct mnt_writer *cpu_writer;
219
220 for_each_possible_cpu(cpu) {
221 cpu_writer = &per_cpu(mnt_writers, cpu);
222 spin_lock(&cpu_writer->lock);
223 __clear_mnt_count(cpu_writer);
224 cpu_writer->mnt = NULL;
225 }
226}
227
228/*
229 * These per-cpu write counts are not guaranteed to have
230 * matched increments and decrements on any given cpu.
231 * A file open()ed for write on one cpu and close()d on
232 * another cpu will imbalance this count. Make sure it
233 * does not get too far out of whack.
234 */
235static void handle_write_count_underflow(struct vfsmount *mnt)
236{
237 if (atomic_read(&mnt->__mnt_writers) >=
238 MNT_WRITER_UNDERFLOW_LIMIT)
239 return;
240 /*
241 * It isn't necessary to hold all of the locks
242 * at the same time, but doing it this way makes
243 * us share a lot more code.
244 */
245 lock_mnt_writers();
246 /*
247 * vfsmount_lock is for mnt_flags.
248 */
249 spin_lock(&vfsmount_lock);
250 /*
251 * If coalescing the per-cpu writer counts did not
252 * get us back to a positive writer count, we have
253 * a bug.
254 */
255 if ((atomic_read(&mnt->__mnt_writers) < 0) &&
256 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
257 printk(KERN_DEBUG "leak detected on mount(%p) writers "
258 "count: %d\n",
259 mnt, atomic_read(&mnt->__mnt_writers));
260 WARN_ON(1);
261 /* use the flag to keep the dmesg spam down */
262 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
263 }
264 spin_unlock(&vfsmount_lock);
265 unlock_mnt_writers();
266}
267
268/**
269 * mnt_drop_write - give up write access to a mount
270 * @mnt: the mount on which to give up write access
271 *
272 * Tells the low-level filesystem that we are done
273 * performing writes to it. Must be matched with
274 * mnt_want_write() call above.
275 */
276void mnt_drop_write(struct vfsmount *mnt)
277{
278 int must_check_underflow = 0;
279 struct mnt_writer *cpu_writer;
280
281 cpu_writer = &get_cpu_var(mnt_writers);
282 spin_lock(&cpu_writer->lock);
283
284 use_cpu_writer_for_mount(cpu_writer, mnt);
285 if (cpu_writer->count > 0) {
286 cpu_writer->count--;
287 } else {
288 must_check_underflow = 1;
289 atomic_dec(&mnt->__mnt_writers);
290 }
291
292 spin_unlock(&cpu_writer->lock);
293 /*
294 * Logically, we could call this each time,
295 * but the __mnt_writers cacheline tends to
296 * be cold, and makes this expensive.
297 */
298 if (must_check_underflow)
299 handle_write_count_underflow(mnt);
300 /*
301 * This could be done right after the spinlock
302 * is taken because the spinlock keeps us on
303 * the cpu, and disables preemption. However,
304 * putting it here bounds the amount that
305 * __mnt_writers can underflow. Without it,
306 * we could theoretically wrap __mnt_writers.
307 */
308 put_cpu_var(mnt_writers);
309}
310EXPORT_SYMBOL_GPL(mnt_drop_write);
311
312static int mnt_make_readonly(struct vfsmount *mnt)
313{
314 int ret = 0;
315
316 lock_mnt_writers();
317 /*
318 * With all the locks held, this value is stable
319 */
320 if (atomic_read(&mnt->__mnt_writers) > 0) {
321 ret = -EBUSY;
322 goto out;
323 }
324 /*
325 * nobody can do a successful mnt_want_write() with all
326 * of the counts in MNT_DENIED_WRITE and the locks held.
327 */
328 spin_lock(&vfsmount_lock);
329 if (!ret)
330 mnt->mnt_flags |= MNT_READONLY;
331 spin_unlock(&vfsmount_lock);
332out:
333 unlock_mnt_writers();
334 return ret;
335}
336
337static void __mnt_unmake_readonly(struct vfsmount *mnt)
338{
339 spin_lock(&vfsmount_lock);
340 mnt->mnt_flags &= ~MNT_READONLY;
341 spin_unlock(&vfsmount_lock);
342}
343
83int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 344int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
84{ 345{
85 mnt->mnt_sb = sb; 346 mnt->mnt_sb = sb;
@@ -271,7 +532,36 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
271 532
272static inline void __mntput(struct vfsmount *mnt) 533static inline void __mntput(struct vfsmount *mnt)
273{ 534{
535 int cpu;
274 struct super_block *sb = mnt->mnt_sb; 536 struct super_block *sb = mnt->mnt_sb;
537 /*
538 * We don't have to hold all of the locks at the
539 * same time here because we know that we're the
540 * last reference to mnt and that no new writers
541 * can come in.
542 */
543 for_each_possible_cpu(cpu) {
544 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
545 if (cpu_writer->mnt != mnt)
546 continue;
547 spin_lock(&cpu_writer->lock);
548 atomic_add(cpu_writer->count, &mnt->__mnt_writers);
549 cpu_writer->count = 0;
550 /*
551 * Might as well do this so that no one
552 * ever sees the pointer and expects
553 * it to be valid.
554 */
555 cpu_writer->mnt = NULL;
556 spin_unlock(&cpu_writer->lock);
557 }
558 /*
559 * This probably indicates that somebody messed
560 * up a mnt_want/drop_write() pair. If this
561 * happens, the filesystem was probably unable
562 * to make r/w->r/o transitions.
563 */
564 WARN_ON(atomic_read(&mnt->__mnt_writers));
275 dput(mnt->mnt_root); 565 dput(mnt->mnt_root);
276 free_vfsmnt(mnt); 566 free_vfsmnt(mnt);
277 deactivate_super(sb); 567 deactivate_super(sb);
@@ -417,7 +707,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
417 seq_putc(m, '.'); 707 seq_putc(m, '.');
418 mangle(m, mnt->mnt_sb->s_subtype); 708 mangle(m, mnt->mnt_sb->s_subtype);
419 } 709 }
420 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); 710 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
421 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { 711 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
422 if (mnt->mnt_sb->s_flags & fs_infop->flag) 712 if (mnt->mnt_sb->s_flags & fs_infop->flag)
423 seq_puts(m, fs_infop->str); 713 seq_puts(m, fs_infop->str);
@@ -1019,6 +1309,23 @@ out:
1019 return err; 1309 return err;
1020} 1310}
1021 1311
1312static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1313{
1314 int error = 0;
1315 int readonly_request = 0;
1316
1317 if (ms_flags & MS_RDONLY)
1318 readonly_request = 1;
1319 if (readonly_request == __mnt_is_readonly(mnt))
1320 return 0;
1321
1322 if (readonly_request)
1323 error = mnt_make_readonly(mnt);
1324 else
1325 __mnt_unmake_readonly(mnt);
1326 return error;
1327}
1328
1022/* 1329/*
1023 * change filesystem flags. dir should be a physical root of filesystem. 1330 * change filesystem flags. dir should be a physical root of filesystem.
1024 * If you've mounted a non-root directory somewhere and want to do remount 1331 * If you've mounted a non-root directory somewhere and want to do remount
@@ -1041,7 +1348,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
1041 return -EINVAL; 1348 return -EINVAL;
1042 1349
1043 down_write(&sb->s_umount); 1350 down_write(&sb->s_umount);
1044 err = do_remount_sb(sb, flags, data, 0); 1351 if (flags & MS_BIND)
1352 err = change_mount_flags(nd->path.mnt, flags);
1353 else
1354 err = do_remount_sb(sb, flags, data, 0);
1045 if (!err) 1355 if (!err)
1046 nd->path.mnt->mnt_flags = mnt_flags; 1356 nd->path.mnt->mnt_flags = mnt_flags;
1047 up_write(&sb->s_umount); 1357 up_write(&sb->s_umount);
@@ -1425,6 +1735,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1425 mnt_flags |= MNT_NODIRATIME; 1735 mnt_flags |= MNT_NODIRATIME;
1426 if (flags & MS_RELATIME) 1736 if (flags & MS_RELATIME)
1427 mnt_flags |= MNT_RELATIME; 1737 mnt_flags |= MNT_RELATIME;
1738 if (flags & MS_RDONLY)
1739 mnt_flags |= MNT_READONLY;
1428 1740
1429 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1741 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1430 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); 1742 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c67b4bdcf719..ad8f167e54bc 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/ioctl.h> 14#include <linux/ioctl.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/mount.h>
17#include <linux/highuid.h> 18#include <linux/highuid.h>
18#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
19#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
@@ -261,7 +262,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
261} 262}
262#endif /* CONFIG_NCPFS_NLS */ 263#endif /* CONFIG_NCPFS_NLS */
263 264
264int ncp_ioctl(struct inode *inode, struct file *filp, 265static int __ncp_ioctl(struct inode *inode, struct file *filp,
265 unsigned int cmd, unsigned long arg) 266 unsigned int cmd, unsigned long arg)
266{ 267{
267 struct ncp_server *server = NCP_SERVER(inode); 268 struct ncp_server *server = NCP_SERVER(inode);
@@ -822,6 +823,57 @@ outrel:
822 return -EINVAL; 823 return -EINVAL;
823} 824}
824 825
826static int ncp_ioctl_need_write(unsigned int cmd)
827{
828 switch (cmd) {
829 case NCP_IOC_GET_FS_INFO:
830 case NCP_IOC_GET_FS_INFO_V2:
831 case NCP_IOC_NCPREQUEST:
832 case NCP_IOC_SETDENTRYTTL:
833 case NCP_IOC_SIGN_INIT:
834 case NCP_IOC_LOCKUNLOCK:
835 case NCP_IOC_SET_SIGN_WANTED:
836 return 1;
837 case NCP_IOC_GETOBJECTNAME:
838 case NCP_IOC_SETOBJECTNAME:
839 case NCP_IOC_GETPRIVATEDATA:
840 case NCP_IOC_SETPRIVATEDATA:
841 case NCP_IOC_SETCHARSETS:
842 case NCP_IOC_GETCHARSETS:
843 case NCP_IOC_CONN_LOGGED_IN:
844 case NCP_IOC_GETDENTRYTTL:
845 case NCP_IOC_GETMOUNTUID2:
846 case NCP_IOC_SIGN_WANTED:
847 case NCP_IOC_GETROOT:
848 case NCP_IOC_SETROOT:
849 return 0;
850 default:
851 /* unkown IOCTL command, assume write */
852 return 1;
853 }
854}
855
856int ncp_ioctl(struct inode *inode, struct file *filp,
857 unsigned int cmd, unsigned long arg)
858{
859 int ret;
860
861 if (ncp_ioctl_need_write(cmd)) {
862 /*
863 * inside the ioctl(), any failures which
864 * are because of file_permission() are
865 * -EACCESS, so it seems consistent to keep
866 * that here.
867 */
868 if (mnt_want_write(filp->f_path.mnt))
869 return -EACCES;
870 }
871 ret = __ncp_ioctl(inode, filp, cmd, arg);
872 if (ncp_ioctl_need_write(cmd))
873 mnt_drop_write(filp->f_path.mnt);
874 return ret;
875}
876
825#ifdef CONFIG_COMPAT 877#ifdef CONFIG_COMPAT
826long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 878long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
827{ 879{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6cea7479c5b4..d9e30ac2798d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -967,7 +967,8 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
967 if (nd->flags & LOOKUP_DIRECTORY) 967 if (nd->flags & LOOKUP_DIRECTORY)
968 return 0; 968 return 0;
969 /* Are we trying to write to a read only partition? */ 969 /* Are we trying to write to a read only partition? */
970 if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) 970 if (__mnt_is_readonly(nd->path.mnt) &&
971 (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
971 return 0; 972 return 0;
972 return 1; 973 return 1;
973} 974}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c593db047d8b..c309c881bd4e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -658,14 +658,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
658 return status; 658 return status;
659 } 659 }
660 } 660 }
661 status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
662 if (status)
663 return status;
661 status = nfs_ok; 664 status = nfs_ok;
662 if (setattr->sa_acl != NULL) 665 if (setattr->sa_acl != NULL)
663 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, 666 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
664 setattr->sa_acl); 667 setattr->sa_acl);
665 if (status) 668 if (status)
666 return status; 669 goto out;
667 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 670 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
668 0, (time_t)0); 671 0, (time_t)0);
672out:
673 mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
669 return status; 674 return status;
670} 675}
671 676
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 1ff90625860f..145b3c877a27 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -46,6 +46,7 @@
46#include <linux/scatterlist.h> 46#include <linux/scatterlist.h>
47#include <linux/crypto.h> 47#include <linux/crypto.h>
48#include <linux/sched.h> 48#include <linux/sched.h>
49#include <linux/mount.h>
49 50
50#define NFSDDBG_FACILITY NFSDDBG_PROC 51#define NFSDDBG_FACILITY NFSDDBG_PROC
51 52
@@ -154,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
154 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); 155 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
155 goto out_put; 156 goto out_put;
156 } 157 }
158 status = mnt_want_write(rec_dir.path.mnt);
159 if (status)
160 goto out_put;
157 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); 161 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU);
162 mnt_drop_write(rec_dir.path.mnt);
158out_put: 163out_put:
159 dput(dentry); 164 dput(dentry);
160out_unlock: 165out_unlock:
@@ -313,12 +318,17 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
313 if (!rec_dir_init || !clp->cl_firststate) 318 if (!rec_dir_init || !clp->cl_firststate)
314 return; 319 return;
315 320
321 status = mnt_want_write(rec_dir.path.mnt);
322 if (status)
323 goto out;
316 clp->cl_firststate = 0; 324 clp->cl_firststate = 0;
317 nfs4_save_user(&uid, &gid); 325 nfs4_save_user(&uid, &gid);
318 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); 326 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
319 nfs4_reset_user(uid, gid); 327 nfs4_reset_user(uid, gid);
320 if (status == 0) 328 if (status == 0)
321 nfsd4_sync_rec_dir(); 329 nfsd4_sync_rec_dir();
330 mnt_drop_write(rec_dir.path.mnt);
331out:
322 if (status) 332 if (status)
323 printk("NFSD: Failed to remove expired client state directory" 333 printk("NFSD: Failed to remove expired client state directory"
324 " %.*s\n", HEXDIR_LEN, clp->cl_recdir); 334 " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
@@ -347,13 +357,17 @@ nfsd4_recdir_purge_old(void) {
347 357
348 if (!rec_dir_init) 358 if (!rec_dir_init)
349 return; 359 return;
360 status = mnt_want_write(rec_dir.path.mnt);
361 if (status)
362 goto out;
350 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); 363 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old);
351 if (status == 0) 364 if (status == 0)
352 nfsd4_sync_rec_dir(); 365 nfsd4_sync_rec_dir();
366 mnt_drop_write(rec_dir.path.mnt);
367out:
353 if (status) 368 if (status)
354 printk("nfsd4: failed to purge old clients from recovery" 369 printk("nfsd4: failed to purge old clients from recovery"
355 " directory %s\n", rec_dir.path.dentry->d_name.name); 370 " directory %s\n", rec_dir.path.dentry->d_name.name);
356 return;
357} 371}
358 372
359static int 373static int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bcb97d8e8b8b..81a75f3081f4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -41,6 +41,7 @@
41#include <linux/sunrpc/svc.h> 41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h> 42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/cache.h> 43#include <linux/nfsd/cache.h>
44#include <linux/file.h>
44#include <linux/mount.h> 45#include <linux/mount.h>
45#include <linux/workqueue.h> 46#include <linux/workqueue.h>
46#include <linux/smp_lock.h> 47#include <linux/smp_lock.h>
@@ -1239,7 +1240,7 @@ static inline void
1239nfs4_file_downgrade(struct file *filp, unsigned int share_access) 1240nfs4_file_downgrade(struct file *filp, unsigned int share_access)
1240{ 1241{
1241 if (share_access & NFS4_SHARE_ACCESS_WRITE) { 1242 if (share_access & NFS4_SHARE_ACCESS_WRITE) {
1242 put_write_access(filp->f_path.dentry->d_inode); 1243 drop_file_write_access(filp);
1243 filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; 1244 filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
1244 } 1245 }
1245} 1246}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46f59d5365a0..304bf5f643c9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1255,23 +1255,35 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1255 err = 0; 1255 err = 0;
1256 switch (type) { 1256 switch (type) {
1257 case S_IFREG: 1257 case S_IFREG:
1258 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1259 if (host_err)
1260 goto out_nfserr;
1258 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1261 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1259 break; 1262 break;
1260 case S_IFDIR: 1263 case S_IFDIR:
1264 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1265 if (host_err)
1266 goto out_nfserr;
1261 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1267 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
1262 break; 1268 break;
1263 case S_IFCHR: 1269 case S_IFCHR:
1264 case S_IFBLK: 1270 case S_IFBLK:
1265 case S_IFIFO: 1271 case S_IFIFO:
1266 case S_IFSOCK: 1272 case S_IFSOCK:
1273 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1274 if (host_err)
1275 goto out_nfserr;
1267 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1276 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1268 break; 1277 break;
1269 default: 1278 default:
1270 printk("nfsd: bad file type %o in nfsd_create\n", type); 1279 printk("nfsd: bad file type %o in nfsd_create\n", type);
1271 host_err = -EINVAL; 1280 host_err = -EINVAL;
1281 goto out_nfserr;
1272 } 1282 }
1273 if (host_err < 0) 1283 if (host_err < 0) {
1284 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1274 goto out_nfserr; 1285 goto out_nfserr;
1286 }
1275 1287
1276 if (EX_ISSYNC(fhp->fh_export)) { 1288 if (EX_ISSYNC(fhp->fh_export)) {
1277 err = nfserrno(nfsd_sync_dir(dentry)); 1289 err = nfserrno(nfsd_sync_dir(dentry));
@@ -1282,6 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1282 err2 = nfsd_create_setattr(rqstp, resfhp, iap); 1294 err2 = nfsd_create_setattr(rqstp, resfhp, iap);
1283 if (err2) 1295 if (err2)
1284 err = err2; 1296 err = err2;
1297 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1285 /* 1298 /*
1286 * Update the file handle to get the new inode info. 1299 * Update the file handle to get the new inode info.
1287 */ 1300 */
@@ -1359,6 +1372,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1359 v_atime = verifier[1]&0x7fffffff; 1372 v_atime = verifier[1]&0x7fffffff;
1360 } 1373 }
1361 1374
1375 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1376 if (host_err)
1377 goto out_nfserr;
1362 if (dchild->d_inode) { 1378 if (dchild->d_inode) {
1363 err = 0; 1379 err = 0;
1364 1380
@@ -1390,12 +1406,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1390 case NFS3_CREATE_GUARDED: 1406 case NFS3_CREATE_GUARDED:
1391 err = nfserr_exist; 1407 err = nfserr_exist;
1392 } 1408 }
1409 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1393 goto out; 1410 goto out;
1394 } 1411 }
1395 1412
1396 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1413 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1397 if (host_err < 0) 1414 if (host_err < 0) {
1415 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1398 goto out_nfserr; 1416 goto out_nfserr;
1417 }
1399 if (created) 1418 if (created)
1400 *created = 1; 1419 *created = 1;
1401 1420
@@ -1420,6 +1439,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1420 if (err2) 1439 if (err2)
1421 err = err2; 1440 err = err2;
1422 1441
1442 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1423 /* 1443 /*
1424 * Update the filehandle to get the new inode info. 1444 * Update the filehandle to get the new inode info.
1425 */ 1445 */
@@ -1522,6 +1542,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1522 if (iap && (iap->ia_valid & ATTR_MODE)) 1542 if (iap && (iap->ia_valid & ATTR_MODE))
1523 mode = iap->ia_mode & S_IALLUGO; 1543 mode = iap->ia_mode & S_IALLUGO;
1524 1544
1545 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1546 if (host_err)
1547 goto out_nfserr;
1548
1525 if (unlikely(path[plen] != 0)) { 1549 if (unlikely(path[plen] != 0)) {
1526 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1550 char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1527 if (path_alloced == NULL) 1551 if (path_alloced == NULL)
@@ -1542,6 +1566,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1542 err = nfserrno(host_err); 1566 err = nfserrno(host_err);
1543 fh_unlock(fhp); 1567 fh_unlock(fhp);
1544 1568
1569 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1570
1545 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1571 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1546 dput(dnew); 1572 dput(dnew);
1547 if (err==0) err = cerr; 1573 if (err==0) err = cerr;
@@ -1592,6 +1618,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1592 dold = tfhp->fh_dentry; 1618 dold = tfhp->fh_dentry;
1593 dest = dold->d_inode; 1619 dest = dold->d_inode;
1594 1620
1621 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
1622 if (host_err) {
1623 err = nfserrno(host_err);
1624 goto out_dput;
1625 }
1595 host_err = vfs_link(dold, dirp, dnew); 1626 host_err = vfs_link(dold, dirp, dnew);
1596 if (!host_err) { 1627 if (!host_err) {
1597 if (EX_ISSYNC(ffhp->fh_export)) { 1628 if (EX_ISSYNC(ffhp->fh_export)) {
@@ -1605,7 +1636,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1605 else 1636 else
1606 err = nfserrno(host_err); 1637 err = nfserrno(host_err);
1607 } 1638 }
1608 1639 mnt_drop_write(tfhp->fh_export->ex_path.mnt);
1640out_dput:
1609 dput(dnew); 1641 dput(dnew);
1610out_unlock: 1642out_unlock:
1611 fh_unlock(ffhp); 1643 fh_unlock(ffhp);
@@ -1678,13 +1710,20 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1678 if (ndentry == trap) 1710 if (ndentry == trap)
1679 goto out_dput_new; 1711 goto out_dput_new;
1680 1712
1681#ifdef MSNFS 1713 if (svc_msnfs(ffhp) &&
1682 if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1683 ((atomic_read(&odentry->d_count) > 1) 1714 ((atomic_read(&odentry->d_count) > 1)
1684 || (atomic_read(&ndentry->d_count) > 1))) { 1715 || (atomic_read(&ndentry->d_count) > 1))) {
1685 host_err = -EPERM; 1716 host_err = -EPERM;
1686 } else 1717 goto out_dput_new;
1687#endif 1718 }
1719
1720 host_err = -EXDEV;
1721 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1722 goto out_dput_new;
1723 host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
1724 if (host_err)
1725 goto out_dput_new;
1726
1688 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1727 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1689 if (!host_err && EX_ISSYNC(tfhp->fh_export)) { 1728 if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
1690 host_err = nfsd_sync_dir(tdentry); 1729 host_err = nfsd_sync_dir(tdentry);
@@ -1692,6 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1692 host_err = nfsd_sync_dir(fdentry); 1731 host_err = nfsd_sync_dir(fdentry);
1693 } 1732 }
1694 1733
1734 mnt_drop_write(ffhp->fh_export->ex_path.mnt);
1735
1695 out_dput_new: 1736 out_dput_new:
1696 dput(ndentry); 1737 dput(ndentry);
1697 out_dput_old: 1738 out_dput_old:
@@ -1750,6 +1791,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1750 if (!type) 1791 if (!type)
1751 type = rdentry->d_inode->i_mode & S_IFMT; 1792 type = rdentry->d_inode->i_mode & S_IFMT;
1752 1793
1794 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1795 if (host_err)
1796 goto out_nfserr;
1797
1753 if (type != S_IFDIR) { /* It's UNLINK */ 1798 if (type != S_IFDIR) { /* It's UNLINK */
1754#ifdef MSNFS 1799#ifdef MSNFS
1755 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1800 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
@@ -1765,10 +1810,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1765 dput(rdentry); 1810 dput(rdentry);
1766 1811
1767 if (host_err) 1812 if (host_err)
1768 goto out_nfserr; 1813 goto out_drop;
1769 if (EX_ISSYNC(fhp->fh_export)) 1814 if (EX_ISSYNC(fhp->fh_export))
1770 host_err = nfsd_sync_dir(dentry); 1815 host_err = nfsd_sync_dir(dentry);
1771 1816
1817out_drop:
1818 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1772out_nfserr: 1819out_nfserr:
1773 err = nfserrno(host_err); 1820 err = nfserrno(host_err);
1774out: 1821out:
@@ -1865,7 +1912,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1865 inode->i_mode, 1912 inode->i_mode,
1866 IS_IMMUTABLE(inode)? " immut" : "", 1913 IS_IMMUTABLE(inode)? " immut" : "",
1867 IS_APPEND(inode)? " append" : "", 1914 IS_APPEND(inode)? " append" : "",
1868 IS_RDONLY(inode)? " ro" : ""); 1915 __mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
1869 dprintk(" owner %d/%d user %d/%d\n", 1916 dprintk(" owner %d/%d user %d/%d\n",
1870 inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); 1917 inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
1871#endif 1918#endif
@@ -1876,7 +1923,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1876 */ 1923 */
1877 if (!(acc & MAY_LOCAL_ACCESS)) 1924 if (!(acc & MAY_LOCAL_ACCESS))
1878 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { 1925 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
1879 if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode)) 1926 if (exp_rdonly(rqstp, exp) ||
1927 __mnt_is_readonly(exp->ex_path.mnt))
1880 return nfserr_rofs; 1928 return nfserr_rofs;
1881 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) 1929 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
1882 return nfserr_perm; 1930 return nfserr_perm;
@@ -2039,6 +2087,9 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2039 } else 2087 } else
2040 size = 0; 2088 size = 0;
2041 2089
2090 error = mnt_want_write(fhp->fh_export->ex_path.mnt);
2091 if (error)
2092 goto getout;
2042 if (size) 2093 if (size)
2043 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); 2094 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
2044 else { 2095 else {
@@ -2050,6 +2101,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2050 error = 0; 2101 error = 0;
2051 } 2102 }
2052 } 2103 }
2104 mnt_drop_write(fhp->fh_export->ex_path.mnt);
2053 2105
2054getout: 2106getout:
2055 kfree(value); 2107 kfree(value);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index b413166dd163..7b142f0ce995 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -60,10 +60,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
60 goto bail; 60 goto bail;
61 } 61 }
62 62
63 status = -EROFS;
64 if (IS_RDONLY(inode))
65 goto bail_unlock;
66
67 status = -EACCES; 63 status = -EACCES;
68 if (!is_owner_or_cap(inode)) 64 if (!is_owner_or_cap(inode))
69 goto bail_unlock; 65 goto bail_unlock;
@@ -134,8 +130,13 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
134 if (get_user(flags, (int __user *) arg)) 130 if (get_user(flags, (int __user *) arg))
135 return -EFAULT; 131 return -EFAULT;
136 132
137 return ocfs2_set_inode_attr(inode, flags, 133 status = mnt_want_write(filp->f_path.mnt);
134 if (status)
135 return status;
136 status = ocfs2_set_inode_attr(inode, flags,
138 OCFS2_FL_MODIFIABLE); 137 OCFS2_FL_MODIFIABLE);
138 mnt_drop_write(filp->f_path.mnt);
139 return status;
139 case OCFS2_IOC_RESVSP: 140 case OCFS2_IOC_RESVSP:
140 case OCFS2_IOC_RESVSP64: 141 case OCFS2_IOC_RESVSP64:
141 case OCFS2_IOC_UNRESVSP: 142 case OCFS2_IOC_UNRESVSP:
diff --git a/fs/open.c b/fs/open.c
index 3fa4e4ffce4c..b70e7666bb2c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -244,21 +244,21 @@ static long do_sys_truncate(const char __user * path, loff_t length)
244 if (!S_ISREG(inode->i_mode)) 244 if (!S_ISREG(inode->i_mode))
245 goto dput_and_out; 245 goto dput_and_out;
246 246
247 error = vfs_permission(&nd, MAY_WRITE); 247 error = mnt_want_write(nd.path.mnt);
248 if (error) 248 if (error)
249 goto dput_and_out; 249 goto dput_and_out;
250 250
251 error = -EROFS; 251 error = vfs_permission(&nd, MAY_WRITE);
252 if (IS_RDONLY(inode)) 252 if (error)
253 goto dput_and_out; 253 goto mnt_drop_write_and_out;
254 254
255 error = -EPERM; 255 error = -EPERM;
256 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 256 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
257 goto dput_and_out; 257 goto mnt_drop_write_and_out;
258 258
259 error = get_write_access(inode); 259 error = get_write_access(inode);
260 if (error) 260 if (error)
261 goto dput_and_out; 261 goto mnt_drop_write_and_out;
262 262
263 /* 263 /*
264 * Make sure that there are no leases. get_write_access() protects 264 * Make sure that there are no leases. get_write_access() protects
@@ -276,6 +276,8 @@ static long do_sys_truncate(const char __user * path, loff_t length)
276 276
277put_write_and_out: 277put_write_and_out:
278 put_write_access(inode); 278 put_write_access(inode);
279mnt_drop_write_and_out:
280 mnt_drop_write(nd.path.mnt);
279dput_and_out: 281dput_and_out:
280 path_put(&nd.path); 282 path_put(&nd.path);
281out: 283out:
@@ -457,8 +459,17 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
457 if(res || !(mode & S_IWOTH) || 459 if(res || !(mode & S_IWOTH) ||
458 special_file(nd.path.dentry->d_inode->i_mode)) 460 special_file(nd.path.dentry->d_inode->i_mode))
459 goto out_path_release; 461 goto out_path_release;
460 462 /*
461 if(IS_RDONLY(nd.path.dentry->d_inode)) 463 * This is a rare case where using __mnt_is_readonly()
464 * is OK without a mnt_want/drop_write() pair. Since
465 * no actual write to the fs is performed here, we do
466 * not need to telegraph to that to anyone.
467 *
468 * By doing this, we accept that this access is
469 * inherently racy and know that the fs may change
470 * state before we even see this result.
471 */
472 if (__mnt_is_readonly(nd.path.mnt))
462 res = -EROFS; 473 res = -EROFS;
463 474
464out_path_release: 475out_path_release:
@@ -567,12 +578,12 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
567 578
568 audit_inode(NULL, dentry); 579 audit_inode(NULL, dentry);
569 580
570 err = -EROFS; 581 err = mnt_want_write(file->f_path.mnt);
571 if (IS_RDONLY(inode)) 582 if (err)
572 goto out_putf; 583 goto out_putf;
573 err = -EPERM; 584 err = -EPERM;
574 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 585 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
575 goto out_putf; 586 goto out_drop_write;
576 mutex_lock(&inode->i_mutex); 587 mutex_lock(&inode->i_mutex);
577 if (mode == (mode_t) -1) 588 if (mode == (mode_t) -1)
578 mode = inode->i_mode; 589 mode = inode->i_mode;
@@ -581,6 +592,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
581 err = notify_change(dentry, &newattrs); 592 err = notify_change(dentry, &newattrs);
582 mutex_unlock(&inode->i_mutex); 593 mutex_unlock(&inode->i_mutex);
583 594
595out_drop_write:
596 mnt_drop_write(file->f_path.mnt);
584out_putf: 597out_putf:
585 fput(file); 598 fput(file);
586out: 599out:
@@ -600,13 +613,13 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
600 goto out; 613 goto out;
601 inode = nd.path.dentry->d_inode; 614 inode = nd.path.dentry->d_inode;
602 615
603 error = -EROFS; 616 error = mnt_want_write(nd.path.mnt);
604 if (IS_RDONLY(inode)) 617 if (error)
605 goto dput_and_out; 618 goto dput_and_out;
606 619
607 error = -EPERM; 620 error = -EPERM;
608 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 621 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
609 goto dput_and_out; 622 goto out_drop_write;
610 623
611 mutex_lock(&inode->i_mutex); 624 mutex_lock(&inode->i_mutex);
612 if (mode == (mode_t) -1) 625 if (mode == (mode_t) -1)
@@ -616,6 +629,8 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
616 error = notify_change(nd.path.dentry, &newattrs); 629 error = notify_change(nd.path.dentry, &newattrs);
617 mutex_unlock(&inode->i_mutex); 630 mutex_unlock(&inode->i_mutex);
618 631
632out_drop_write:
633 mnt_drop_write(nd.path.mnt);
619dput_and_out: 634dput_and_out:
620 path_put(&nd.path); 635 path_put(&nd.path);
621out: 636out:
@@ -638,9 +653,6 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
638 printk(KERN_ERR "chown_common: NULL inode\n"); 653 printk(KERN_ERR "chown_common: NULL inode\n");
639 goto out; 654 goto out;
640 } 655 }
641 error = -EROFS;
642 if (IS_RDONLY(inode))
643 goto out;
644 error = -EPERM; 656 error = -EPERM;
645 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 657 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
646 goto out; 658 goto out;
@@ -671,7 +683,12 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
671 error = user_path_walk(filename, &nd); 683 error = user_path_walk(filename, &nd);
672 if (error) 684 if (error)
673 goto out; 685 goto out;
686 error = mnt_want_write(nd.path.mnt);
687 if (error)
688 goto out_release;
674 error = chown_common(nd.path.dentry, user, group); 689 error = chown_common(nd.path.dentry, user, group);
690 mnt_drop_write(nd.path.mnt);
691out_release:
675 path_put(&nd.path); 692 path_put(&nd.path);
676out: 693out:
677 return error; 694 return error;
@@ -691,7 +708,12 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
691 error = __user_walk_fd(dfd, filename, follow, &nd); 708 error = __user_walk_fd(dfd, filename, follow, &nd);
692 if (error) 709 if (error)
693 goto out; 710 goto out;
711 error = mnt_want_write(nd.path.mnt);
712 if (error)
713 goto out_release;
694 error = chown_common(nd.path.dentry, user, group); 714 error = chown_common(nd.path.dentry, user, group);
715 mnt_drop_write(nd.path.mnt);
716out_release:
695 path_put(&nd.path); 717 path_put(&nd.path);
696out: 718out:
697 return error; 719 return error;
@@ -705,7 +727,12 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group
705 error = user_path_walk_link(filename, &nd); 727 error = user_path_walk_link(filename, &nd);
706 if (error) 728 if (error)
707 goto out; 729 goto out;
730 error = mnt_want_write(nd.path.mnt);
731 if (error)
732 goto out_release;
708 error = chown_common(nd.path.dentry, user, group); 733 error = chown_common(nd.path.dentry, user, group);
734 mnt_drop_write(nd.path.mnt);
735out_release:
709 path_put(&nd.path); 736 path_put(&nd.path);
710out: 737out:
711 return error; 738 return error;
@@ -722,14 +749,48 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
722 if (!file) 749 if (!file)
723 goto out; 750 goto out;
724 751
752 error = mnt_want_write(file->f_path.mnt);
753 if (error)
754 goto out_fput;
725 dentry = file->f_path.dentry; 755 dentry = file->f_path.dentry;
726 audit_inode(NULL, dentry); 756 audit_inode(NULL, dentry);
727 error = chown_common(dentry, user, group); 757 error = chown_common(dentry, user, group);
758 mnt_drop_write(file->f_path.mnt);
759out_fput:
728 fput(file); 760 fput(file);
729out: 761out:
730 return error; 762 return error;
731} 763}
732 764
765/*
766 * You have to be very careful that these write
767 * counts get cleaned up in error cases and
768 * upon __fput(). This should probably never
769 * be called outside of __dentry_open().
770 */
771static inline int __get_file_write_access(struct inode *inode,
772 struct vfsmount *mnt)
773{
774 int error;
775 error = get_write_access(inode);
776 if (error)
777 return error;
778 /*
779 * Do not take mount writer counts on
780 * special files since no writes to
781 * the mount itself will occur.
782 */
783 if (!special_file(inode->i_mode)) {
784 /*
785 * Balanced in __fput()
786 */
787 error = mnt_want_write(mnt);
788 if (error)
789 put_write_access(inode);
790 }
791 return error;
792}
793
733static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 794static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
734 int flags, struct file *f, 795 int flags, struct file *f,
735 int (*open)(struct inode *, struct file *)) 796 int (*open)(struct inode *, struct file *))
@@ -742,9 +803,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
742 FMODE_PREAD | FMODE_PWRITE; 803 FMODE_PREAD | FMODE_PWRITE;
743 inode = dentry->d_inode; 804 inode = dentry->d_inode;
744 if (f->f_mode & FMODE_WRITE) { 805 if (f->f_mode & FMODE_WRITE) {
745 error = get_write_access(inode); 806 error = __get_file_write_access(inode, mnt);
746 if (error) 807 if (error)
747 goto cleanup_file; 808 goto cleanup_file;
809 if (!special_file(inode->i_mode))
810 file_take_write(f);
748 } 811 }
749 812
750 f->f_mapping = inode->i_mapping; 813 f->f_mapping = inode->i_mapping;
@@ -784,8 +847,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
784 847
785cleanup_all: 848cleanup_all:
786 fops_put(f->f_op); 849 fops_put(f->f_op);
787 if (f->f_mode & FMODE_WRITE) 850 if (f->f_mode & FMODE_WRITE) {
788 put_write_access(inode); 851 put_write_access(inode);
852 if (!special_file(inode->i_mode)) {
853 /*
854 * We don't consider this a real
855 * mnt_want/drop_write() pair
856 * because it all happenend right
857 * here, so just reset the state.
858 */
859 file_reset_write(f);
860 mnt_drop_write(mnt);
861 }
862 }
789 file_kill(f); 863 file_kill(f);
790 f->f_path.dentry = NULL; 864 f->f_path.dentry = NULL;
791 f->f_path.mnt = NULL; 865 f->f_path.mnt = NULL;
@@ -796,43 +870,6 @@ cleanup_file:
796 return ERR_PTR(error); 870 return ERR_PTR(error);
797} 871}
798 872
799/*
800 * Note that while the flag value (low two bits) for sys_open means:
801 * 00 - read-only
802 * 01 - write-only
803 * 10 - read-write
804 * 11 - special
805 * it is changed into
806 * 00 - no permissions needed
807 * 01 - read-permission
808 * 10 - write-permission
809 * 11 - read-write
810 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
811 * used by symlinks.
812 */
813static struct file *do_filp_open(int dfd, const char *filename, int flags,
814 int mode)
815{
816 int namei_flags, error;
817 struct nameidata nd;
818
819 namei_flags = flags;
820 if ((namei_flags+1) & O_ACCMODE)
821 namei_flags++;
822
823 error = open_namei(dfd, filename, namei_flags, mode, &nd);
824 if (!error)
825 return nameidata_to_filp(&nd, flags);
826
827 return ERR_PTR(error);
828}
829
830struct file *filp_open(const char *filename, int flags, int mode)
831{
832 return do_filp_open(AT_FDCWD, filename, flags, mode);
833}
834EXPORT_SYMBOL(filp_open);
835
836/** 873/**
837 * lookup_instantiate_filp - instantiates the open intent filp 874 * lookup_instantiate_filp - instantiates the open intent filp
838 * @nd: pointer to nameidata 875 * @nd: pointer to nameidata
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 03f808c5b79d..6149e4b58c88 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -473,6 +473,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
473 return 0; 473 return 0;
474 if (IS_ERR(state)) /* I/O error reading the partition table */ 474 if (IS_ERR(state)) /* I/O error reading the partition table */
475 return -EIO; 475 return -EIO;
476
477 /* tell userspace that the media / partition table may have changed */
478 kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE);
479
476 for (p = 1; p < state->limit; p++) { 480 for (p = 1; p < state->limit; p++) {
477 sector_t size = state->parts[p].size; 481 sector_t size = state->parts[p].size;
478 sector_t from = state->parts[p].from; 482 sector_t from = state->parts[p].from;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index e0f0f098a523..74363a7aacbc 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -4,6 +4,7 @@
4 4
5#include <linux/capability.h> 5#include <linux/capability.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/mount.h>
7#include <linux/reiserfs_fs.h> 8#include <linux/reiserfs_fs.h>
8#include <linux/time.h> 9#include <linux/time.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
@@ -25,6 +26,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
25 unsigned long arg) 26 unsigned long arg)
26{ 27{
27 unsigned int flags; 28 unsigned int flags;
29 int err = 0;
28 30
29 switch (cmd) { 31 switch (cmd) {
30 case REISERFS_IOC_UNPACK: 32 case REISERFS_IOC_UNPACK:
@@ -48,50 +50,67 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
48 if (!reiserfs_attrs(inode->i_sb)) 50 if (!reiserfs_attrs(inode->i_sb))
49 return -ENOTTY; 51 return -ENOTTY;
50 52
51 if (IS_RDONLY(inode)) 53 err = mnt_want_write(filp->f_path.mnt);
52 return -EROFS; 54 if (err)
55 return err;
53 56
54 if (!is_owner_or_cap(inode)) 57 if (!is_owner_or_cap(inode)) {
55 return -EPERM; 58 err = -EPERM;
56 59 goto setflags_out;
57 if (get_user(flags, (int __user *)arg)) 60 }
58 return -EFAULT; 61 if (get_user(flags, (int __user *)arg)) {
59 62 err = -EFAULT;
60 /* Is it quota file? Do not allow user to mess with it. */ 63 goto setflags_out;
61 if (IS_NOQUOTA(inode)) 64 }
62 return -EPERM; 65 /*
66 * Is it quota file? Do not allow user to mess with it
67 */
68 if (IS_NOQUOTA(inode)) {
69 err = -EPERM;
70 goto setflags_out;
71 }
63 if (((flags ^ REISERFS_I(inode)-> 72 if (((flags ^ REISERFS_I(inode)->
64 i_attrs) & (REISERFS_IMMUTABLE_FL | 73 i_attrs) & (REISERFS_IMMUTABLE_FL |
65 REISERFS_APPEND_FL)) 74 REISERFS_APPEND_FL))
66 && !capable(CAP_LINUX_IMMUTABLE)) 75 && !capable(CAP_LINUX_IMMUTABLE)) {
67 return -EPERM; 76 err = -EPERM;
68 77 goto setflags_out;
78 }
69 if ((flags & REISERFS_NOTAIL_FL) && 79 if ((flags & REISERFS_NOTAIL_FL) &&
70 S_ISREG(inode->i_mode)) { 80 S_ISREG(inode->i_mode)) {
71 int result; 81 int result;
72 82
73 result = reiserfs_unpack(inode, filp); 83 result = reiserfs_unpack(inode, filp);
74 if (result) 84 if (result) {
75 return result; 85 err = result;
86 goto setflags_out;
87 }
76 } 88 }
77 sd_attrs_to_i_attrs(flags, inode); 89 sd_attrs_to_i_attrs(flags, inode);
78 REISERFS_I(inode)->i_attrs = flags; 90 REISERFS_I(inode)->i_attrs = flags;
79 inode->i_ctime = CURRENT_TIME_SEC; 91 inode->i_ctime = CURRENT_TIME_SEC;
80 mark_inode_dirty(inode); 92 mark_inode_dirty(inode);
81 return 0; 93setflags_out:
94 mnt_drop_write(filp->f_path.mnt);
95 return err;
82 } 96 }
83 case REISERFS_IOC_GETVERSION: 97 case REISERFS_IOC_GETVERSION:
84 return put_user(inode->i_generation, (int __user *)arg); 98 return put_user(inode->i_generation, (int __user *)arg);
85 case REISERFS_IOC_SETVERSION: 99 case REISERFS_IOC_SETVERSION:
86 if (!is_owner_or_cap(inode)) 100 if (!is_owner_or_cap(inode))
87 return -EPERM; 101 return -EPERM;
88 if (IS_RDONLY(inode)) 102 err = mnt_want_write(filp->f_path.mnt);
89 return -EROFS; 103 if (err)
90 if (get_user(inode->i_generation, (int __user *)arg)) 104 return err;
91 return -EFAULT; 105 if (get_user(inode->i_generation, (int __user *)arg)) {
106 err = -EFAULT;
107 goto setversion_out;
108 }
92 inode->i_ctime = CURRENT_TIME_SEC; 109 inode->i_ctime = CURRENT_TIME_SEC;
93 mark_inode_dirty(inode); 110 mark_inode_dirty(inode);
94 return 0; 111setversion_out:
112 mnt_drop_write(filp->f_path.mnt);
113 return err;
95 default: 114 default:
96 return -ENOTTY; 115 return -ENOTTY;
97 } 116 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index bb05a3e51b93..060eb3f598e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -38,7 +38,7 @@
38#include <asm/system.h> 38#include <asm/system.h>
39 39
40#include <linux/time.h> 40#include <linux/time.h>
41#include <asm/semaphore.h> 41#include <linux/semaphore.h>
42 42
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/reiserfs_fs.h> 44#include <linux/reiserfs_fs.h>
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 344b9b96cc56..d7c4935c1034 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -44,7 +44,6 @@
44#include <net/checksum.h> 44#include <net/checksum.h>
45#include <linux/smp_lock.h> 45#include <linux/smp_lock.h>
46#include <linux/stat.h> 46#include <linux/stat.h>
47#include <asm/semaphore.h>
48 47
49#define FL_READONLY 128 48#define FL_READONLY 128
50#define FL_DIR_SEM_HELD 256 49#define FL_DIR_SEM_HELD 256
diff --git a/fs/select.c b/fs/select.c
index 5633fe980781..00f58c5c7e05 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -260,7 +260,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
260 wait = NULL; 260 wait = NULL;
261 if (retval || !*timeout || signal_pending(current)) 261 if (retval || !*timeout || signal_pending(current))
262 break; 262 break;
263 if(table.error) { 263 if (table.error) {
264 retval = table.error; 264 retval = table.error;
265 break; 265 break;
266 } 266 }
diff --git a/fs/super.c b/fs/super.c
index 09008dbd264e..1f8f05ede437 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
37#include <linux/idr.h> 37#include <linux/idr.h>
38#include <linux/kobject.h> 38#include <linux/kobject.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/file.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41 42
42 43
@@ -567,10 +568,29 @@ static void mark_files_ro(struct super_block *sb)
567{ 568{
568 struct file *f; 569 struct file *f;
569 570
571retry:
570 file_list_lock(); 572 file_list_lock();
571 list_for_each_entry(f, &sb->s_files, f_u.fu_list) { 573 list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
572 if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f)) 574 struct vfsmount *mnt;
573 f->f_mode &= ~FMODE_WRITE; 575 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
576 continue;
577 if (!file_count(f))
578 continue;
579 if (!(f->f_mode & FMODE_WRITE))
580 continue;
581 f->f_mode &= ~FMODE_WRITE;
582 if (file_check_writeable(f) != 0)
583 continue;
584 file_release_write(f);
585 mnt = mntget(f->f_path.mnt);
586 file_list_unlock();
587 /*
588 * This can sleep, so we can't hold
589 * the file_list_lock() spinlock.
590 */
591 mnt_drop_write(mnt);
592 mntput(mnt);
593 goto retry;
574 } 594 }
575 file_list_unlock(); 595 file_list_unlock();
576} 596}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 4948d9bc405d..a1c3a1fab7f0 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -20,6 +20,7 @@
20#include <linux/idr.h> 20#include <linux/idr.h>
21#include <linux/completion.h> 21#include <linux/completion.h>
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/slab.h>
23#include "sysfs.h" 24#include "sysfs.h"
24 25
25DEFINE_MUTEX(sysfs_mutex); 26DEFINE_MUTEX(sysfs_mutex);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index baa663e69388..ade9a7e6a757 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h>
16#include <linux/namei.h> 17#include <linux/namei.h>
17#include <linux/poll.h> 18#include <linux/poll.h>
18#include <linux/list.h> 19#include <linux/list.h>
@@ -128,7 +129,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
128 ssize_t retval = 0; 129 ssize_t retval = 0;
129 130
130 mutex_lock(&buffer->mutex); 131 mutex_lock(&buffer->mutex);
131 if (buffer->needs_read_fill) { 132 if (buffer->needs_read_fill || *ppos == 0) {
132 retval = fill_read_buffer(file->f_path.dentry,buffer); 133 retval = fill_read_buffer(file->f_path.dentry,buffer);
133 if (retval) 134 if (retval)
134 goto out; 135 goto out;
@@ -409,8 +410,7 @@ static int sysfs_release(struct inode *inode, struct file *filp)
409 * return POLLERR|POLLPRI, and select will return the fd whether 410 * return POLLERR|POLLPRI, and select will return the fd whether
410 * it is waiting for read, write, or exceptions. 411 * it is waiting for read, write, or exceptions.
411 * Once poll/select indicates that the value has changed, you 412 * Once poll/select indicates that the value has changed, you
412 * need to close and re-open the file, as simply seeking and reading 413 * need to close and re-open the file, or seek to 0 and read again.
413 * again will not get new data, or reset the state of 'poll'.
414 * Reminder: this only works for attributes which actively support 414 * Reminder: this only works for attributes which actively support
415 * it, and it is not possible to test an attribute from userspace 415 * it, and it is not possible to test an attribute from userspace
416 * to see if it supports poll (Neither 'poll' nor 'select' return 416 * to see if it supports poll (Neither 'poll' nor 'select' return
diff --git a/fs/utimes.c b/fs/utimes.c
index b18da9c0b97f..a2bef77dc9c9 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -2,6 +2,7 @@
2#include <linux/file.h> 2#include <linux/file.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/linkage.h> 4#include <linux/linkage.h>
5#include <linux/mount.h>
5#include <linux/namei.h> 6#include <linux/namei.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7#include <linux/stat.h> 8#include <linux/stat.h>
@@ -59,6 +60,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
59 struct inode *inode; 60 struct inode *inode;
60 struct iattr newattrs; 61 struct iattr newattrs;
61 struct file *f = NULL; 62 struct file *f = NULL;
63 struct vfsmount *mnt;
62 64
63 error = -EINVAL; 65 error = -EINVAL;
64 if (times && (!nsec_valid(times[0].tv_nsec) || 66 if (times && (!nsec_valid(times[0].tv_nsec) ||
@@ -79,18 +81,20 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
79 if (!f) 81 if (!f)
80 goto out; 82 goto out;
81 dentry = f->f_path.dentry; 83 dentry = f->f_path.dentry;
84 mnt = f->f_path.mnt;
82 } else { 85 } else {
83 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); 86 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
84 if (error) 87 if (error)
85 goto out; 88 goto out;
86 89
87 dentry = nd.path.dentry; 90 dentry = nd.path.dentry;
91 mnt = nd.path.mnt;
88 } 92 }
89 93
90 inode = dentry->d_inode; 94 inode = dentry->d_inode;
91 95
92 error = -EROFS; 96 error = mnt_want_write(mnt);
93 if (IS_RDONLY(inode)) 97 if (error)
94 goto dput_and_out; 98 goto dput_and_out;
95 99
96 /* Don't worry, the checks are done in inode_change_ok() */ 100 /* Don't worry, the checks are done in inode_change_ok() */
@@ -98,7 +102,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
98 if (times) { 102 if (times) {
99 error = -EPERM; 103 error = -EPERM;
100 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 104 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
101 goto dput_and_out; 105 goto mnt_drop_write_and_out;
102 106
103 if (times[0].tv_nsec == UTIME_OMIT) 107 if (times[0].tv_nsec == UTIME_OMIT)
104 newattrs.ia_valid &= ~ATTR_ATIME; 108 newattrs.ia_valid &= ~ATTR_ATIME;
@@ -118,22 +122,24 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
118 } else { 122 } else {
119 error = -EACCES; 123 error = -EACCES;
120 if (IS_IMMUTABLE(inode)) 124 if (IS_IMMUTABLE(inode))
121 goto dput_and_out; 125 goto mnt_drop_write_and_out;
122 126
123 if (!is_owner_or_cap(inode)) { 127 if (!is_owner_or_cap(inode)) {
124 if (f) { 128 if (f) {
125 if (!(f->f_mode & FMODE_WRITE)) 129 if (!(f->f_mode & FMODE_WRITE))
126 goto dput_and_out; 130 goto mnt_drop_write_and_out;
127 } else { 131 } else {
128 error = vfs_permission(&nd, MAY_WRITE); 132 error = vfs_permission(&nd, MAY_WRITE);
129 if (error) 133 if (error)
130 goto dput_and_out; 134 goto mnt_drop_write_and_out;
131 } 135 }
132 } 136 }
133 } 137 }
134 mutex_lock(&inode->i_mutex); 138 mutex_lock(&inode->i_mutex);
135 error = notify_change(dentry, &newattrs); 139 error = notify_change(dentry, &newattrs);
136 mutex_unlock(&inode->i_mutex); 140 mutex_unlock(&inode->i_mutex);
141mnt_drop_write_and_out:
142 mnt_drop_write(mnt);
137dput_and_out: 143dput_and_out:
138 if (f) 144 if (f)
139 fput(f); 145 fput(f);
diff --git a/fs/xattr.c b/fs/xattr.c
index 3acab1615460..f7062da505d4 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -11,6 +11,7 @@
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/file.h> 12#include <linux/file.h>
13#include <linux/xattr.h> 13#include <linux/xattr.h>
14#include <linux/mount.h>
14#include <linux/namei.h> 15#include <linux/namei.h>
15#include <linux/security.h> 16#include <linux/security.h>
16#include <linux/syscalls.h> 17#include <linux/syscalls.h>
@@ -32,8 +33,6 @@ xattr_permission(struct inode *inode, const char *name, int mask)
32 * filesystem or on an immutable / append-only inode. 33 * filesystem or on an immutable / append-only inode.
33 */ 34 */
34 if (mask & MAY_WRITE) { 35 if (mask & MAY_WRITE) {
35 if (IS_RDONLY(inode))
36 return -EROFS;
37 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 36 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
38 return -EPERM; 37 return -EPERM;
39 } 38 }
@@ -262,7 +261,11 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
262 error = user_path_walk(path, &nd); 261 error = user_path_walk(path, &nd);
263 if (error) 262 if (error)
264 return error; 263 return error;
265 error = setxattr(nd.path.dentry, name, value, size, flags); 264 error = mnt_want_write(nd.path.mnt);
265 if (!error) {
266 error = setxattr(nd.path.dentry, name, value, size, flags);
267 mnt_drop_write(nd.path.mnt);
268 }
266 path_put(&nd.path); 269 path_put(&nd.path);
267 return error; 270 return error;
268} 271}
@@ -277,7 +280,11 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
277 error = user_path_walk_link(path, &nd); 280 error = user_path_walk_link(path, &nd);
278 if (error) 281 if (error)
279 return error; 282 return error;
280 error = setxattr(nd.path.dentry, name, value, size, flags); 283 error = mnt_want_write(nd.path.mnt);
284 if (!error) {
285 error = setxattr(nd.path.dentry, name, value, size, flags);
286 mnt_drop_write(nd.path.mnt);
287 }
281 path_put(&nd.path); 288 path_put(&nd.path);
282 return error; 289 return error;
283} 290}
@@ -295,7 +302,12 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
295 return error; 302 return error;
296 dentry = f->f_path.dentry; 303 dentry = f->f_path.dentry;
297 audit_inode(NULL, dentry); 304 audit_inode(NULL, dentry);
298 error = setxattr(dentry, name, value, size, flags); 305 error = mnt_want_write(f->f_path.mnt);
306 if (!error) {
307 error = setxattr(dentry, name, value, size, flags);
308 mnt_drop_write(f->f_path.mnt);
309 }
310out_fput:
299 fput(f); 311 fput(f);
300 return error; 312 return error;
301} 313}
@@ -482,7 +494,11 @@ sys_removexattr(char __user *path, char __user *name)
482 error = user_path_walk(path, &nd); 494 error = user_path_walk(path, &nd);
483 if (error) 495 if (error)
484 return error; 496 return error;
485 error = removexattr(nd.path.dentry, name); 497 error = mnt_want_write(nd.path.mnt);
498 if (!error) {
499 error = removexattr(nd.path.dentry, name);
500 mnt_drop_write(nd.path.mnt);
501 }
486 path_put(&nd.path); 502 path_put(&nd.path);
487 return error; 503 return error;
488} 504}
@@ -496,7 +512,11 @@ sys_lremovexattr(char __user *path, char __user *name)
496 error = user_path_walk_link(path, &nd); 512 error = user_path_walk_link(path, &nd);
497 if (error) 513 if (error)
498 return error; 514 return error;
499 error = removexattr(nd.path.dentry, name); 515 error = mnt_want_write(nd.path.mnt);
516 if (!error) {
517 error = removexattr(nd.path.dentry, name);
518 mnt_drop_write(nd.path.mnt);
519 }
500 path_put(&nd.path); 520 path_put(&nd.path);
501 return error; 521 return error;
502} 522}
@@ -513,7 +533,11 @@ sys_fremovexattr(int fd, char __user *name)
513 return error; 533 return error;
514 dentry = f->f_path.dentry; 534 dentry = f->f_path.dentry;
515 audit_inode(NULL, dentry); 535 audit_inode(NULL, dentry);
516 error = removexattr(dentry, name); 536 error = mnt_want_write(f->f_path.mnt);
537 if (!error) {
538 error = removexattr(dentry, name);
539 mnt_drop_write(f->f_path.mnt);
540 }
517 fput(f); 541 fput(f);
518 return error; 542 return error;
519} 543}
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index 2009e6d922ce..3abe7e9ceb33 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -20,8 +20,8 @@
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/wait.h> 22#include <linux/wait.h>
23#include <linux/semaphore.h>
23#include <asm/atomic.h> 24#include <asm/atomic.h>
24#include <asm/semaphore.h>
25 25
26/* 26/*
27 * sema_t structure just maps to struct semaphore in Linux kernel. 27 * sema_t structure just maps to struct semaphore in Linux kernel.
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index bf7759793856..4ddb86b73c6b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -535,8 +535,6 @@ xfs_attrmulti_attr_set(
535 char *kbuf; 535 char *kbuf;
536 int error = EFAULT; 536 int error = EFAULT;
537 537
538 if (IS_RDONLY(inode))
539 return -EROFS;
540 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 538 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
541 return EPERM; 539 return EPERM;
542 if (len > XATTR_SIZE_MAX) 540 if (len > XATTR_SIZE_MAX)
@@ -562,8 +560,6 @@ xfs_attrmulti_attr_remove(
562 char *name, 560 char *name,
563 __uint32_t flags) 561 __uint32_t flags)
564{ 562{
565 if (IS_RDONLY(inode))
566 return -EROFS;
567 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 563 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
568 return EPERM; 564 return EPERM;
569 return xfs_attr_remove(XFS_I(inode), name, flags); 565 return xfs_attr_remove(XFS_I(inode), name, flags);
@@ -573,6 +569,7 @@ STATIC int
573xfs_attrmulti_by_handle( 569xfs_attrmulti_by_handle(
574 xfs_mount_t *mp, 570 xfs_mount_t *mp,
575 void __user *arg, 571 void __user *arg,
572 struct file *parfilp,
576 struct inode *parinode) 573 struct inode *parinode)
577{ 574{
578 int error; 575 int error;
@@ -626,13 +623,21 @@ xfs_attrmulti_by_handle(
626 &ops[i].am_length, ops[i].am_flags); 623 &ops[i].am_length, ops[i].am_flags);
627 break; 624 break;
628 case ATTR_OP_SET: 625 case ATTR_OP_SET:
626 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
627 if (ops[i].am_error)
628 break;
629 ops[i].am_error = xfs_attrmulti_attr_set(inode, 629 ops[i].am_error = xfs_attrmulti_attr_set(inode,
630 attr_name, ops[i].am_attrvalue, 630 attr_name, ops[i].am_attrvalue,
631 ops[i].am_length, ops[i].am_flags); 631 ops[i].am_length, ops[i].am_flags);
632 mnt_drop_write(parfilp->f_path.mnt);
632 break; 633 break;
633 case ATTR_OP_REMOVE: 634 case ATTR_OP_REMOVE:
635 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
636 if (ops[i].am_error)
637 break;
634 ops[i].am_error = xfs_attrmulti_attr_remove(inode, 638 ops[i].am_error = xfs_attrmulti_attr_remove(inode,
635 attr_name, ops[i].am_flags); 639 attr_name, ops[i].am_flags);
640 mnt_drop_write(parfilp->f_path.mnt);
636 break; 641 break;
637 default: 642 default:
638 ops[i].am_error = EINVAL; 643 ops[i].am_error = EINVAL;
@@ -1133,7 +1138,7 @@ xfs_ioctl(
1133 return xfs_attrlist_by_handle(mp, arg, inode); 1138 return xfs_attrlist_by_handle(mp, arg, inode);
1134 1139
1135 case XFS_IOC_ATTRMULTI_BY_HANDLE: 1140 case XFS_IOC_ATTRMULTI_BY_HANDLE:
1136 return xfs_attrmulti_by_handle(mp, arg, inode); 1141 return xfs_attrmulti_by_handle(mp, arg, filp, inode);
1137 1142
1138 case XFS_IOC_SWAPEXT: { 1143 case XFS_IOC_SWAPEXT: {
1139 error = xfs_swapext((struct xfs_swapext __user *)arg); 1144 error = xfs_swapext((struct xfs_swapext __user *)arg);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 0c958cf77758..a1237dad6430 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -155,13 +155,6 @@ xfs_ichgtime_fast(
155 */ 155 */
156 ASSERT((flags & XFS_ICHGTIME_ACC) == 0); 156 ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
157 157
158 /*
159 * We're not supposed to change timestamps in readonly-mounted
160 * filesystems. Throw it away if anyone asks us.
161 */
162 if (unlikely(IS_RDONLY(inode)))
163 return;
164
165 if (flags & XFS_ICHGTIME_MOD) { 158 if (flags & XFS_ICHGTIME_MOD) {
166 tvp = &inode->i_mtime; 159 tvp = &inode->i_mtime;
167 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; 160 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 21c0dbc74093..1ebd8004469c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,6 +51,7 @@
51#include "xfs_vnodeops.h" 51#include "xfs_vnodeops.h"
52 52
53#include <linux/capability.h> 53#include <linux/capability.h>
54#include <linux/mount.h>
54#include <linux/writeback.h> 55#include <linux/writeback.h>
55 56
56 57
@@ -670,10 +671,16 @@ start:
670 if (new_size > xip->i_size) 671 if (new_size > xip->i_size)
671 xip->i_new_size = new_size; 672 xip->i_new_size = new_size;
672 673
673 if (likely(!(ioflags & IO_INVIS))) { 674 /*
675 * We're not supposed to change timestamps in readonly-mounted
676 * filesystems. Throw it away if anyone asks us.
677 */
678 if (likely(!(ioflags & IO_INVIS) &&
679 !mnt_want_write(file->f_path.mnt))) {
674 file_update_time(file); 680 file_update_time(file);
675 xfs_ichgtime_fast(xip, inode, 681 xfs_ichgtime_fast(xip, inode,
676 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 682 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
683 mnt_drop_write(file->f_path.mnt);
677 } 684 }
678 685
679 /* 686 /*