aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/Locking4
-rw-r--r--Documentation/sysctl/fs.txt42
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c77
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--drivers/base/devtmpfs.c9
-rw-r--r--drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c2
-rw-r--r--drivers/staging/bcm/Misc.c31
-rw-r--r--drivers/staging/gdm72xx/sdio_boot.c7
-rw-r--r--drivers/staging/gdm72xx/usb_boot.c22
-rw-r--r--drivers/target/target_core_file.c32
-rw-r--r--drivers/usb/gadget/storage_common.c12
-rw-r--r--drivers/usb/gadget/u_uac1.c6
-rw-r--r--drivers/video/fb_defio.c2
-rw-r--r--fs/9p/vfs_file.c3
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/btrfs/ioctl.c15
-rw-r--r--fs/btrfs/transaction.c7
-rw-r--r--fs/buffer.c28
-rw-r--r--fs/ceph/addr.c3
-rw-r--r--fs/ecryptfs/inode.c30
-rw-r--r--fs/exec.c19
-rw-r--r--fs/ext2/inode.c5
-rw-r--r--fs/ext2/super.c33
-rw-r--r--fs/ext4/inode.c15
-rw-r--r--fs/ext4/mmp.c6
-rw-r--r--fs/ext4/super.c31
-rw-r--r--fs/fat/file.c15
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/file.c18
-rw-r--r--fs/gfs2/trans.c4
-rw-r--r--fs/inode.c12
-rw-r--r--fs/internal.h4
-rw-r--r--fs/lockd/clntproc.c14
-rw-r--r--fs/lockd/svc4proc.c1
-rw-r--r--fs/lockd/svclock.c1
-rw-r--r--fs/lockd/svcproc.c1
-rw-r--r--fs/namei.c313
-rw-r--r--fs/namespace.c97
-rw-r--r--fs/nfsd/nfs4recover.c9
-rw-r--r--fs/nfsd/nfsfh.c1
-rw-r--r--fs/nfsd/nfsproc.c9
-rw-r--r--fs/nfsd/vfs.c79
-rw-r--r--fs/nfsd/vfs.h11
-rw-r--r--fs/nilfs2/file.c18
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/segment.c5
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ocfs2/file.c11
-rw-r--r--fs/ocfs2/ioctl.c14
-rw-r--r--fs/ocfs2/journal.c7
-rw-r--r--fs/ocfs2/mmap.c2
-rw-r--r--fs/ocfs2/refcounttree.c11
-rw-r--r--fs/open.c15
-rw-r--r--fs/pipe.c75
-rw-r--r--fs/splice.c3
-rw-r--r--fs/super.c252
-rw-r--r--fs/sysfs/bin.c2
-rw-r--r--fs/xfs/xfs_aops.c18
-rw-r--r--fs/xfs/xfs_file.c10
-rw-r--r--fs/xfs/xfs_ioctl.c55
-rw-r--r--fs/xfs/xfs_ioctl32.c12
-rw-r--r--fs/xfs/xfs_iomap.c4
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_mount.h3
-rw-r--r--fs/xfs/xfs_sync.c2
-rw-r--r--fs/xfs/xfs_trans.c17
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--include/linux/audit.h4
-rw-r--r--include/linux/fs.h154
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/linux/namei.h1
-rw-r--r--include/linux/nfsd/nfsfh.h1
-rw-r--r--include/linux/pipe_fs_i.h2
-rw-r--r--kernel/audit.c21
-rw-r--r--kernel/sysctl.c18
-rw-r--r--lib/percpu_counter.c14
-rw-r--r--mm/filemap.c31
-rw-r--r--mm/filemap_xip.c6
-rw-r--r--mm/memory.c14
-rw-r--r--net/unix/af_unix.c93
-rw-r--r--sound/sound_firmware.c8
84 files changed, 1326 insertions, 639 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 7f647e17830c..0f103e39b4f6 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -138,8 +138,8 @@ evict_inode:
138put_super: write 138put_super: write
139write_super: read 139write_super: read
140sync_fs: read 140sync_fs: read
141freeze_fs: read 141freeze_fs: write
142unfreeze_fs: read 142unfreeze_fs: write
143statfs: maybe(read) (see below) 143statfs: maybe(read) (see below)
144remount_fs: write 144remount_fs: write
145umount_begin: no 145umount_begin: no
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 8c235b6e4246..88152f214f48 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs:
32- nr_open 32- nr_open
33- overflowuid 33- overflowuid
34- overflowgid 34- overflowgid
35- protected_hardlinks
36- protected_symlinks
35- suid_dumpable 37- suid_dumpable
36- super-max 38- super-max
37- super-nr 39- super-nr
@@ -157,6 +159,46 @@ The default is 65534.
157 159
158============================================================== 160==============================================================
159 161
162protected_hardlinks:
163
164A long-standing class of security issues is the hardlink-based
165time-of-check-time-of-use race, most commonly seen in world-writable
166directories like /tmp. The common method of exploitation of this flaw
167is to cross privilege boundaries when following a given hardlink (i.e. a
168root process follows a hardlink created by another user). Additionally,
169on systems without separated partitions, this stops unauthorized users
170from "pinning" vulnerable setuid/setgid files against being upgraded by
171the administrator, or linking to special files.
172
173When set to "0", hardlink creation behavior is unrestricted.
174
175When set to "1", hardlinks cannot be created by users if they do not
176already own the source file, or do not have read/write access to it.
177
178This protection is based on the restrictions in Openwall and grsecurity.
179
180==============================================================
181
182protected_symlinks:
183
184A long-standing class of security issues is the symlink-based
185time-of-check-time-of-use race, most commonly seen in world-writable
186directories like /tmp. The common method of exploitation of this flaw
187is to cross privilege boundaries when following a given symlink (i.e. a
188root process follows a symlink belonging to another user). For a likely
189incomplete list of hundreds of examples across the years, please see:
190http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
191
192When set to "0", symlink following behavior is unrestricted.
193
194When set to "1", symlinks are permitted to be followed only when outside
195a sticky world-writable directory, or when the uid of the symlink and
196follower match, or when the directory owner matches the symlink's owner.
197
198This protection is based on the restrictions in Openwall and grsecurity.
199
200==============================================================
201
160suid_dumpable: 202suid_dumpable:
161 203
162This value can be used to query and set the core dump mode for setuid 204This value can be used to query and set the core dump mode for setuid
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index d544d7816df3..dba1ce235da5 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -186,10 +186,13 @@ static void spufs_prune_dir(struct dentry *dir)
186static int spufs_rmdir(struct inode *parent, struct dentry *dir) 186static int spufs_rmdir(struct inode *parent, struct dentry *dir)
187{ 187{
188 /* remove all entries */ 188 /* remove all entries */
189 int res;
189 spufs_prune_dir(dir); 190 spufs_prune_dir(dir);
190 d_drop(dir); 191 d_drop(dir);
191 192 res = simple_rmdir(parent, dir);
192 return simple_rmdir(parent, dir); 193 /* We have to give up the mm_struct */
194 spu_forget(SPUFS_I(dir->d_inode)->i_ctx);
195 return res;
193} 196}
194 197
195static int spufs_fill_dir(struct dentry *dir, 198static int spufs_fill_dir(struct dentry *dir,
@@ -245,9 +248,6 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
245 mutex_unlock(&parent->i_mutex); 248 mutex_unlock(&parent->i_mutex);
246 WARN_ON(ret); 249 WARN_ON(ret);
247 250
248 /* We have to give up the mm_struct */
249 spu_forget(ctx);
250
251 return dcache_dir_close(inode, file); 251 return dcache_dir_close(inode, file);
252} 252}
253 253
@@ -450,28 +450,24 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
450 struct spu_context *neighbor; 450 struct spu_context *neighbor;
451 struct path path = {.mnt = mnt, .dentry = dentry}; 451 struct path path = {.mnt = mnt, .dentry = dentry};
452 452
453 ret = -EPERM;
454 if ((flags & SPU_CREATE_NOSCHED) && 453 if ((flags & SPU_CREATE_NOSCHED) &&
455 !capable(CAP_SYS_NICE)) 454 !capable(CAP_SYS_NICE))
456 goto out_unlock; 455 return -EPERM;
457 456
458 ret = -EINVAL;
459 if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE)) 457 if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
460 == SPU_CREATE_ISOLATE) 458 == SPU_CREATE_ISOLATE)
461 goto out_unlock; 459 return -EINVAL;
462 460
463 ret = -ENODEV;
464 if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) 461 if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
465 goto out_unlock; 462 return -ENODEV;
466 463
467 gang = NULL; 464 gang = NULL;
468 neighbor = NULL; 465 neighbor = NULL;
469 affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); 466 affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
470 if (affinity) { 467 if (affinity) {
471 gang = SPUFS_I(inode)->i_gang; 468 gang = SPUFS_I(inode)->i_gang;
472 ret = -EINVAL;
473 if (!gang) 469 if (!gang)
474 goto out_unlock; 470 return -EINVAL;
475 mutex_lock(&gang->aff_mutex); 471 mutex_lock(&gang->aff_mutex);
476 neighbor = spufs_assert_affinity(flags, gang, aff_filp); 472 neighbor = spufs_assert_affinity(flags, gang, aff_filp);
477 if (IS_ERR(neighbor)) { 473 if (IS_ERR(neighbor)) {
@@ -492,22 +488,12 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
492 } 488 }
493 489
494 ret = spufs_context_open(&path); 490 ret = spufs_context_open(&path);
495 if (ret < 0) { 491 if (ret < 0)
496 WARN_ON(spufs_rmdir(inode, dentry)); 492 WARN_ON(spufs_rmdir(inode, dentry));
497 if (affinity)
498 mutex_unlock(&gang->aff_mutex);
499 mutex_unlock(&inode->i_mutex);
500 spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
501 goto out;
502 }
503 493
504out_aff_unlock: 494out_aff_unlock:
505 if (affinity) 495 if (affinity)
506 mutex_unlock(&gang->aff_mutex); 496 mutex_unlock(&gang->aff_mutex);
507out_unlock:
508 mutex_unlock(&inode->i_mutex);
509out:
510 dput(dentry);
511 return ret; 497 return ret;
512} 498}
513 499
@@ -580,18 +566,13 @@ static int spufs_create_gang(struct inode *inode,
580 int ret; 566 int ret;
581 567
582 ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); 568 ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
583 if (ret) 569 if (!ret) {
584 goto out; 570 ret = spufs_gang_open(&path);
585 571 if (ret < 0) {
586 ret = spufs_gang_open(&path); 572 int err = simple_rmdir(inode, dentry);
587 if (ret < 0) { 573 WARN_ON(err);
588 int err = simple_rmdir(inode, dentry); 574 }
589 WARN_ON(err);
590 } 575 }
591
592out:
593 mutex_unlock(&inode->i_mutex);
594 dput(dentry);
595 return ret; 576 return ret;
596} 577}
597 578
@@ -601,40 +582,32 @@ static struct file_system_type spufs_type;
601long spufs_create(struct path *path, struct dentry *dentry, 582long spufs_create(struct path *path, struct dentry *dentry,
602 unsigned int flags, umode_t mode, struct file *filp) 583 unsigned int flags, umode_t mode, struct file *filp)
603{ 584{
585 struct inode *dir = path->dentry->d_inode;
604 int ret; 586 int ret;
605 587
606 ret = -EINVAL;
607 /* check if we are on spufs */ 588 /* check if we are on spufs */
608 if (path->dentry->d_sb->s_type != &spufs_type) 589 if (path->dentry->d_sb->s_type != &spufs_type)
609 goto out; 590 return -EINVAL;
610 591
611 /* don't accept undefined flags */ 592 /* don't accept undefined flags */
612 if (flags & (~SPU_CREATE_FLAG_ALL)) 593 if (flags & (~SPU_CREATE_FLAG_ALL))
613 goto out; 594 return -EINVAL;
614 595
615 /* only threads can be underneath a gang */ 596 /* only threads can be underneath a gang */
616 if (path->dentry != path->dentry->d_sb->s_root) { 597 if (path->dentry != path->dentry->d_sb->s_root)
617 if ((flags & SPU_CREATE_GANG) || 598 if ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang)
618 !SPUFS_I(path->dentry->d_inode)->i_gang) 599 return -EINVAL;
619 goto out;
620 }
621 600
622 mode &= ~current_umask(); 601 mode &= ~current_umask();
623 602
624 if (flags & SPU_CREATE_GANG) 603 if (flags & SPU_CREATE_GANG)
625 ret = spufs_create_gang(path->dentry->d_inode, 604 ret = spufs_create_gang(dir, dentry, path->mnt, mode);
626 dentry, path->mnt, mode);
627 else 605 else
628 ret = spufs_create_context(path->dentry->d_inode, 606 ret = spufs_create_context(dir, dentry, path->mnt, flags, mode,
629 dentry, path->mnt, flags, mode,
630 filp); 607 filp);
631 if (ret >= 0) 608 if (ret >= 0)
632 fsnotify_mkdir(path->dentry->d_inode, dentry); 609 fsnotify_mkdir(dir, dentry);
633 return ret;
634 610
635out:
636 mutex_unlock(&path->dentry->d_inode->i_mutex);
637 dput(dentry);
638 return ret; 611 return ret;
639} 612}
640 613
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 5665dcc382c7..5b7d8ffbf890 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
70 ret = PTR_ERR(dentry); 70 ret = PTR_ERR(dentry);
71 if (!IS_ERR(dentry)) { 71 if (!IS_ERR(dentry)) {
72 ret = spufs_create(&path, dentry, flags, mode, neighbor); 72 ret = spufs_create(&path, dentry, flags, mode, neighbor);
73 path_put(&path); 73 done_path_create(&path, dentry);
74 } 74 }
75 75
76 return ret; 76 return ret;
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index d91a3a0b2325..deb4a456cf83 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -156,9 +156,7 @@ static int dev_mkdir(const char *name, umode_t mode)
156 if (!err) 156 if (!err)
157 /* mark as kernel-created inode */ 157 /* mark as kernel-created inode */
158 dentry->d_inode->i_private = &thread; 158 dentry->d_inode->i_private = &thread;
159 dput(dentry); 159 done_path_create(&path, dentry);
160 mutex_unlock(&path.dentry->d_inode->i_mutex);
161 path_put(&path);
162 return err; 160 return err;
163} 161}
164 162
@@ -218,10 +216,7 @@ static int handle_create(const char *nodename, umode_t mode, struct device *dev)
218 /* mark as kernel-created inode */ 216 /* mark as kernel-created inode */
219 dentry->d_inode->i_private = &thread; 217 dentry->d_inode->i_private = &thread;
220 } 218 }
221 dput(dentry); 219 done_path_create(&path, dentry);
222
223 mutex_unlock(&path.dentry->d_inode->i_mutex);
224 path_put(&path);
225 return err; 220 return err;
226} 221}
227 222
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
index 57bf1d7ee80f..9ab24528f9b9 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
@@ -1188,7 +1188,7 @@ exit:
1188 kfree(buf); 1188 kfree(buf);
1189 /* close file before return */ 1189 /* close file before return */
1190 if (fp) 1190 if (fp)
1191 filp_close(fp, current->files); 1191 filp_close(fp, NULL);
1192 /* restore previous address limit */ 1192 /* restore previous address limit */
1193 set_fs(old_fs); 1193 set_fs(old_fs);
1194 1194
diff --git a/drivers/staging/bcm/Misc.c b/drivers/staging/bcm/Misc.c
index 9a60d4cd2184..f545716c666d 100644
--- a/drivers/staging/bcm/Misc.c
+++ b/drivers/staging/bcm/Misc.c
@@ -157,12 +157,7 @@ static int create_worker_threads(struct bcm_mini_adapter *psAdapter)
157 157
158static struct file *open_firmware_file(struct bcm_mini_adapter *Adapter, const char *path) 158static struct file *open_firmware_file(struct bcm_mini_adapter *Adapter, const char *path)
159{ 159{
160 struct file *flp = NULL; 160 struct file *flp = filp_open(path, O_RDONLY, S_IRWXU);
161 mm_segment_t oldfs;
162 oldfs = get_fs();
163 set_fs(get_ds());
164 flp = filp_open(path, O_RDONLY, S_IRWXU);
165 set_fs(oldfs);
166 if (IS_ERR(flp)) { 161 if (IS_ERR(flp)) {
167 pr_err(DRV_NAME "Unable To Open File %s, err %ld", path, PTR_ERR(flp)); 162 pr_err(DRV_NAME "Unable To Open File %s, err %ld", path, PTR_ERR(flp));
168 flp = NULL; 163 flp = NULL;
@@ -183,14 +178,12 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u
183{ 178{
184 int errorno = 0; 179 int errorno = 0;
185 struct file *flp = NULL; 180 struct file *flp = NULL;
186 mm_segment_t oldfs;
187 struct timeval tv = {0}; 181 struct timeval tv = {0};
188 182
189 flp = open_firmware_file(Adapter, path); 183 flp = open_firmware_file(Adapter, path);
190 if (!flp) { 184 if (!flp) {
191 errorno = -ENOENT;
192 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Unable to Open %s\n", path); 185 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Unable to Open %s\n", path);
193 goto exit_download; 186 return -ENOENT;
194 } 187 }
195 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Opened file is = %s and length =0x%lx to be downloaded at =0x%x", path, (unsigned long)flp->f_dentry->d_inode->i_size, loc); 188 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Opened file is = %s and length =0x%lx to be downloaded at =0x%x", path, (unsigned long)flp->f_dentry->d_inode->i_size, loc);
196 do_gettimeofday(&tv); 189 do_gettimeofday(&tv);
@@ -201,10 +194,7 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u
201 errorno = -EIO; 194 errorno = -EIO;
202 goto exit_download; 195 goto exit_download;
203 } 196 }
204 oldfs = get_fs();
205 set_fs(get_ds());
206 vfs_llseek(flp, 0, 0); 197 vfs_llseek(flp, 0, 0);
207 set_fs(oldfs);
208 if (Adapter->bcm_file_readback_from_chip(Adapter->pvInterfaceAdapter, flp, loc)) { 198 if (Adapter->bcm_file_readback_from_chip(Adapter->pvInterfaceAdapter, flp, loc)) {
209 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Failed to read back firmware!"); 199 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Failed to read back firmware!");
210 errorno = -EIO; 200 errorno = -EIO;
@@ -212,12 +202,7 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u
212 } 202 }
213 203
214exit_download: 204exit_download:
215 oldfs = get_fs(); 205 filp_close(flp, NULL);
216 set_fs(get_ds());
217 if (flp && !(IS_ERR(flp)))
218 filp_close(flp, current->files);
219 set_fs(oldfs);
220
221 return errorno; 206 return errorno;
222} 207}
223 208
@@ -1056,10 +1041,8 @@ OUT:
1056static int bcm_parse_target_params(struct bcm_mini_adapter *Adapter) 1041static int bcm_parse_target_params(struct bcm_mini_adapter *Adapter)
1057{ 1042{
1058 struct file *flp = NULL; 1043 struct file *flp = NULL;
1059 mm_segment_t oldfs = {0};
1060 char *buff; 1044 char *buff;
1061 int len = 0; 1045 int len = 0;
1062 loff_t pos = 0;
1063 1046
1064 buff = kmalloc(BUFFER_1K, GFP_KERNEL); 1047 buff = kmalloc(BUFFER_1K, GFP_KERNEL);
1065 if (!buff) 1048 if (!buff)
@@ -1079,20 +1062,16 @@ static int bcm_parse_target_params(struct bcm_mini_adapter *Adapter)
1079 Adapter->pstargetparams = NULL; 1062 Adapter->pstargetparams = NULL;
1080 return -ENOENT; 1063 return -ENOENT;
1081 } 1064 }
1082 oldfs = get_fs(); 1065 len = kernel_read(flp, 0, buff, BUFFER_1K);
1083 set_fs(get_ds()); 1066 filp_close(flp, NULL);
1084 len = vfs_read(flp, (void __user __force *)buff, BUFFER_1K, &pos);
1085 set_fs(oldfs);
1086 1067
1087 if (len != sizeof(STARGETPARAMS)) { 1068 if (len != sizeof(STARGETPARAMS)) {
1088 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Mismatch in Target Param Structure!\n"); 1069 BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Mismatch in Target Param Structure!\n");
1089 kfree(buff); 1070 kfree(buff);
1090 kfree(Adapter->pstargetparams); 1071 kfree(Adapter->pstargetparams);
1091 Adapter->pstargetparams = NULL; 1072 Adapter->pstargetparams = NULL;
1092 filp_close(flp, current->files);
1093 return -ENOENT; 1073 return -ENOENT;
1094 } 1074 }
1095 filp_close(flp, current->files);
1096 1075
1097 /* Check for autolink in config params */ 1076 /* Check for autolink in config params */
1098 /* 1077 /*
diff --git a/drivers/staging/gdm72xx/sdio_boot.c b/drivers/staging/gdm72xx/sdio_boot.c
index 760efee23d4a..65624bca8b3a 100644
--- a/drivers/staging/gdm72xx/sdio_boot.c
+++ b/drivers/staging/gdm72xx/sdio_boot.c
@@ -66,9 +66,8 @@ static int download_image(struct sdio_func *func, char *img_name)
66 return -ENOENT; 66 return -ENOENT;
67 } 67 }
68 68
69 if (filp->f_dentry) 69 inode = filp->f_dentry->d_inode;
70 inode = filp->f_dentry->d_inode; 70 if (!S_ISREG(inode->i_mode)) {
71 if (!inode || !S_ISREG(inode->i_mode)) {
72 printk(KERN_ERR "Invalid file type: %s\n", img_name); 71 printk(KERN_ERR "Invalid file type: %s\n", img_name);
73 ret = -EINVAL; 72 ret = -EINVAL;
74 goto out; 73 goto out;
@@ -123,7 +122,7 @@ static int download_image(struct sdio_func *func, char *img_name)
123 pno++; 122 pno++;
124 } 123 }
125out: 124out:
126 filp_close(filp, current->files); 125 filp_close(filp, NULL);
127 return ret; 126 return ret;
128} 127}
129 128
diff --git a/drivers/staging/gdm72xx/usb_boot.c b/drivers/staging/gdm72xx/usb_boot.c
index fef290c38db6..e3dbd5a552ca 100644
--- a/drivers/staging/gdm72xx/usb_boot.c
+++ b/drivers/staging/gdm72xx/usb_boot.c
@@ -173,14 +173,12 @@ int usb_boot(struct usb_device *usbdev, u16 pid)
173 filp = filp_open(img_name, O_RDONLY | O_LARGEFILE, 0); 173 filp = filp_open(img_name, O_RDONLY | O_LARGEFILE, 0);
174 if (IS_ERR(filp)) { 174 if (IS_ERR(filp)) {
175 printk(KERN_ERR "Can't find %s.\n", img_name); 175 printk(KERN_ERR "Can't find %s.\n", img_name);
176 set_fs(fs);
177 ret = PTR_ERR(filp); 176 ret = PTR_ERR(filp);
178 goto restore_fs; 177 goto restore_fs;
179 } 178 }
180 179
181 if (filp->f_dentry) 180 inode = filp->f_dentry->d_inode;
182 inode = filp->f_dentry->d_inode; 181 if (!S_ISREG(inode->i_mode)) {
183 if (!inode || !S_ISREG(inode->i_mode)) {
184 printk(KERN_ERR "Invalid file type: %s\n", img_name); 182 printk(KERN_ERR "Invalid file type: %s\n", img_name);
185 ret = -EINVAL; 183 ret = -EINVAL;
186 goto out; 184 goto out;
@@ -262,7 +260,7 @@ int usb_boot(struct usb_device *usbdev, u16 pid)
262 ret = -EINVAL; 260 ret = -EINVAL;
263 } 261 }
264out: 262out:
265 filp_close(filp, current->files); 263 filp_close(filp, NULL);
266 264
267restore_fs: 265restore_fs:
268 set_fs(fs); 266 set_fs(fs);
@@ -322,13 +320,11 @@ static int em_download_image(struct usb_device *usbdev, char *path,
322 goto restore_fs; 320 goto restore_fs;
323 } 321 }
324 322
325 if (filp->f_dentry) { 323 inode = filp->f_dentry->d_inode;
326 inode = filp->f_dentry->d_inode; 324 if (!S_ISREG(inode->i_mode)) {
327 if (!inode || !S_ISREG(inode->i_mode)) { 325 printk(KERN_ERR "Invalid file type: %s\n", path);
328 printk(KERN_ERR "Invalid file type: %s\n", path); 326 ret = -EINVAL;
329 ret = -EINVAL; 327 goto out;
330 goto out;
331 }
332 } 328 }
333 329
334 buf = kmalloc(DOWNLOAD_CHUCK + pad_size, GFP_KERNEL); 330 buf = kmalloc(DOWNLOAD_CHUCK + pad_size, GFP_KERNEL);
@@ -364,7 +360,7 @@ static int em_download_image(struct usb_device *usbdev, char *path,
364 goto out; 360 goto out;
365 361
366out: 362out:
367 filp_close(filp, current->files); 363 filp_close(filp, NULL);
368 364
369restore_fs: 365restore_fs:
370 set_fs(fs); 366 set_fs(fs);
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 9e2100551c78..cbb5aaf3e567 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -109,46 +109,29 @@ static struct se_device *fd_create_virtdevice(
109 struct se_subsystem_dev *se_dev, 109 struct se_subsystem_dev *se_dev,
110 void *p) 110 void *p)
111{ 111{
112 char *dev_p = NULL;
113 struct se_device *dev; 112 struct se_device *dev;
114 struct se_dev_limits dev_limits; 113 struct se_dev_limits dev_limits;
115 struct queue_limits *limits; 114 struct queue_limits *limits;
116 struct fd_dev *fd_dev = p; 115 struct fd_dev *fd_dev = p;
117 struct fd_host *fd_host = hba->hba_ptr; 116 struct fd_host *fd_host = hba->hba_ptr;
118 mm_segment_t old_fs;
119 struct file *file; 117 struct file *file;
120 struct inode *inode = NULL; 118 struct inode *inode = NULL;
121 int dev_flags = 0, flags, ret = -EINVAL; 119 int dev_flags = 0, flags, ret = -EINVAL;
122 120
123 memset(&dev_limits, 0, sizeof(struct se_dev_limits)); 121 memset(&dev_limits, 0, sizeof(struct se_dev_limits));
124 122
125 old_fs = get_fs();
126 set_fs(get_ds());
127 dev_p = getname(fd_dev->fd_dev_name);
128 set_fs(old_fs);
129
130 if (IS_ERR(dev_p)) {
131 pr_err("getname(%s) failed: %lu\n",
132 fd_dev->fd_dev_name, IS_ERR(dev_p));
133 ret = PTR_ERR(dev_p);
134 goto fail;
135 }
136 /* 123 /*
137 * Use O_DSYNC by default instead of O_SYNC to forgo syncing 124 * Use O_DSYNC by default instead of O_SYNC to forgo syncing
138 * of pure timestamp updates. 125 * of pure timestamp updates.
139 */ 126 */
140 flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC; 127 flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC;
141 128
142 file = filp_open(dev_p, flags, 0600); 129 file = filp_open(fd_dev->fd_dev_name, flags, 0600);
143 if (IS_ERR(file)) { 130 if (IS_ERR(file)) {
144 pr_err("filp_open(%s) failed\n", dev_p); 131 pr_err("filp_open(%s) failed\n", fd_dev->fd_dev_name);
145 ret = PTR_ERR(file); 132 ret = PTR_ERR(file);
146 goto fail; 133 goto fail;
147 } 134 }
148 if (!file || !file->f_dentry) {
149 pr_err("filp_open(%s) failed\n", dev_p);
150 goto fail;
151 }
152 fd_dev->fd_file = file; 135 fd_dev->fd_file = file;
153 /* 136 /*
154 * If using a block backend with this struct file, we extract 137 * If using a block backend with this struct file, we extract
@@ -212,14 +195,12 @@ static struct se_device *fd_create_virtdevice(
212 " %llu total bytes\n", fd_host->fd_host_id, fd_dev->fd_dev_id, 195 " %llu total bytes\n", fd_host->fd_host_id, fd_dev->fd_dev_id,
213 fd_dev->fd_dev_name, fd_dev->fd_dev_size); 196 fd_dev->fd_dev_name, fd_dev->fd_dev_size);
214 197
215 putname(dev_p);
216 return dev; 198 return dev;
217fail: 199fail:
218 if (fd_dev->fd_file) { 200 if (fd_dev->fd_file) {
219 filp_close(fd_dev->fd_file, NULL); 201 filp_close(fd_dev->fd_file, NULL);
220 fd_dev->fd_file = NULL; 202 fd_dev->fd_file = NULL;
221 } 203 }
222 putname(dev_p);
223 return ERR_PTR(ret); 204 return ERR_PTR(ret);
224} 205}
225 206
@@ -452,14 +433,11 @@ static ssize_t fd_set_configfs_dev_params(
452 token = match_token(ptr, tokens, args); 433 token = match_token(ptr, tokens, args);
453 switch (token) { 434 switch (token) {
454 case Opt_fd_dev_name: 435 case Opt_fd_dev_name:
455 arg_p = match_strdup(&args[0]); 436 if (match_strlcpy(fd_dev->fd_dev_name, &args[0],
456 if (!arg_p) { 437 FD_MAX_DEV_NAME) == 0) {
457 ret = -ENOMEM; 438 ret = -EINVAL;
458 break; 439 break;
459 } 440 }
460 snprintf(fd_dev->fd_dev_name, FD_MAX_DEV_NAME,
461 "%s", arg_p);
462 kfree(arg_p);
463 pr_debug("FILEIO: Referencing Path: %s\n", 441 pr_debug("FILEIO: Referencing Path: %s\n",
464 fd_dev->fd_dev_name); 442 fd_dev->fd_dev_name);
465 fd_dev->fbd_flags |= FBDF_HAS_PATH; 443 fd_dev->fbd_flags |= FBDF_HAS_PATH;
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index ae8b18869b8c..8d9bcd8207c8 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -656,9 +656,8 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
656 if (!(filp->f_mode & FMODE_WRITE)) 656 if (!(filp->f_mode & FMODE_WRITE))
657 ro = 1; 657 ro = 1;
658 658
659 if (filp->f_path.dentry) 659 inode = filp->f_path.dentry->d_inode;
660 inode = filp->f_path.dentry->d_inode; 660 if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
661 if (!inode || (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
662 LINFO(curlun, "invalid file type: %s\n", filename); 661 LINFO(curlun, "invalid file type: %s\n", filename);
663 goto out; 662 goto out;
664 } 663 }
@@ -667,7 +666,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
667 * If we can't read the file, it's no good. 666 * If we can't read the file, it's no good.
668 * If we can't write the file, use it read-only. 667 * If we can't write the file, use it read-only.
669 */ 668 */
670 if (!filp->f_op || !(filp->f_op->read || filp->f_op->aio_read)) { 669 if (!(filp->f_op->read || filp->f_op->aio_read)) {
671 LINFO(curlun, "file not readable: %s\n", filename); 670 LINFO(curlun, "file not readable: %s\n", filename);
672 goto out; 671 goto out;
673 } 672 }
@@ -712,7 +711,6 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
712 if (fsg_lun_is_open(curlun)) 711 if (fsg_lun_is_open(curlun))
713 fsg_lun_close(curlun); 712 fsg_lun_close(curlun);
714 713
715 get_file(filp);
716 curlun->blksize = blksize; 714 curlun->blksize = blksize;
717 curlun->blkbits = blkbits; 715 curlun->blkbits = blkbits;
718 curlun->ro = ro; 716 curlun->ro = ro;
@@ -720,10 +718,10 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
720 curlun->file_length = size; 718 curlun->file_length = size;
721 curlun->num_sectors = num_sectors; 719 curlun->num_sectors = num_sectors;
722 LDBG(curlun, "open backing file: %s\n", filename); 720 LDBG(curlun, "open backing file: %s\n", filename);
723 rc = 0; 721 return 0;
724 722
725out: 723out:
726 filp_close(filp, current->files); 724 fput(filp);
727 return rc; 725 return rc;
728} 726}
729 727
diff --git a/drivers/usb/gadget/u_uac1.c b/drivers/usb/gadget/u_uac1.c
index af9898982059..e0c5e88e03ed 100644
--- a/drivers/usb/gadget/u_uac1.c
+++ b/drivers/usb/gadget/u_uac1.c
@@ -275,17 +275,17 @@ static int gaudio_close_snd_dev(struct gaudio *gau)
275 /* Close control device */ 275 /* Close control device */
276 snd = &gau->control; 276 snd = &gau->control;
277 if (snd->filp) 277 if (snd->filp)
278 filp_close(snd->filp, current->files); 278 filp_close(snd->filp, NULL);
279 279
280 /* Close PCM playback device and setup substream */ 280 /* Close PCM playback device and setup substream */
281 snd = &gau->playback; 281 snd = &gau->playback;
282 if (snd->filp) 282 if (snd->filp)
283 filp_close(snd->filp, current->files); 283 filp_close(snd->filp, NULL);
284 284
285 /* Close PCM capture device and setup substream */ 285 /* Close PCM capture device and setup substream */
286 snd = &gau->capture; 286 snd = &gau->capture;
287 if (snd->filp) 287 if (snd->filp)
288 filp_close(snd->filp, current->files); 288 filp_close(snd->filp, NULL);
289 289
290 return 0; 290 return 0;
291} 291}
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 1ddeb11659d4..64cda560c488 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -104,6 +104,8 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
104 deferred framebuffer IO. then if userspace touches a page 104 deferred framebuffer IO. then if userspace touches a page
105 again, we repeat the same scheme */ 105 again, we repeat the same scheme */
106 106
107 file_update_time(vma->vm_file);
108
107 /* protect against the workqueue changing the page list */ 109 /* protect against the workqueue changing the page list */
108 mutex_lock(&fbdefio->lock); 110 mutex_lock(&fbdefio->lock);
109 111
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index fc06fd27065e..dd6f7ee1e312 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
610 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", 610 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
611 page, (unsigned long)filp->private_data); 611 page, (unsigned long)filp->private_data);
612 612
613 /* Update file times before taking page lock */
614 file_update_time(filp);
615
613 v9inode = V9FS_I(inode); 616 v9inode = V9FS_I(inode);
614 /* make sure the cache has finished storing the page */ 617 /* make sure the cache has finished storing the page */
615 v9fs_fscache_wait_on_page_write(inode, page); 618 v9fs_fscache_wait_on_page_write(inode, page);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fadeba6a5db9..62e0cafd6e25 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1614,8 +1614,6 @@ static int cleaner_kthread(void *arg)
1614 struct btrfs_root *root = arg; 1614 struct btrfs_root *root = arg;
1615 1615
1616 do { 1616 do {
1617 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1618
1619 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1617 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1620 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1618 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1621 btrfs_run_delayed_iputs(root); 1619 btrfs_run_delayed_iputs(root);
@@ -1647,7 +1645,6 @@ static int transaction_kthread(void *arg)
1647 do { 1645 do {
1648 cannot_commit = false; 1646 cannot_commit = false;
1649 delay = HZ * 30; 1647 delay = HZ * 30;
1650 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1651 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1648 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1652 1649
1653 spin_lock(&root->fs_info->trans_lock); 1650 spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9aa01ec2138d..5caf285c6e4d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1379 ssize_t err = 0; 1379 ssize_t err = 0;
1380 size_t count, ocount; 1380 size_t count, ocount;
1381 1381
1382 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 1382 sb_start_write(inode->i_sb);
1383 1383
1384 mutex_lock(&inode->i_mutex); 1384 mutex_lock(&inode->i_mutex);
1385 1385
@@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1469 num_written = err; 1469 num_written = err;
1470 } 1470 }
1471out: 1471out:
1472 sb_end_write(inode->i_sb);
1472 current->backing_dev_info = NULL; 1473 current->backing_dev_info = NULL;
1473 return num_written ? num_written : err; 1474 return num_written ? num_written : err;
1474} 1475}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 48bdfd2591c2..83baec24946d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6629,6 +6629,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6629 u64 page_start; 6629 u64 page_start;
6630 u64 page_end; 6630 u64 page_end;
6631 6631
6632 sb_start_pagefault(inode->i_sb);
6632 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 6633 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
6633 if (!ret) { 6634 if (!ret) {
6634 ret = file_update_time(vma->vm_file); 6635 ret = file_update_time(vma->vm_file);
@@ -6718,12 +6719,15 @@ again:
6718 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); 6719 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
6719 6720
6720out_unlock: 6721out_unlock:
6721 if (!ret) 6722 if (!ret) {
6723 sb_end_pagefault(inode->i_sb);
6722 return VM_FAULT_LOCKED; 6724 return VM_FAULT_LOCKED;
6725 }
6723 unlock_page(page); 6726 unlock_page(page);
6724out: 6727out:
6725 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 6728 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
6726out_noreserve: 6729out_noreserve:
6730 sb_end_pagefault(inode->i_sb);
6727 return ret; 6731 return ret;
6728} 6732}
6729 6733
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 43f0012016e3..bc2f6ffff3cf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -195,6 +195,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
195 if (!inode_owner_or_capable(inode)) 195 if (!inode_owner_or_capable(inode))
196 return -EACCES; 196 return -EACCES;
197 197
198 ret = mnt_want_write_file(file);
199 if (ret)
200 return ret;
201
198 mutex_lock(&inode->i_mutex); 202 mutex_lock(&inode->i_mutex);
199 203
200 ip_oldflags = ip->flags; 204 ip_oldflags = ip->flags;
@@ -209,10 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
209 } 213 }
210 } 214 }
211 215
212 ret = mnt_want_write_file(file);
213 if (ret)
214 goto out_unlock;
215
216 if (flags & FS_SYNC_FL) 216 if (flags & FS_SYNC_FL)
217 ip->flags |= BTRFS_INODE_SYNC; 217 ip->flags |= BTRFS_INODE_SYNC;
218 else 218 else
@@ -275,9 +275,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
275 inode->i_flags = i_oldflags; 275 inode->i_flags = i_oldflags;
276 } 276 }
277 277
278 mnt_drop_write_file(file);
279 out_unlock: 278 out_unlock:
280 mutex_unlock(&inode->i_mutex); 279 mutex_unlock(&inode->i_mutex);
280 mnt_drop_write_file(file);
281 return ret; 281 return ret;
282} 282}
283 283
@@ -664,6 +664,10 @@ static noinline int btrfs_mksubvol(struct path *parent,
664 struct dentry *dentry; 664 struct dentry *dentry;
665 int error; 665 int error;
666 666
667 error = mnt_want_write(parent->mnt);
668 if (error)
669 return error;
670
667 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 671 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
668 672
669 dentry = lookup_one_len(name, parent->dentry, namelen); 673 dentry = lookup_one_len(name, parent->dentry, namelen);
@@ -699,6 +703,7 @@ out_dput:
699 dput(dentry); 703 dput(dentry);
700out_unlock: 704out_unlock:
701 mutex_unlock(&dir->i_mutex); 705 mutex_unlock(&dir->i_mutex);
706 mnt_drop_write(parent->mnt);
702 return error; 707 return error;
703} 708}
704 709
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7ac7cdcc294e..17be3dedacba 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -335,6 +335,8 @@ again:
335 if (!h) 335 if (!h)
336 return ERR_PTR(-ENOMEM); 336 return ERR_PTR(-ENOMEM);
337 337
338 sb_start_intwrite(root->fs_info->sb);
339
338 if (may_wait_transaction(root, type)) 340 if (may_wait_transaction(root, type))
339 wait_current_trans(root); 341 wait_current_trans(root);
340 342
@@ -345,6 +347,7 @@ again:
345 } while (ret == -EBUSY); 347 } while (ret == -EBUSY);
346 348
347 if (ret < 0) { 349 if (ret < 0) {
350 sb_end_intwrite(root->fs_info->sb);
348 kmem_cache_free(btrfs_trans_handle_cachep, h); 351 kmem_cache_free(btrfs_trans_handle_cachep, h);
349 return ERR_PTR(ret); 352 return ERR_PTR(ret);
350 } 353 }
@@ -548,6 +551,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
548 btrfs_trans_release_metadata(trans, root); 551 btrfs_trans_release_metadata(trans, root);
549 trans->block_rsv = NULL; 552 trans->block_rsv = NULL;
550 553
554 sb_end_intwrite(root->fs_info->sb);
555
551 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && 556 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
552 should_end_transaction(trans, root)) { 557 should_end_transaction(trans, root)) {
553 trans->transaction->blocked = 1; 558 trans->transaction->blocked = 1;
@@ -1578,6 +1583,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1578 put_transaction(cur_trans); 1583 put_transaction(cur_trans);
1579 put_transaction(cur_trans); 1584 put_transaction(cur_trans);
1580 1585
1586 sb_end_intwrite(root->fs_info->sb);
1587
1581 trace_btrfs_transaction_commit(root); 1588 trace_btrfs_transaction_commit(root);
1582 1589
1583 btrfs_scrub_continue(root); 1590 btrfs_scrub_continue(root);
diff --git a/fs/buffer.c b/fs/buffer.c
index c7062c896d7c..9f6d2e41281d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write);
2306 * beyond EOF, then the page is guaranteed safe against truncation until we 2306 * beyond EOF, then the page is guaranteed safe against truncation until we
2307 * unlock the page. 2307 * unlock the page.
2308 * 2308 *
2309 * Direct callers of this function should call vfs_check_frozen() so that page 2309 * Direct callers of this function should protect against filesystem freezing
2310 * fault does not busyloop until the fs is thawed. 2310 * using sb_start_write() - sb_end_write() functions.
2311 */ 2311 */
2312int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, 2312int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2313 get_block_t get_block) 2313 get_block_t get_block)
@@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2318 loff_t size; 2318 loff_t size;
2319 int ret; 2319 int ret;
2320 2320
2321 /*
2322 * Update file times before taking page lock. We may end up failing the
2323 * fault so this update may be superfluous but who really cares...
2324 */
2325 file_update_time(vma->vm_file);
2326
2321 lock_page(page); 2327 lock_page(page);
2322 size = i_size_read(inode); 2328 size = i_size_read(inode);
2323 if ((page->mapping != inode->i_mapping) || 2329 if ((page->mapping != inode->i_mapping) ||
@@ -2339,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2339 2345
2340 if (unlikely(ret < 0)) 2346 if (unlikely(ret < 0))
2341 goto out_unlock; 2347 goto out_unlock;
2342 /*
2343 * Freezing in progress? We check after the page is marked dirty and
2344 * with page lock held so if the test here fails, we are sure freezing
2345 * code will wait during syncing until the page fault is done - at that
2346 * point page will be dirty and unlocked so freezing code will write it
2347 * and writeprotect it again.
2348 */
2349 set_page_dirty(page); 2348 set_page_dirty(page);
2350 if (inode->i_sb->s_frozen != SB_UNFROZEN) {
2351 ret = -EAGAIN;
2352 goto out_unlock;
2353 }
2354 wait_on_page_writeback(page); 2349 wait_on_page_writeback(page);
2355 return 0; 2350 return 0;
2356out_unlock: 2351out_unlock:
@@ -2365,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2365 int ret; 2360 int ret;
2366 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; 2361 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
2367 2362
2368 /* 2363 sb_start_pagefault(sb);
2369 * This check is racy but catches the common case. The check in
2370 * __block_page_mkwrite() is reliable.
2371 */
2372 vfs_check_frozen(sb, SB_FREEZE_WRITE);
2373 ret = __block_page_mkwrite(vma, vmf, get_block); 2364 ret = __block_page_mkwrite(vma, vmf, get_block);
2365 sb_end_pagefault(sb);
2374 return block_page_mkwrite_return(ret); 2366 return block_page_mkwrite_return(ret);
2375} 2367}
2376EXPORT_SYMBOL(block_page_mkwrite); 2368EXPORT_SYMBOL(block_page_mkwrite);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8b67304e4b80..452e71a1b753 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1184 loff_t size, len; 1184 loff_t size, len;
1185 int ret; 1185 int ret;
1186 1186
1187 /* Update time before taking page lock */
1188 file_update_time(vma->vm_file);
1189
1187 size = i_size_read(inode); 1190 size = i_size_read(inode);
1188 if (off + PAGE_CACHE_SIZE <= size) 1191 if (off + PAGE_CACHE_SIZE <= size)
1189 len = PAGE_CACHE_SIZE; 1192 len = PAGE_CACHE_SIZE;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ffa2be57804d..c3ca12c33ca2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -318,21 +318,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
318 struct vfsmount *lower_mnt; 318 struct vfsmount *lower_mnt;
319 int rc = 0; 319 int rc = 0;
320 320
321 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
322 fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
323 BUG_ON(!lower_dentry->d_count);
324
325 dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); 321 dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
326 ecryptfs_set_dentry_private(dentry, dentry_info);
327 if (!dentry_info) { 322 if (!dentry_info) {
328 printk(KERN_ERR "%s: Out of memory whilst attempting " 323 printk(KERN_ERR "%s: Out of memory whilst attempting "
329 "to allocate ecryptfs_dentry_info struct\n", 324 "to allocate ecryptfs_dentry_info struct\n",
330 __func__); 325 __func__);
331 dput(lower_dentry); 326 dput(lower_dentry);
332 mntput(lower_mnt);
333 d_drop(dentry);
334 return -ENOMEM; 327 return -ENOMEM;
335 } 328 }
329
330 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
331 fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
332 BUG_ON(!lower_dentry->d_count);
333
334 ecryptfs_set_dentry_private(dentry, dentry_info);
336 ecryptfs_set_dentry_lower(dentry, lower_dentry); 335 ecryptfs_set_dentry_lower(dentry, lower_dentry);
337 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); 336 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
338 337
@@ -381,12 +380,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
381 struct dentry *lower_dir_dentry, *lower_dentry; 380 struct dentry *lower_dir_dentry, *lower_dentry;
382 int rc = 0; 381 int rc = 0;
383 382
384 if ((ecryptfs_dentry->d_name.len == 1
385 && !strcmp(ecryptfs_dentry->d_name.name, "."))
386 || (ecryptfs_dentry->d_name.len == 2
387 && !strcmp(ecryptfs_dentry->d_name.name, ".."))) {
388 goto out_d_drop;
389 }
390 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 383 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
391 mutex_lock(&lower_dir_dentry->d_inode->i_mutex); 384 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
392 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, 385 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
@@ -397,8 +390,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
397 rc = PTR_ERR(lower_dentry); 390 rc = PTR_ERR(lower_dentry);
398 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 391 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
399 "[%d] on lower_dentry = [%s]\n", __func__, rc, 392 "[%d] on lower_dentry = [%s]\n", __func__, rc,
400 encrypted_and_encoded_name); 393 ecryptfs_dentry->d_name.name);
401 goto out_d_drop; 394 goto out;
402 } 395 }
403 if (lower_dentry->d_inode) 396 if (lower_dentry->d_inode)
404 goto interpose; 397 goto interpose;
@@ -415,7 +408,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
415 if (rc) { 408 if (rc) {
416 printk(KERN_ERR "%s: Error attempting to encrypt and encode " 409 printk(KERN_ERR "%s: Error attempting to encrypt and encode "
417 "filename; rc = [%d]\n", __func__, rc); 410 "filename; rc = [%d]\n", __func__, rc);
418 goto out_d_drop; 411 goto out;
419 } 412 }
420 mutex_lock(&lower_dir_dentry->d_inode->i_mutex); 413 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
421 lower_dentry = lookup_one_len(encrypted_and_encoded_name, 414 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
@@ -427,14 +420,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
427 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 420 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
428 "[%d] on lower_dentry = [%s]\n", __func__, rc, 421 "[%d] on lower_dentry = [%s]\n", __func__, rc,
429 encrypted_and_encoded_name); 422 encrypted_and_encoded_name);
430 goto out_d_drop; 423 goto out;
431 } 424 }
432interpose: 425interpose:
433 rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, 426 rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry,
434 ecryptfs_dir_inode); 427 ecryptfs_dir_inode);
435 goto out;
436out_d_drop:
437 d_drop(ecryptfs_dentry);
438out: 428out:
439 kfree(encrypted_and_encoded_name); 429 kfree(encrypted_and_encoded_name);
440 return ERR_PTR(rc); 430 return ERR_PTR(rc);
diff --git a/fs/exec.c b/fs/exec.c
index 3684353ebd5f..574cf4de4ec3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file)
2069 */ 2069 */
2070static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 2070static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2071{ 2071{
2072 struct file *rp, *wp; 2072 struct file *files[2];
2073 struct fdtable *fdt; 2073 struct fdtable *fdt;
2074 struct coredump_params *cp = (struct coredump_params *)info->data; 2074 struct coredump_params *cp = (struct coredump_params *)info->data;
2075 struct files_struct *cf = current->files; 2075 struct files_struct *cf = current->files;
2076 int err = create_pipe_files(files, 0);
2077 if (err)
2078 return err;
2076 2079
2077 wp = create_write_pipe(0); 2080 cp->file = files[1];
2078 if (IS_ERR(wp))
2079 return PTR_ERR(wp);
2080
2081 rp = create_read_pipe(wp, 0);
2082 if (IS_ERR(rp)) {
2083 free_write_pipe(wp);
2084 return PTR_ERR(rp);
2085 }
2086
2087 cp->file = wp;
2088 2081
2089 sys_close(0); 2082 sys_close(0);
2090 fd_install(0, rp); 2083 fd_install(0, files[0]);
2091 spin_lock(&cf->file_lock); 2084 spin_lock(&cf->file_lock);
2092 fdt = files_fdtable(cf); 2085 fdt = files_fdtable(cf);
2093 __set_open_fd(0, fdt); 2086 __set_open_fd(0, fdt);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 264d315f6c47..6363ac66fafa 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode)
79 truncate_inode_pages(&inode->i_data, 0); 79 truncate_inode_pages(&inode->i_data, 0);
80 80
81 if (want_delete) { 81 if (want_delete) {
82 sb_start_intwrite(inode->i_sb);
82 /* set dtime */ 83 /* set dtime */
83 EXT2_I(inode)->i_dtime = get_seconds(); 84 EXT2_I(inode)->i_dtime = get_seconds();
84 mark_inode_dirty(inode); 85 mark_inode_dirty(inode);
@@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode)
98 if (unlikely(rsv)) 99 if (unlikely(rsv))
99 kfree(rsv); 100 kfree(rsv);
100 101
101 if (want_delete) 102 if (want_delete) {
102 ext2_free_inode(inode); 103 ext2_free_inode(inode);
104 sb_end_intwrite(inode->i_sb);
105 }
103} 106}
104 107
105typedef struct { 108typedef struct {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f311d27b16f..af74d9e27b71 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb,
42static int ext2_remount (struct super_block * sb, int * flags, char * data); 42static int ext2_remount (struct super_block * sb, int * flags, char * data);
43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); 43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
44static int ext2_sync_fs(struct super_block *sb, int wait); 44static int ext2_sync_fs(struct super_block *sb, int wait);
45static int ext2_freeze(struct super_block *sb);
46static int ext2_unfreeze(struct super_block *sb);
45 47
46void ext2_error(struct super_block *sb, const char *function, 48void ext2_error(struct super_block *sb, const char *function,
47 const char *fmt, ...) 49 const char *fmt, ...)
@@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = {
305 .evict_inode = ext2_evict_inode, 307 .evict_inode = ext2_evict_inode,
306 .put_super = ext2_put_super, 308 .put_super = ext2_put_super,
307 .sync_fs = ext2_sync_fs, 309 .sync_fs = ext2_sync_fs,
310 .freeze_fs = ext2_freeze,
311 .unfreeze_fs = ext2_unfreeze,
308 .statfs = ext2_statfs, 312 .statfs = ext2_statfs,
309 .remount_fs = ext2_remount, 313 .remount_fs = ext2_remount,
310 .show_options = ext2_show_options, 314 .show_options = ext2_show_options,
@@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
1200 return 0; 1204 return 0;
1201} 1205}
1202 1206
1207static int ext2_freeze(struct super_block *sb)
1208{
1209 struct ext2_sb_info *sbi = EXT2_SB(sb);
1210
1211 /*
1212 * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared
1213 * because we have unattached inodes and thus filesystem is not fully
1214 * consistent.
1215 */
1216 if (atomic_long_read(&sb->s_remove_count)) {
1217 ext2_sync_fs(sb, 1);
1218 return 0;
1219 }
1220 /* Set EXT2_FS_VALID flag */
1221 spin_lock(&sbi->s_lock);
1222 sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state);
1223 spin_unlock(&sbi->s_lock);
1224 ext2_sync_super(sb, sbi->s_es, 1);
1225
1226 return 0;
1227}
1228
1229static int ext2_unfreeze(struct super_block *sb)
1230{
1231 /* Just write sb to clear EXT2_VALID_FS flag */
1232 ext2_write_super(sb);
1233
1234 return 0;
1235}
1203 1236
1204void ext2_write_super(struct super_block *sb) 1237void ext2_write_super(struct super_block *sb)
1205{ 1238{
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89b59cb7f9b8..6324f74e0342 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode)
233 if (is_bad_inode(inode)) 233 if (is_bad_inode(inode))
234 goto no_delete; 234 goto no_delete;
235 235
236 /*
237 * Protect us against freezing - iput() caller didn't have to have any
238 * protection against it
239 */
240 sb_start_intwrite(inode->i_sb);
236 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); 241 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
237 if (IS_ERR(handle)) { 242 if (IS_ERR(handle)) {
238 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 243 ext4_std_error(inode->i_sb, PTR_ERR(handle));
@@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode)
242 * cleaned up. 247 * cleaned up.
243 */ 248 */
244 ext4_orphan_del(NULL, inode); 249 ext4_orphan_del(NULL, inode);
250 sb_end_intwrite(inode->i_sb);
245 goto no_delete; 251 goto no_delete;
246 } 252 }
247 253
@@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode)
273 stop_handle: 279 stop_handle:
274 ext4_journal_stop(handle); 280 ext4_journal_stop(handle);
275 ext4_orphan_del(NULL, inode); 281 ext4_orphan_del(NULL, inode);
282 sb_end_intwrite(inode->i_sb);
276 goto no_delete; 283 goto no_delete;
277 } 284 }
278 } 285 }
@@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode)
301 else 308 else
302 ext4_free_inode(handle, inode); 309 ext4_free_inode(handle, inode);
303 ext4_journal_stop(handle); 310 ext4_journal_stop(handle);
311 sb_end_intwrite(inode->i_sb);
304 return; 312 return;
305no_delete: 313no_delete:
306 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ 314 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -4779,11 +4787,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4779 get_block_t *get_block; 4787 get_block_t *get_block;
4780 int retries = 0; 4788 int retries = 0;
4781 4789
4782 /* 4790 sb_start_pagefault(inode->i_sb);
4783 * This check is racy but catches the common case. We rely on
4784 * __block_page_mkwrite() to do a reliable check.
4785 */
4786 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
4787 /* Delalloc case is easy... */ 4791 /* Delalloc case is easy... */
4788 if (test_opt(inode->i_sb, DELALLOC) && 4792 if (test_opt(inode->i_sb, DELALLOC) &&
4789 !ext4_should_journal_data(inode) && 4793 !ext4_should_journal_data(inode) &&
@@ -4851,5 +4855,6 @@ retry_alloc:
4851out_ret: 4855out_ret:
4852 ret = block_page_mkwrite_return(ret); 4856 ret = block_page_mkwrite_return(ret);
4853out: 4857out:
4858 sb_end_pagefault(inode->i_sb);
4854 return ret; 4859 return ret;
4855} 4860}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index f99a1311e847..fe7c63f4717e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
44{ 44{
45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
46 46
47 /*
48 * We protect against freezing so that we don't create dirty buffers
49 * on frozen filesystem.
50 */
51 sb_start_write(sb);
47 ext4_mmp_csum_set(sb, mmp); 52 ext4_mmp_csum_set(sb, mmp);
48 mark_buffer_dirty(bh); 53 mark_buffer_dirty(bh);
49 lock_buffer(bh); 54 lock_buffer(bh);
@@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
51 get_bh(bh); 56 get_bh(bh);
52 submit_bh(WRITE_SYNC, bh); 57 submit_bh(WRITE_SYNC, bh);
53 wait_on_buffer(bh); 58 wait_on_buffer(bh);
59 sb_end_write(sb);
54 if (unlikely(!buffer_uptodate(bh))) 60 if (unlikely(!buffer_uptodate(bh)))
55 return 1; 61 return 1;
56 62
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2d51cd9af225..d76ec8277d3f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -331,33 +331,17 @@ static void ext4_put_nojournal(handle_t *handle)
331 * journal_end calls result in the superblock being marked dirty, so 331 * journal_end calls result in the superblock being marked dirty, so
332 * that sync() will call the filesystem's write_super callback if 332 * that sync() will call the filesystem's write_super callback if
333 * appropriate. 333 * appropriate.
334 *
335 * To avoid j_barrier hold in userspace when a user calls freeze(),
336 * ext4 prevents a new handle from being started by s_frozen, which
337 * is in an upper layer.
338 */ 334 */
339handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 335handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
340{ 336{
341 journal_t *journal; 337 journal_t *journal;
342 handle_t *handle;
343 338
344 trace_ext4_journal_start(sb, nblocks, _RET_IP_); 339 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
345 if (sb->s_flags & MS_RDONLY) 340 if (sb->s_flags & MS_RDONLY)
346 return ERR_PTR(-EROFS); 341 return ERR_PTR(-EROFS);
347 342
343 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
348 journal = EXT4_SB(sb)->s_journal; 344 journal = EXT4_SB(sb)->s_journal;
349 handle = ext4_journal_current_handle();
350
351 /*
352 * If a handle has been started, it should be allowed to
353 * finish, otherwise deadlock could happen between freeze
354 * and others(e.g. truncate) due to the restart of the
355 * journal handle if the filesystem is forzen and active
356 * handles are not stopped.
357 */
358 if (!handle)
359 vfs_check_frozen(sb, SB_FREEZE_TRANS);
360
361 if (!journal) 345 if (!journal)
362 return ext4_get_nojournal(); 346 return ext4_get_nojournal();
363 /* 347 /*
@@ -2747,6 +2731,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2747 sb = elr->lr_super; 2731 sb = elr->lr_super;
2748 ngroups = EXT4_SB(sb)->s_groups_count; 2732 ngroups = EXT4_SB(sb)->s_groups_count;
2749 2733
2734 sb_start_write(sb);
2750 for (group = elr->lr_next_group; group < ngroups; group++) { 2735 for (group = elr->lr_next_group; group < ngroups; group++) {
2751 gdp = ext4_get_group_desc(sb, group, NULL); 2736 gdp = ext4_get_group_desc(sb, group, NULL);
2752 if (!gdp) { 2737 if (!gdp) {
@@ -2773,6 +2758,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2773 elr->lr_next_sched = jiffies + elr->lr_timeout; 2758 elr->lr_next_sched = jiffies + elr->lr_timeout;
2774 elr->lr_next_group = group + 1; 2759 elr->lr_next_group = group + 1;
2775 } 2760 }
2761 sb_end_write(sb);
2776 2762
2777 return ret; 2763 return ret;
2778} 2764}
@@ -4460,10 +4446,8 @@ int ext4_force_commit(struct super_block *sb)
4460 return 0; 4446 return 0;
4461 4447
4462 journal = EXT4_SB(sb)->s_journal; 4448 journal = EXT4_SB(sb)->s_journal;
4463 if (journal) { 4449 if (journal)
4464 vfs_check_frozen(sb, SB_FREEZE_TRANS);
4465 ret = ext4_journal_force_commit(journal); 4450 ret = ext4_journal_force_commit(journal);
4466 }
4467 4451
4468 return ret; 4452 return ret;
4469} 4453}
@@ -4493,9 +4477,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4493 * gives us a chance to flush the journal completely and mark the fs clean. 4477 * gives us a chance to flush the journal completely and mark the fs clean.
4494 * 4478 *
4495 * Note that only this function cannot bring a filesystem to be in a clean 4479 * Note that only this function cannot bring a filesystem to be in a clean
4496 * state independently, because ext4 prevents a new handle from being started 4480 * state independently. It relies on upper layer to stop all data & metadata
4497 * by @sb->s_frozen, which stays in an upper layer. It thus needs help from 4481 * modifications.
4498 * the upper layer.
4499 */ 4482 */
4500static int ext4_freeze(struct super_block *sb) 4483static int ext4_freeze(struct super_block *sb)
4501{ 4484{
@@ -4522,7 +4505,7 @@ static int ext4_freeze(struct super_block *sb)
4522 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4505 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4523 error = ext4_commit_super(sb, 1); 4506 error = ext4_commit_super(sb, 1);
4524out: 4507out:
4525 /* we rely on s_frozen to stop further updates */ 4508 /* we rely on upper layer to stop further updates */
4526 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4509 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
4527 return error; 4510 return error;
4528} 4511}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a71fe3715ee8..e007b8bd8e5e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
43 if (err) 43 if (err)
44 goto out; 44 goto out;
45 45
46 mutex_lock(&inode->i_mutex);
47 err = mnt_want_write_file(file); 46 err = mnt_want_write_file(file);
48 if (err) 47 if (err)
49 goto out_unlock_inode; 48 goto out;
49 mutex_lock(&inode->i_mutex);
50 50
51 /* 51 /*
52 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also 52 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
73 /* The root directory has no attributes */ 73 /* The root directory has no attributes */
74 if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { 74 if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
75 err = -EINVAL; 75 err = -EINVAL;
76 goto out_drop_write; 76 goto out_unlock_inode;
77 } 77 }
78 78
79 if (sbi->options.sys_immutable && 79 if (sbi->options.sys_immutable &&
80 ((attr | oldattr) & ATTR_SYS) && 80 ((attr | oldattr) & ATTR_SYS) &&
81 !capable(CAP_LINUX_IMMUTABLE)) { 81 !capable(CAP_LINUX_IMMUTABLE)) {
82 err = -EPERM; 82 err = -EPERM;
83 goto out_drop_write; 83 goto out_unlock_inode;
84 } 84 }
85 85
86 /* 86 /*
@@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
90 */ 90 */
91 err = security_inode_setattr(file->f_path.dentry, &ia); 91 err = security_inode_setattr(file->f_path.dentry, &ia);
92 if (err) 92 if (err)
93 goto out_drop_write; 93 goto out_unlock_inode;
94 94
95 /* This MUST be done before doing anything irreversible... */ 95 /* This MUST be done before doing anything irreversible... */
96 err = fat_setattr(file->f_path.dentry, &ia); 96 err = fat_setattr(file->f_path.dentry, &ia);
97 if (err) 97 if (err)
98 goto out_drop_write; 98 goto out_unlock_inode;
99 99
100 fsnotify_change(file->f_path.dentry, ia.ia_valid); 100 fsnotify_change(file->f_path.dentry, ia.ia_valid);
101 if (sbi->options.sys_immutable) { 101 if (sbi->options.sys_immutable) {
@@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
107 107
108 fat_save_attrs(inode, attr); 108 fat_save_attrs(inode, attr);
109 mark_inode_dirty(inode); 109 mark_inode_dirty(inode);
110out_drop_write:
111 mnt_drop_write_file(file);
112out_unlock_inode: 110out_unlock_inode:
113 mutex_unlock(&inode->i_mutex); 111 mutex_unlock(&inode->i_mutex);
112 mnt_drop_write_file(file);
114out: 113out:
115 return err; 114 return err;
116} 115}
diff --git a/fs/file_table.c b/fs/file_table.c
index b3fc4d67a26b..701985e4ccda 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly;
43 43
44static struct percpu_counter nr_files __cacheline_aligned_in_smp; 44static struct percpu_counter nr_files __cacheline_aligned_in_smp;
45 45
46static inline void file_free_rcu(struct rcu_head *head) 46static void file_free_rcu(struct rcu_head *head)
47{ 47{
48 struct file *f = container_of(head, struct file, f_u.fu_rcuhead); 48 struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
49 49
@@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file)
217 return; 217 return;
218 if (file_check_writeable(file) != 0) 218 if (file_check_writeable(file) != 0)
219 return; 219 return;
220 mnt_drop_write(mnt); 220 __mnt_drop_write(mnt);
221 file_release_write(file); 221 file_release_write(file);
222} 222}
223 223
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b321a688cde7..93d8d6c9494d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -944,9 +944,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
944 return err; 944 return err;
945 945
946 count = ocount; 946 count = ocount;
947 947 sb_start_write(inode->i_sb);
948 mutex_lock(&inode->i_mutex); 948 mutex_lock(&inode->i_mutex);
949 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
950 949
951 /* We can write back this queue in page reclaim */ 950 /* We can write back this queue in page reclaim */
952 current->backing_dev_info = mapping->backing_dev_info; 951 current->backing_dev_info = mapping->backing_dev_info;
@@ -1004,6 +1003,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1004out: 1003out:
1005 current->backing_dev_info = NULL; 1004 current->backing_dev_info = NULL;
1006 mutex_unlock(&inode->i_mutex); 1005 mutex_unlock(&inode->i_mutex);
1006 sb_end_write(inode->i_sb);
1007 1007
1008 return written ? written : err; 1008 return written ? written : err;
1009} 1009}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9aa6af13823c..d1d791ef38de 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -373,11 +373,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
373 loff_t size; 373 loff_t size;
374 int ret; 374 int ret;
375 375
376 /* Wait if fs is frozen. This is racy so we check again later on 376 sb_start_pagefault(inode->i_sb);
377 * and retry if the fs has been frozen after the page lock has 377
378 * been acquired 378 /* Update file times before taking page lock */
379 */ 379 file_update_time(vma->vm_file);
380 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
381 380
382 ret = gfs2_rs_alloc(ip); 381 ret = gfs2_rs_alloc(ip);
383 if (ret) 382 if (ret)
@@ -462,14 +461,9 @@ out:
462 gfs2_holder_uninit(&gh); 461 gfs2_holder_uninit(&gh);
463 if (ret == 0) { 462 if (ret == 0) {
464 set_page_dirty(page); 463 set_page_dirty(page);
465 /* This check must be post dropping of transaction lock */ 464 wait_on_page_writeback(page);
466 if (inode->i_sb->s_frozen == SB_UNFROZEN) {
467 wait_on_page_writeback(page);
468 } else {
469 ret = -EAGAIN;
470 unlock_page(page);
471 }
472 } 465 }
466 sb_end_pagefault(inode->i_sb);
473 return block_page_mkwrite_return(ret); 467 return block_page_mkwrite_return(ret);
474} 468}
475 469
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index ad3e2fb763d7..adbd27875ef9 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
50 if (revokes) 50 if (revokes)
51 tr->tr_reserved += gfs2_struct2blk(sdp, revokes, 51 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
52 sizeof(u64)); 52 sizeof(u64));
53 sb_start_intwrite(sdp->sd_vfs);
53 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); 54 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
54 55
55 error = gfs2_glock_nq(&tr->tr_t_gh); 56 error = gfs2_glock_nq(&tr->tr_t_gh);
@@ -68,6 +69,7 @@ fail_gunlock:
68 gfs2_glock_dq(&tr->tr_t_gh); 69 gfs2_glock_dq(&tr->tr_t_gh);
69 70
70fail_holder_uninit: 71fail_holder_uninit:
72 sb_end_intwrite(sdp->sd_vfs);
71 gfs2_holder_uninit(&tr->tr_t_gh); 73 gfs2_holder_uninit(&tr->tr_t_gh);
72 kfree(tr); 74 kfree(tr);
73 75
@@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
116 gfs2_holder_uninit(&tr->tr_t_gh); 118 gfs2_holder_uninit(&tr->tr_t_gh);
117 kfree(tr); 119 kfree(tr);
118 } 120 }
121 sb_end_intwrite(sdp->sd_vfs);
119 return; 122 return;
120 } 123 }
121 124
@@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
136 139
137 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) 140 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
138 gfs2_log_flush(sdp, NULL); 141 gfs2_log_flush(sdp, NULL);
142 sb_end_intwrite(sdp->sd_vfs);
139} 143}
140 144
141/** 145/**
diff --git a/fs/inode.c b/fs/inode.c
index 3cc504320467..ac8d904b3f16 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1542,9 +1542,11 @@ void touch_atime(struct path *path)
1542 if (timespec_equal(&inode->i_atime, &now)) 1542 if (timespec_equal(&inode->i_atime, &now))
1543 return; 1543 return;
1544 1544
1545 if (mnt_want_write(mnt)) 1545 if (!sb_start_write_trylock(inode->i_sb))
1546 return; 1546 return;
1547 1547
1548 if (__mnt_want_write(mnt))
1549 goto skip_update;
1548 /* 1550 /*
1549 * File systems can error out when updating inodes if they need to 1551 * File systems can error out when updating inodes if they need to
1550 * allocate new space to modify an inode (such is the case for 1552 * allocate new space to modify an inode (such is the case for
@@ -1555,7 +1557,9 @@ void touch_atime(struct path *path)
1555 * of the fs read only, e.g. subvolumes in Btrfs. 1557 * of the fs read only, e.g. subvolumes in Btrfs.
1556 */ 1558 */
1557 update_time(inode, &now, S_ATIME); 1559 update_time(inode, &now, S_ATIME);
1558 mnt_drop_write(mnt); 1560 __mnt_drop_write(mnt);
1561skip_update:
1562 sb_end_write(inode->i_sb);
1559} 1563}
1560EXPORT_SYMBOL(touch_atime); 1564EXPORT_SYMBOL(touch_atime);
1561 1565
@@ -1662,11 +1666,11 @@ int file_update_time(struct file *file)
1662 return 0; 1666 return 0;
1663 1667
1664 /* Finally allowed to write? Takes lock. */ 1668 /* Finally allowed to write? Takes lock. */
1665 if (mnt_want_write_file(file)) 1669 if (__mnt_want_write_file(file))
1666 return 0; 1670 return 0;
1667 1671
1668 ret = update_time(inode, &now, sync_it); 1672 ret = update_time(inode, &now, sync_it);
1669 mnt_drop_write_file(file); 1673 __mnt_drop_write_file(file);
1670 1674
1671 return ret; 1675 return ret;
1672} 1676}
diff --git a/fs/internal.h b/fs/internal.h
index a6fd56c68b11..371bcc4b1697 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -61,6 +61,10 @@ extern void __init mnt_init(void);
61 61
62extern struct lglock vfsmount_lock; 62extern struct lglock vfsmount_lock;
63 63
64extern int __mnt_want_write(struct vfsmount *);
65extern int __mnt_want_write_file(struct file *);
66extern void __mnt_drop_write(struct vfsmount *);
67extern void __mnt_drop_write_file(struct file *);
64 68
65/* 69/*
66 * fs_struct.c 70 * fs_struct.c
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 8392cb85bd54..05d29124c6ab 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -156,12 +156,16 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
156 struct nlm_rqst *call; 156 struct nlm_rqst *call;
157 int status; 157 int status;
158 158
159 nlm_get_host(host);
160 call = nlm_alloc_call(host); 159 call = nlm_alloc_call(host);
161 if (call == NULL) 160 if (call == NULL)
162 return -ENOMEM; 161 return -ENOMEM;
163 162
164 nlmclnt_locks_init_private(fl, host); 163 nlmclnt_locks_init_private(fl, host);
164 if (!fl->fl_u.nfs_fl.owner) {
165 /* lockowner allocation has failed */
166 nlmclnt_release_call(call);
167 return -ENOMEM;
168 }
165 /* Set up the argument struct */ 169 /* Set up the argument struct */
166 nlmclnt_setlockargs(call, fl); 170 nlmclnt_setlockargs(call, fl);
167 171
@@ -185,9 +189,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc);
185 189
186/* 190/*
187 * Allocate an NLM RPC call struct 191 * Allocate an NLM RPC call struct
188 *
189 * Note: the caller must hold a reference to host. In case of failure,
190 * this reference will be released.
191 */ 192 */
192struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) 193struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
193{ 194{
@@ -199,7 +200,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
199 atomic_set(&call->a_count, 1); 200 atomic_set(&call->a_count, 1);
200 locks_init_lock(&call->a_args.lock.fl); 201 locks_init_lock(&call->a_args.lock.fl);
201 locks_init_lock(&call->a_res.lock.fl); 202 locks_init_lock(&call->a_res.lock.fl);
202 call->a_host = host; 203 call->a_host = nlm_get_host(host);
203 return call; 204 return call;
204 } 205 }
205 if (signalled()) 206 if (signalled())
@@ -207,7 +208,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
207 printk("nlm_alloc_call: failed, waiting for memory\n"); 208 printk("nlm_alloc_call: failed, waiting for memory\n");
208 schedule_timeout_interruptible(5*HZ); 209 schedule_timeout_interruptible(5*HZ);
209 } 210 }
210 nlmclnt_release_host(host);
211 return NULL; 211 return NULL;
212} 212}
213 213
@@ -750,7 +750,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
750 dprintk("lockd: blocking lock attempt was interrupted by a signal.\n" 750 dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
751 " Attempting to cancel lock.\n"); 751 " Attempting to cancel lock.\n");
752 752
753 req = nlm_alloc_call(nlm_get_host(host)); 753 req = nlm_alloc_call(host);
754 if (!req) 754 if (!req)
755 return -ENOMEM; 755 return -ENOMEM;
756 req->a_flags = RPC_TASK_ASYNC; 756 req->a_flags = RPC_TASK_ASYNC;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4a43d253c045..b147d1ae71fd 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -257,6 +257,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
257 return rpc_system_err; 257 return rpc_system_err;
258 258
259 call = nlm_alloc_call(host); 259 call = nlm_alloc_call(host);
260 nlmsvc_release_host(host);
260 if (call == NULL) 261 if (call == NULL)
261 return rpc_system_err; 262 return rpc_system_err;
262 263
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index afe4488c33d8..fb1a2bedbe97 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
219 struct nlm_block *block; 219 struct nlm_block *block;
220 struct nlm_rqst *call = NULL; 220 struct nlm_rqst *call = NULL;
221 221
222 nlm_get_host(host);
223 call = nlm_alloc_call(host); 222 call = nlm_alloc_call(host);
224 if (call == NULL) 223 if (call == NULL)
225 return NULL; 224 return NULL;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index de8f2caa2235..3009a365e082 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -297,6 +297,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
297 return rpc_system_err; 297 return rpc_system_err;
298 298
299 call = nlm_alloc_call(host); 299 call = nlm_alloc_call(host);
300 nlmsvc_release_host(host);
300 if (call == NULL) 301 if (call == NULL)
301 return rpc_system_err; 302 return rpc_system_err;
302 303
diff --git a/fs/namei.c b/fs/namei.c
index 2ccc35c4dc24..1b464390dde8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -650,6 +650,121 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
650 path_put(link); 650 path_put(link);
651} 651}
652 652
653int sysctl_protected_symlinks __read_mostly = 1;
654int sysctl_protected_hardlinks __read_mostly = 1;
655
656/**
657 * may_follow_link - Check symlink following for unsafe situations
658 * @link: The path of the symlink
659 *
660 * In the case of the sysctl_protected_symlinks sysctl being enabled,
661 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
662 * in a sticky world-writable directory. This is to protect privileged
663 * processes from failing races against path names that may change out
664 * from under them by way of other users creating malicious symlinks.
665 * It will permit symlinks to be followed only when outside a sticky
666 * world-writable directory, or when the uid of the symlink and follower
667 * match, or when the directory owner matches the symlink's owner.
668 *
669 * Returns 0 if following the symlink is allowed, -ve on error.
670 */
671static inline int may_follow_link(struct path *link, struct nameidata *nd)
672{
673 const struct inode *inode;
674 const struct inode *parent;
675
676 if (!sysctl_protected_symlinks)
677 return 0;
678
679 /* Allowed if owner and follower match. */
680 inode = link->dentry->d_inode;
681 if (current_cred()->fsuid == inode->i_uid)
682 return 0;
683
684 /* Allowed if parent directory not sticky and world-writable. */
685 parent = nd->path.dentry->d_inode;
686 if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
687 return 0;
688
689 /* Allowed if parent directory and link owner match. */
690 if (parent->i_uid == inode->i_uid)
691 return 0;
692
693 path_put_conditional(link, nd);
694 path_put(&nd->path);
695 audit_log_link_denied("follow_link", link);
696 return -EACCES;
697}
698
699/**
700 * safe_hardlink_source - Check for safe hardlink conditions
701 * @inode: the source inode to hardlink from
702 *
703 * Return false if at least one of the following conditions:
704 * - inode is not a regular file
705 * - inode is setuid
706 * - inode is setgid and group-exec
707 * - access failure for read and write
708 *
709 * Otherwise returns true.
710 */
711static bool safe_hardlink_source(struct inode *inode)
712{
713 umode_t mode = inode->i_mode;
714
715 /* Special files should not get pinned to the filesystem. */
716 if (!S_ISREG(mode))
717 return false;
718
719 /* Setuid files should not get pinned to the filesystem. */
720 if (mode & S_ISUID)
721 return false;
722
723 /* Executable setgid files should not get pinned to the filesystem. */
724 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
725 return false;
726
727 /* Hardlinking to unreadable or unwritable sources is dangerous. */
728 if (inode_permission(inode, MAY_READ | MAY_WRITE))
729 return false;
730
731 return true;
732}
733
734/**
735 * may_linkat - Check permissions for creating a hardlink
736 * @link: the source to hardlink from
737 *
738 * Block hardlink when all of:
739 * - sysctl_protected_hardlinks enabled
740 * - fsuid does not match inode
741 * - hardlink source is unsafe (see safe_hardlink_source() above)
742 * - not CAP_FOWNER
743 *
744 * Returns 0 if successful, -ve on error.
745 */
746static int may_linkat(struct path *link)
747{
748 const struct cred *cred;
749 struct inode *inode;
750
751 if (!sysctl_protected_hardlinks)
752 return 0;
753
754 cred = current_cred();
755 inode = link->dentry->d_inode;
756
757 /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
758 * otherwise, it must be a safe source.
759 */
760 if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) ||
761 capable(CAP_FOWNER))
762 return 0;
763
764 audit_log_link_denied("linkat", link);
765 return -EPERM;
766}
767
653static __always_inline int 768static __always_inline int
654follow_link(struct path *link, struct nameidata *nd, void **p) 769follow_link(struct path *link, struct nameidata *nd, void **p)
655{ 770{
@@ -1818,6 +1933,9 @@ static int path_lookupat(int dfd, const char *name,
1818 while (err > 0) { 1933 while (err > 0) {
1819 void *cookie; 1934 void *cookie;
1820 struct path link = path; 1935 struct path link = path;
1936 err = may_follow_link(&link, nd);
1937 if (unlikely(err))
1938 break;
1821 nd->flags |= LOOKUP_PARENT; 1939 nd->flags |= LOOKUP_PARENT;
1822 err = follow_link(&link, nd, &cookie); 1940 err = follow_link(&link, nd, &cookie);
1823 if (err) 1941 if (err)
@@ -2277,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
2277static int atomic_open(struct nameidata *nd, struct dentry *dentry, 2395static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2278 struct path *path, struct file *file, 2396 struct path *path, struct file *file,
2279 const struct open_flags *op, 2397 const struct open_flags *op,
2280 bool *want_write, bool need_lookup, 2398 bool got_write, bool need_lookup,
2281 int *opened) 2399 int *opened)
2282{ 2400{
2283 struct inode *dir = nd->path.dentry->d_inode; 2401 struct inode *dir = nd->path.dentry->d_inode;
@@ -2300,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2300 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) 2418 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2301 mode &= ~current_umask(); 2419 mode &= ~current_umask();
2302 2420
2303 if (open_flag & O_EXCL) { 2421 if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) {
2304 open_flag &= ~O_TRUNC; 2422 open_flag &= ~O_TRUNC;
2305 *opened |= FILE_CREATED; 2423 *opened |= FILE_CREATED;
2306 } 2424 }
@@ -2314,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2314 * Another problem is returing the "right" error value (e.g. for an 2432 * Another problem is returing the "right" error value (e.g. for an
2315 * O_EXCL open we want to return EEXIST not EROFS). 2433 * O_EXCL open we want to return EEXIST not EROFS).
2316 */ 2434 */
2317 if ((open_flag & (O_CREAT | O_TRUNC)) || 2435 if (((open_flag & (O_CREAT | O_TRUNC)) ||
2318 (open_flag & O_ACCMODE) != O_RDONLY) { 2436 (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) {
2319 error = mnt_want_write(nd->path.mnt); 2437 if (!(open_flag & O_CREAT)) {
2320 if (!error) {
2321 *want_write = true;
2322 } else if (!(open_flag & O_CREAT)) {
2323 /* 2438 /*
2324 * No O_CREATE -> atomicity not a requirement -> fall 2439 * No O_CREATE -> atomicity not a requirement -> fall
2325 * back to lookup + open 2440 * back to lookup + open
@@ -2327,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2327 goto no_open; 2442 goto no_open;
2328 } else if (open_flag & (O_EXCL | O_TRUNC)) { 2443 } else if (open_flag & (O_EXCL | O_TRUNC)) {
2329 /* Fall back and fail with the right error */ 2444 /* Fall back and fail with the right error */
2330 create_error = error; 2445 create_error = -EROFS;
2331 goto no_open; 2446 goto no_open;
2332 } else { 2447 } else {
2333 /* No side effects, safe to clear O_CREAT */ 2448 /* No side effects, safe to clear O_CREAT */
2334 create_error = error; 2449 create_error = -EROFS;
2335 open_flag &= ~O_CREAT; 2450 open_flag &= ~O_CREAT;
2336 } 2451 }
2337 } 2452 }
@@ -2438,7 +2553,7 @@ looked_up:
2438static int lookup_open(struct nameidata *nd, struct path *path, 2553static int lookup_open(struct nameidata *nd, struct path *path,
2439 struct file *file, 2554 struct file *file,
2440 const struct open_flags *op, 2555 const struct open_flags *op,
2441 bool *want_write, int *opened) 2556 bool got_write, int *opened)
2442{ 2557{
2443 struct dentry *dir = nd->path.dentry; 2558 struct dentry *dir = nd->path.dentry;
2444 struct inode *dir_inode = dir->d_inode; 2559 struct inode *dir_inode = dir->d_inode;
@@ -2456,7 +2571,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
2456 goto out_no_open; 2571 goto out_no_open;
2457 2572
2458 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { 2573 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
2459 return atomic_open(nd, dentry, path, file, op, want_write, 2574 return atomic_open(nd, dentry, path, file, op, got_write,
2460 need_lookup, opened); 2575 need_lookup, opened);
2461 } 2576 }
2462 2577
@@ -2480,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path,
2480 * a permanent write count is taken through 2595 * a permanent write count is taken through
2481 * the 'struct file' in finish_open(). 2596 * the 'struct file' in finish_open().
2482 */ 2597 */
2483 error = mnt_want_write(nd->path.mnt); 2598 if (!got_write) {
2484 if (error) 2599 error = -EROFS;
2485 goto out_dput; 2600 goto out_dput;
2486 *want_write = true; 2601 }
2487 *opened |= FILE_CREATED; 2602 *opened |= FILE_CREATED;
2488 error = security_path_mknod(&nd->path, dentry, mode, 0); 2603 error = security_path_mknod(&nd->path, dentry, mode, 0);
2489 if (error) 2604 if (error)
@@ -2513,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path,
2513 struct dentry *dir = nd->path.dentry; 2628 struct dentry *dir = nd->path.dentry;
2514 int open_flag = op->open_flag; 2629 int open_flag = op->open_flag;
2515 bool will_truncate = (open_flag & O_TRUNC) != 0; 2630 bool will_truncate = (open_flag & O_TRUNC) != 0;
2516 bool want_write = false; 2631 bool got_write = false;
2517 int acc_mode = op->acc_mode; 2632 int acc_mode = op->acc_mode;
2518 struct inode *inode; 2633 struct inode *inode;
2519 bool symlink_ok = false; 2634 bool symlink_ok = false;
@@ -2582,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path,
2582 } 2697 }
2583 2698
2584retry_lookup: 2699retry_lookup:
2700 if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
2701 error = mnt_want_write(nd->path.mnt);
2702 if (!error)
2703 got_write = true;
2704 /*
2705 * do _not_ fail yet - we might not need that or fail with
2706 * a different error; let lookup_open() decide; we'll be
2707 * dropping this one anyway.
2708 */
2709 }
2585 mutex_lock(&dir->d_inode->i_mutex); 2710 mutex_lock(&dir->d_inode->i_mutex);
2586 error = lookup_open(nd, path, file, op, &want_write, opened); 2711 error = lookup_open(nd, path, file, op, got_write, opened);
2587 mutex_unlock(&dir->d_inode->i_mutex); 2712 mutex_unlock(&dir->d_inode->i_mutex);
2588 2713
2589 if (error <= 0) { 2714 if (error <= 0) {
@@ -2608,22 +2733,23 @@ retry_lookup:
2608 } 2733 }
2609 2734
2610 /* 2735 /*
2611 * It already exists. 2736 * create/update audit record if it already exists.
2612 */ 2737 */
2613 audit_inode(pathname, path->dentry); 2738 if (path->dentry->d_inode)
2739 audit_inode(pathname, path->dentry);
2614 2740
2615 /* 2741 /*
2616 * If atomic_open() acquired write access it is dropped now due to 2742 * If atomic_open() acquired write access it is dropped now due to
2617 * possible mount and symlink following (this might be optimized away if 2743 * possible mount and symlink following (this might be optimized away if
2618 * necessary...) 2744 * necessary...)
2619 */ 2745 */
2620 if (want_write) { 2746 if (got_write) {
2621 mnt_drop_write(nd->path.mnt); 2747 mnt_drop_write(nd->path.mnt);
2622 want_write = false; 2748 got_write = false;
2623 } 2749 }
2624 2750
2625 error = -EEXIST; 2751 error = -EEXIST;
2626 if (open_flag & O_EXCL) 2752 if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
2627 goto exit_dput; 2753 goto exit_dput;
2628 2754
2629 error = follow_managed(path, nd->flags); 2755 error = follow_managed(path, nd->flags);
@@ -2684,7 +2810,7 @@ finish_open:
2684 error = mnt_want_write(nd->path.mnt); 2810 error = mnt_want_write(nd->path.mnt);
2685 if (error) 2811 if (error)
2686 goto out; 2812 goto out;
2687 want_write = true; 2813 got_write = true;
2688 } 2814 }
2689finish_open_created: 2815finish_open_created:
2690 error = may_open(&nd->path, acc_mode, open_flag); 2816 error = may_open(&nd->path, acc_mode, open_flag);
@@ -2711,7 +2837,7 @@ opened:
2711 goto exit_fput; 2837 goto exit_fput;
2712 } 2838 }
2713out: 2839out:
2714 if (want_write) 2840 if (got_write)
2715 mnt_drop_write(nd->path.mnt); 2841 mnt_drop_write(nd->path.mnt);
2716 path_put(&save_parent); 2842 path_put(&save_parent);
2717 terminate_walk(nd); 2843 terminate_walk(nd);
@@ -2735,9 +2861,9 @@ stale_open:
2735 nd->inode = dir->d_inode; 2861 nd->inode = dir->d_inode;
2736 save_parent.mnt = NULL; 2862 save_parent.mnt = NULL;
2737 save_parent.dentry = NULL; 2863 save_parent.dentry = NULL;
2738 if (want_write) { 2864 if (got_write) {
2739 mnt_drop_write(nd->path.mnt); 2865 mnt_drop_write(nd->path.mnt);
2740 want_write = false; 2866 got_write = false;
2741 } 2867 }
2742 retried = true; 2868 retried = true;
2743 goto retry_lookup; 2869 goto retry_lookup;
@@ -2777,6 +2903,9 @@ static struct file *path_openat(int dfd, const char *pathname,
2777 error = -ELOOP; 2903 error = -ELOOP;
2778 break; 2904 break;
2779 } 2905 }
2906 error = may_follow_link(&link, nd);
2907 if (unlikely(error))
2908 break;
2780 nd->flags |= LOOKUP_PARENT; 2909 nd->flags |= LOOKUP_PARENT;
2781 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); 2910 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2782 error = follow_link(&link, nd, &cookie); 2911 error = follow_link(&link, nd, &cookie);
@@ -2846,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
2846{ 2975{
2847 struct dentry *dentry = ERR_PTR(-EEXIST); 2976 struct dentry *dentry = ERR_PTR(-EEXIST);
2848 struct nameidata nd; 2977 struct nameidata nd;
2978 int err2;
2849 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); 2979 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
2850 if (error) 2980 if (error)
2851 return ERR_PTR(error); 2981 return ERR_PTR(error);
@@ -2859,16 +2989,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
2859 nd.flags &= ~LOOKUP_PARENT; 2989 nd.flags &= ~LOOKUP_PARENT;
2860 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; 2990 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
2861 2991
2992 /* don't fail immediately if it's r/o, at least try to report other errors */
2993 err2 = mnt_want_write(nd.path.mnt);
2862 /* 2994 /*
2863 * Do the final lookup. 2995 * Do the final lookup.
2864 */ 2996 */
2865 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2997 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2866 dentry = lookup_hash(&nd); 2998 dentry = lookup_hash(&nd);
2867 if (IS_ERR(dentry)) 2999 if (IS_ERR(dentry))
2868 goto fail; 3000 goto unlock;
2869 3001
3002 error = -EEXIST;
2870 if (dentry->d_inode) 3003 if (dentry->d_inode)
2871 goto eexist; 3004 goto fail;
2872 /* 3005 /*
2873 * Special case - lookup gave negative, but... we had foo/bar/ 3006 * Special case - lookup gave negative, but... we had foo/bar/
2874 * From the vfs_mknod() POV we just have a negative dentry - 3007 * From the vfs_mknod() POV we just have a negative dentry -
@@ -2876,23 +3009,37 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
2876 * been asking for (non-existent) directory. -ENOENT for you. 3009 * been asking for (non-existent) directory. -ENOENT for you.
2877 */ 3010 */
2878 if (unlikely(!is_dir && nd.last.name[nd.last.len])) { 3011 if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
2879 dput(dentry); 3012 error = -ENOENT;
2880 dentry = ERR_PTR(-ENOENT); 3013 goto fail;
3014 }
3015 if (unlikely(err2)) {
3016 error = err2;
2881 goto fail; 3017 goto fail;
2882 } 3018 }
2883 *path = nd.path; 3019 *path = nd.path;
2884 return dentry; 3020 return dentry;
2885eexist:
2886 dput(dentry);
2887 dentry = ERR_PTR(-EEXIST);
2888fail: 3021fail:
3022 dput(dentry);
3023 dentry = ERR_PTR(error);
3024unlock:
2889 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3025 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3026 if (!err2)
3027 mnt_drop_write(nd.path.mnt);
2890out: 3028out:
2891 path_put(&nd.path); 3029 path_put(&nd.path);
2892 return dentry; 3030 return dentry;
2893} 3031}
2894EXPORT_SYMBOL(kern_path_create); 3032EXPORT_SYMBOL(kern_path_create);
2895 3033
3034void done_path_create(struct path *path, struct dentry *dentry)
3035{
3036 dput(dentry);
3037 mutex_unlock(&path->dentry->d_inode->i_mutex);
3038 mnt_drop_write(path->mnt);
3039 path_put(path);
3040}
3041EXPORT_SYMBOL(done_path_create);
3042
2896struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) 3043struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
2897{ 3044{
2898 char *tmp = getname(pathname); 3045 char *tmp = getname(pathname);
@@ -2956,8 +3103,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2956 struct path path; 3103 struct path path;
2957 int error; 3104 int error;
2958 3105
2959 if (S_ISDIR(mode)) 3106 error = may_mknod(mode);
2960 return -EPERM; 3107 if (error)
3108 return error;
2961 3109
2962 dentry = user_path_create(dfd, filename, &path, 0); 3110 dentry = user_path_create(dfd, filename, &path, 0);
2963 if (IS_ERR(dentry)) 3111 if (IS_ERR(dentry))
@@ -2965,15 +3113,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2965 3113
2966 if (!IS_POSIXACL(path.dentry->d_inode)) 3114 if (!IS_POSIXACL(path.dentry->d_inode))
2967 mode &= ~current_umask(); 3115 mode &= ~current_umask();
2968 error = may_mknod(mode);
2969 if (error)
2970 goto out_dput;
2971 error = mnt_want_write(path.mnt);
2972 if (error)
2973 goto out_dput;
2974 error = security_path_mknod(&path, dentry, mode, dev); 3116 error = security_path_mknod(&path, dentry, mode, dev);
2975 if (error) 3117 if (error)
2976 goto out_drop_write; 3118 goto out;
2977 switch (mode & S_IFMT) { 3119 switch (mode & S_IFMT) {
2978 case 0: case S_IFREG: 3120 case 0: case S_IFREG:
2979 error = vfs_create(path.dentry->d_inode,dentry,mode,true); 3121 error = vfs_create(path.dentry->d_inode,dentry,mode,true);
@@ -2986,13 +3128,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2986 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0); 3128 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
2987 break; 3129 break;
2988 } 3130 }
2989out_drop_write: 3131out:
2990 mnt_drop_write(path.mnt); 3132 done_path_create(&path, dentry);
2991out_dput:
2992 dput(dentry);
2993 mutex_unlock(&path.dentry->d_inode->i_mutex);
2994 path_put(&path);
2995
2996 return error; 3133 return error;
2997} 3134}
2998 3135
@@ -3038,19 +3175,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
3038 3175
3039 if (!IS_POSIXACL(path.dentry->d_inode)) 3176 if (!IS_POSIXACL(path.dentry->d_inode))
3040 mode &= ~current_umask(); 3177 mode &= ~current_umask();
3041 error = mnt_want_write(path.mnt);
3042 if (error)
3043 goto out_dput;
3044 error = security_path_mkdir(&path, dentry, mode); 3178 error = security_path_mkdir(&path, dentry, mode);
3045 if (error) 3179 if (!error)
3046 goto out_drop_write; 3180 error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
3047 error = vfs_mkdir(path.dentry->d_inode, dentry, mode); 3181 done_path_create(&path, dentry);
3048out_drop_write:
3049 mnt_drop_write(path.mnt);
3050out_dput:
3051 dput(dentry);
3052 mutex_unlock(&path.dentry->d_inode->i_mutex);
3053 path_put(&path);
3054 return error; 3182 return error;
3055} 3183}
3056 3184
@@ -3144,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
3144 } 3272 }
3145 3273
3146 nd.flags &= ~LOOKUP_PARENT; 3274 nd.flags &= ~LOOKUP_PARENT;
3275 error = mnt_want_write(nd.path.mnt);
3276 if (error)
3277 goto exit1;
3147 3278
3148 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3279 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3149 dentry = lookup_hash(&nd); 3280 dentry = lookup_hash(&nd);
@@ -3154,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname)
3154 error = -ENOENT; 3285 error = -ENOENT;
3155 goto exit3; 3286 goto exit3;
3156 } 3287 }
3157 error = mnt_want_write(nd.path.mnt);
3158 if (error)
3159 goto exit3;
3160 error = security_path_rmdir(&nd.path, dentry); 3288 error = security_path_rmdir(&nd.path, dentry);
3161 if (error) 3289 if (error)
3162 goto exit4; 3290 goto exit3;
3163 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 3291 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
3164exit4:
3165 mnt_drop_write(nd.path.mnt);
3166exit3: 3292exit3:
3167 dput(dentry); 3293 dput(dentry);
3168exit2: 3294exit2:
3169 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3295 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3296 mnt_drop_write(nd.path.mnt);
3170exit1: 3297exit1:
3171 path_put(&nd.path); 3298 path_put(&nd.path);
3172 putname(name); 3299 putname(name);
@@ -3233,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
3233 goto exit1; 3360 goto exit1;
3234 3361
3235 nd.flags &= ~LOOKUP_PARENT; 3362 nd.flags &= ~LOOKUP_PARENT;
3363 error = mnt_want_write(nd.path.mnt);
3364 if (error)
3365 goto exit1;
3236 3366
3237 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3367 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3238 dentry = lookup_hash(&nd); 3368 dentry = lookup_hash(&nd);
@@ -3245,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname)
3245 if (!inode) 3375 if (!inode)
3246 goto slashes; 3376 goto slashes;
3247 ihold(inode); 3377 ihold(inode);
3248 error = mnt_want_write(nd.path.mnt);
3249 if (error)
3250 goto exit2;
3251 error = security_path_unlink(&nd.path, dentry); 3378 error = security_path_unlink(&nd.path, dentry);
3252 if (error) 3379 if (error)
3253 goto exit3; 3380 goto exit2;
3254 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 3381 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
3255exit3: 3382exit2:
3256 mnt_drop_write(nd.path.mnt);
3257 exit2:
3258 dput(dentry); 3383 dput(dentry);
3259 } 3384 }
3260 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3385 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3261 if (inode) 3386 if (inode)
3262 iput(inode); /* truncate the inode here */ 3387 iput(inode); /* truncate the inode here */
3388 mnt_drop_write(nd.path.mnt);
3263exit1: 3389exit1:
3264 path_put(&nd.path); 3390 path_put(&nd.path);
3265 putname(name); 3391 putname(name);
@@ -3324,19 +3450,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
3324 if (IS_ERR(dentry)) 3450 if (IS_ERR(dentry))
3325 goto out_putname; 3451 goto out_putname;
3326 3452
3327 error = mnt_want_write(path.mnt);
3328 if (error)
3329 goto out_dput;
3330 error = security_path_symlink(&path, dentry, from); 3453 error = security_path_symlink(&path, dentry, from);
3331 if (error) 3454 if (!error)
3332 goto out_drop_write; 3455 error = vfs_symlink(path.dentry->d_inode, dentry, from);
3333 error = vfs_symlink(path.dentry->d_inode, dentry, from); 3456 done_path_create(&path, dentry);
3334out_drop_write:
3335 mnt_drop_write(path.mnt);
3336out_dput:
3337 dput(dentry);
3338 mutex_unlock(&path.dentry->d_inode->i_mutex);
3339 path_put(&path);
3340out_putname: 3457out_putname:
3341 putname(from); 3458 putname(from);
3342 return error; 3459 return error;
@@ -3436,19 +3553,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3436 error = -EXDEV; 3553 error = -EXDEV;
3437 if (old_path.mnt != new_path.mnt) 3554 if (old_path.mnt != new_path.mnt)
3438 goto out_dput; 3555 goto out_dput;
3439 error = mnt_want_write(new_path.mnt); 3556 error = may_linkat(&old_path);
3440 if (error) 3557 if (unlikely(error))
3441 goto out_dput; 3558 goto out_dput;
3442 error = security_path_link(old_path.dentry, &new_path, new_dentry); 3559 error = security_path_link(old_path.dentry, &new_path, new_dentry);
3443 if (error) 3560 if (error)
3444 goto out_drop_write; 3561 goto out_dput;
3445 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); 3562 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
3446out_drop_write:
3447 mnt_drop_write(new_path.mnt);
3448out_dput: 3563out_dput:
3449 dput(new_dentry); 3564 done_path_create(&new_path, new_dentry);
3450 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
3451 path_put(&new_path);
3452out: 3565out:
3453 path_put(&old_path); 3566 path_put(&old_path);
3454 3567
@@ -3644,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
3644 if (newnd.last_type != LAST_NORM) 3757 if (newnd.last_type != LAST_NORM)
3645 goto exit2; 3758 goto exit2;
3646 3759
3760 error = mnt_want_write(oldnd.path.mnt);
3761 if (error)
3762 goto exit2;
3763
3647 oldnd.flags &= ~LOOKUP_PARENT; 3764 oldnd.flags &= ~LOOKUP_PARENT;
3648 newnd.flags &= ~LOOKUP_PARENT; 3765 newnd.flags &= ~LOOKUP_PARENT;
3649 newnd.flags |= LOOKUP_RENAME_TARGET; 3766 newnd.flags |= LOOKUP_RENAME_TARGET;
@@ -3679,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
3679 if (new_dentry == trap) 3796 if (new_dentry == trap)
3680 goto exit5; 3797 goto exit5;
3681 3798
3682 error = mnt_want_write(oldnd.path.mnt);
3683 if (error)
3684 goto exit5;
3685 error = security_path_rename(&oldnd.path, old_dentry, 3799 error = security_path_rename(&oldnd.path, old_dentry,
3686 &newnd.path, new_dentry); 3800 &newnd.path, new_dentry);
3687 if (error) 3801 if (error)
3688 goto exit6; 3802 goto exit5;
3689 error = vfs_rename(old_dir->d_inode, old_dentry, 3803 error = vfs_rename(old_dir->d_inode, old_dentry,
3690 new_dir->d_inode, new_dentry); 3804 new_dir->d_inode, new_dentry);
3691exit6:
3692 mnt_drop_write(oldnd.path.mnt);
3693exit5: 3805exit5:
3694 dput(new_dentry); 3806 dput(new_dentry);
3695exit4: 3807exit4:
3696 dput(old_dentry); 3808 dput(old_dentry);
3697exit3: 3809exit3:
3698 unlock_rename(new_dir, old_dir); 3810 unlock_rename(new_dir, old_dir);
3811 mnt_drop_write(oldnd.path.mnt);
3699exit2: 3812exit2:
3700 path_put(&newnd.path); 3813 path_put(&newnd.path);
3701 putname(to); 3814 putname(to);
diff --git a/fs/namespace.c b/fs/namespace.c
index c53d3381b0d0..4d31f73e2561 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt)
283} 283}
284 284
285/* 285/*
286 * Most r/o checks on a fs are for operations that take 286 * Most r/o & frozen checks on a fs are for operations that take discrete
287 * discrete amounts of time, like a write() or unlink(). 287 * amounts of time, like a write() or unlink(). We must keep track of when
288 * We must keep track of when those operations start 288 * those operations start (for permission checks) and when they end, so that we
289 * (for permission checks) and when they end, so that 289 * can determine when writes are able to occur to a filesystem.
290 * we can determine when writes are able to occur to
291 * a filesystem.
292 */ 290 */
293/** 291/**
294 * mnt_want_write - get write access to a mount 292 * __mnt_want_write - get write access to a mount without freeze protection
295 * @m: the mount on which to take a write 293 * @m: the mount on which to take a write
296 * 294 *
297 * This tells the low-level filesystem that a write is 295 * This tells the low-level filesystem that a write is about to be performed to
298 * about to be performed to it, and makes sure that 296 * it, and makes sure that writes are allowed (mnt is read-write) before
299 * writes are allowed before returning success. When 297 * returning success. This operation does not protect against filesystem being
300 * the write operation is finished, mnt_drop_write() 298 * frozen. When the write operation is finished, __mnt_drop_write() must be
301 * must be called. This is effectively a refcount. 299 * called. This is effectively a refcount.
302 */ 300 */
303int mnt_want_write(struct vfsmount *m) 301int __mnt_want_write(struct vfsmount *m)
304{ 302{
305 struct mount *mnt = real_mount(m); 303 struct mount *mnt = real_mount(m);
306 int ret = 0; 304 int ret = 0;
@@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m)
326 ret = -EROFS; 324 ret = -EROFS;
327 } 325 }
328 preempt_enable(); 326 preempt_enable();
327
328 return ret;
329}
330
331/**
332 * mnt_want_write - get write access to a mount
333 * @m: the mount on which to take a write
334 *
335 * This tells the low-level filesystem that a write is about to be performed to
336 * it, and makes sure that writes are allowed (mount is read-write, filesystem
337 * is not frozen) before returning success. When the write operation is
338 * finished, mnt_drop_write() must be called. This is effectively a refcount.
339 */
340int mnt_want_write(struct vfsmount *m)
341{
342 int ret;
343
344 sb_start_write(m->mnt_sb);
345 ret = __mnt_want_write(m);
346 if (ret)
347 sb_end_write(m->mnt_sb);
329 return ret; 348 return ret;
330} 349}
331EXPORT_SYMBOL_GPL(mnt_want_write); 350EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt)
355EXPORT_SYMBOL_GPL(mnt_clone_write); 374EXPORT_SYMBOL_GPL(mnt_clone_write);
356 375
357/** 376/**
358 * mnt_want_write_file - get write access to a file's mount 377 * __mnt_want_write_file - get write access to a file's mount
359 * @file: the file whose mount on which to take a write 378 * @file: the file whose mount on which to take a write
360 * 379 *
361 * This is like mnt_want_write, but it takes a file and can 380 * This is like __mnt_want_write, but it takes a file and can
362 * do some optimisations if the file is open for write already 381 * do some optimisations if the file is open for write already
363 */ 382 */
364int mnt_want_write_file(struct file *file) 383int __mnt_want_write_file(struct file *file)
365{ 384{
366 struct inode *inode = file->f_dentry->d_inode; 385 struct inode *inode = file->f_dentry->d_inode;
386
367 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) 387 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
368 return mnt_want_write(file->f_path.mnt); 388 return __mnt_want_write(file->f_path.mnt);
369 else 389 else
370 return mnt_clone_write(file->f_path.mnt); 390 return mnt_clone_write(file->f_path.mnt);
371} 391}
392
393/**
394 * mnt_want_write_file - get write access to a file's mount
395 * @file: the file whose mount on which to take a write
396 *
397 * This is like mnt_want_write, but it takes a file and can
398 * do some optimisations if the file is open for write already
399 */
400int mnt_want_write_file(struct file *file)
401{
402 int ret;
403
404 sb_start_write(file->f_path.mnt->mnt_sb);
405 ret = __mnt_want_write_file(file);
406 if (ret)
407 sb_end_write(file->f_path.mnt->mnt_sb);
408 return ret;
409}
372EXPORT_SYMBOL_GPL(mnt_want_write_file); 410EXPORT_SYMBOL_GPL(mnt_want_write_file);
373 411
374/** 412/**
375 * mnt_drop_write - give up write access to a mount 413 * __mnt_drop_write - give up write access to a mount
376 * @mnt: the mount on which to give up write access 414 * @mnt: the mount on which to give up write access
377 * 415 *
378 * Tells the low-level filesystem that we are done 416 * Tells the low-level filesystem that we are done
379 * performing writes to it. Must be matched with 417 * performing writes to it. Must be matched with
380 * mnt_want_write() call above. 418 * __mnt_want_write() call above.
381 */ 419 */
382void mnt_drop_write(struct vfsmount *mnt) 420void __mnt_drop_write(struct vfsmount *mnt)
383{ 421{
384 preempt_disable(); 422 preempt_disable();
385 mnt_dec_writers(real_mount(mnt)); 423 mnt_dec_writers(real_mount(mnt));
386 preempt_enable(); 424 preempt_enable();
387} 425}
426
427/**
428 * mnt_drop_write - give up write access to a mount
429 * @mnt: the mount on which to give up write access
430 *
431 * Tells the low-level filesystem that we are done performing writes to it and
432 * also allows filesystem to be frozen again. Must be matched with
433 * mnt_want_write() call above.
434 */
435void mnt_drop_write(struct vfsmount *mnt)
436{
437 __mnt_drop_write(mnt);
438 sb_end_write(mnt->mnt_sb);
439}
388EXPORT_SYMBOL_GPL(mnt_drop_write); 440EXPORT_SYMBOL_GPL(mnt_drop_write);
389 441
442void __mnt_drop_write_file(struct file *file)
443{
444 __mnt_drop_write(file->f_path.mnt);
445}
446
390void mnt_drop_write_file(struct file *file) 447void mnt_drop_write_file(struct file *file)
391{ 448{
392 mnt_drop_write(file->f_path.mnt); 449 mnt_drop_write(file->f_path.mnt);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5ff0b7b9fc08..43295d45cc2b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
154 if (status < 0) 154 if (status < 0)
155 return; 155 return;
156 156
157 status = mnt_want_write_file(rec_file);
158 if (status)
159 return;
160
157 dir = rec_file->f_path.dentry; 161 dir = rec_file->f_path.dentry;
158 /* lock the parent */ 162 /* lock the parent */
159 mutex_lock(&dir->d_inode->i_mutex); 163 mutex_lock(&dir->d_inode->i_mutex);
@@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
173 * as well be forgiving and just succeed silently. 177 * as well be forgiving and just succeed silently.
174 */ 178 */
175 goto out_put; 179 goto out_put;
176 status = mnt_want_write_file(rec_file);
177 if (status)
178 goto out_put;
179 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); 180 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
180 mnt_drop_write_file(rec_file);
181out_put: 181out_put:
182 dput(dentry); 182 dput(dentry);
183out_unlock: 183out_unlock:
@@ -189,6 +189,7 @@ out_unlock:
189 " (err %d); please check that %s exists" 189 " (err %d); please check that %s exists"
190 " and is writeable", status, 190 " and is writeable", status,
191 user_recovery_dirname); 191 user_recovery_dirname);
192 mnt_drop_write_file(rec_file);
192 nfs4_reset_creds(original_cred); 193 nfs4_reset_creds(original_cred);
193} 194}
194 195
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index cc793005a87c..032af381b3aa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp)
635 fhp->fh_post_saved = 0; 635 fhp->fh_post_saved = 0;
636#endif 636#endif
637 } 637 }
638 fh_drop_write(fhp);
638 if (exp) { 639 if (exp) {
639 exp_put(exp); 640 exp_put(exp);
640 fhp->fh_export = NULL; 641 fhp->fh_export = NULL;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e15dc45fc5ec..aad6d457b9e8 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
196 struct dentry *dchild; 196 struct dentry *dchild;
197 int type, mode; 197 int type, mode;
198 __be32 nfserr; 198 __be32 nfserr;
199 int hosterr;
199 dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); 200 dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size);
200 201
201 dprintk("nfsd: CREATE %s %.*s\n", 202 dprintk("nfsd: CREATE %s %.*s\n",
@@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
214 nfserr = nfserr_exist; 215 nfserr = nfserr_exist;
215 if (isdotent(argp->name, argp->len)) 216 if (isdotent(argp->name, argp->len))
216 goto done; 217 goto done;
218 hosterr = fh_want_write(dirfhp);
219 if (hosterr) {
220 nfserr = nfserrno(hosterr);
221 goto done;
222 }
223
217 fh_lock_nested(dirfhp, I_MUTEX_PARENT); 224 fh_lock_nested(dirfhp, I_MUTEX_PARENT);
218 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); 225 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
219 if (IS_ERR(dchild)) { 226 if (IS_ERR(dchild)) {
@@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
330out_unlock: 337out_unlock:
331 /* We don't really need to unlock, as fh_put does it. */ 338 /* We don't really need to unlock, as fh_put does it. */
332 fh_unlock(dirfhp); 339 fh_unlock(dirfhp);
333 340 fh_drop_write(dirfhp);
334done: 341done:
335 fh_put(dirfhp); 342 fh_put(dirfhp);
336 return nfsd_return_dirop(nfserr, resp); 343 return nfsd_return_dirop(nfserr, resp);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 702f64e820c3..a9269f142cc4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1284,6 +1284,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1284 * If it has, the parent directory should already be locked. 1284 * If it has, the parent directory should already be locked.
1285 */ 1285 */
1286 if (!resfhp->fh_dentry) { 1286 if (!resfhp->fh_dentry) {
1287 host_err = fh_want_write(fhp);
1288 if (host_err)
1289 goto out_nfserr;
1290
1287 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ 1291 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
1288 fh_lock_nested(fhp, I_MUTEX_PARENT); 1292 fh_lock_nested(fhp, I_MUTEX_PARENT);
1289 dchild = lookup_one_len(fname, dentry, flen); 1293 dchild = lookup_one_len(fname, dentry, flen);
@@ -1327,14 +1331,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1327 goto out; 1331 goto out;
1328 } 1332 }
1329 1333
1330 host_err = fh_want_write(fhp);
1331 if (host_err)
1332 goto out_nfserr;
1333
1334 /* 1334 /*
1335 * Get the dir op function pointer. 1335 * Get the dir op function pointer.
1336 */ 1336 */
1337 err = 0; 1337 err = 0;
1338 host_err = 0;
1338 switch (type) { 1339 switch (type) {
1339 case S_IFREG: 1340 case S_IFREG:
1340 host_err = vfs_create(dirp, dchild, iap->ia_mode, true); 1341 host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
@@ -1351,10 +1352,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1351 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1352 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1352 break; 1353 break;
1353 } 1354 }
1354 if (host_err < 0) { 1355 if (host_err < 0)
1355 fh_drop_write(fhp);
1356 goto out_nfserr; 1356 goto out_nfserr;
1357 }
1358 1357
1359 err = nfsd_create_setattr(rqstp, resfhp, iap); 1358 err = nfsd_create_setattr(rqstp, resfhp, iap);
1360 1359
@@ -1366,7 +1365,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1366 err2 = nfserrno(commit_metadata(fhp)); 1365 err2 = nfserrno(commit_metadata(fhp));
1367 if (err2) 1366 if (err2)
1368 err = err2; 1367 err = err2;
1369 fh_drop_write(fhp);
1370 /* 1368 /*
1371 * Update the file handle to get the new inode info. 1369 * Update the file handle to get the new inode info.
1372 */ 1370 */
@@ -1425,6 +1423,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1425 err = nfserr_notdir; 1423 err = nfserr_notdir;
1426 if (!dirp->i_op->lookup) 1424 if (!dirp->i_op->lookup)
1427 goto out; 1425 goto out;
1426
1427 host_err = fh_want_write(fhp);
1428 if (host_err)
1429 goto out_nfserr;
1430
1428 fh_lock_nested(fhp, I_MUTEX_PARENT); 1431 fh_lock_nested(fhp, I_MUTEX_PARENT);
1429 1432
1430 /* 1433 /*
@@ -1457,9 +1460,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1457 v_atime = verifier[1]&0x7fffffff; 1460 v_atime = verifier[1]&0x7fffffff;
1458 } 1461 }
1459 1462
1460 host_err = fh_want_write(fhp);
1461 if (host_err)
1462 goto out_nfserr;
1463 if (dchild->d_inode) { 1463 if (dchild->d_inode) {
1464 err = 0; 1464 err = 0;
1465 1465
@@ -1530,7 +1530,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1530 if (!err) 1530 if (!err)
1531 err = nfserrno(commit_metadata(fhp)); 1531 err = nfserrno(commit_metadata(fhp));
1532 1532
1533 fh_drop_write(fhp);
1534 /* 1533 /*
1535 * Update the filehandle to get the new inode info. 1534 * Update the filehandle to get the new inode info.
1536 */ 1535 */
@@ -1541,6 +1540,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1541 fh_unlock(fhp); 1540 fh_unlock(fhp);
1542 if (dchild && !IS_ERR(dchild)) 1541 if (dchild && !IS_ERR(dchild))
1543 dput(dchild); 1542 dput(dchild);
1543 fh_drop_write(fhp);
1544 return err; 1544 return err;
1545 1545
1546 out_nfserr: 1546 out_nfserr:
@@ -1621,6 +1621,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1621 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1621 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1622 if (err) 1622 if (err)
1623 goto out; 1623 goto out;
1624
1625 host_err = fh_want_write(fhp);
1626 if (host_err)
1627 goto out_nfserr;
1628
1624 fh_lock(fhp); 1629 fh_lock(fhp);
1625 dentry = fhp->fh_dentry; 1630 dentry = fhp->fh_dentry;
1626 dnew = lookup_one_len(fname, dentry, flen); 1631 dnew = lookup_one_len(fname, dentry, flen);
@@ -1628,10 +1633,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1628 if (IS_ERR(dnew)) 1633 if (IS_ERR(dnew))
1629 goto out_nfserr; 1634 goto out_nfserr;
1630 1635
1631 host_err = fh_want_write(fhp);
1632 if (host_err)
1633 goto out_nfserr;
1634
1635 if (unlikely(path[plen] != 0)) { 1636 if (unlikely(path[plen] != 0)) {
1636 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1637 char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1637 if (path_alloced == NULL) 1638 if (path_alloced == NULL)
@@ -1691,6 +1692,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1691 if (isdotent(name, len)) 1692 if (isdotent(name, len))
1692 goto out; 1693 goto out;
1693 1694
1695 host_err = fh_want_write(tfhp);
1696 if (host_err) {
1697 err = nfserrno(host_err);
1698 goto out;
1699 }
1700
1694 fh_lock_nested(ffhp, I_MUTEX_PARENT); 1701 fh_lock_nested(ffhp, I_MUTEX_PARENT);
1695 ddir = ffhp->fh_dentry; 1702 ddir = ffhp->fh_dentry;
1696 dirp = ddir->d_inode; 1703 dirp = ddir->d_inode;
@@ -1702,18 +1709,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1702 1709
1703 dold = tfhp->fh_dentry; 1710 dold = tfhp->fh_dentry;
1704 1711
1705 host_err = fh_want_write(tfhp);
1706 if (host_err) {
1707 err = nfserrno(host_err);
1708 goto out_dput;
1709 }
1710 err = nfserr_noent; 1712 err = nfserr_noent;
1711 if (!dold->d_inode) 1713 if (!dold->d_inode)
1712 goto out_drop_write; 1714 goto out_dput;
1713 host_err = nfsd_break_lease(dold->d_inode); 1715 host_err = nfsd_break_lease(dold->d_inode);
1714 if (host_err) { 1716 if (host_err) {
1715 err = nfserrno(host_err); 1717 err = nfserrno(host_err);
1716 goto out_drop_write; 1718 goto out_dput;
1717 } 1719 }
1718 host_err = vfs_link(dold, dirp, dnew); 1720 host_err = vfs_link(dold, dirp, dnew);
1719 if (!host_err) { 1721 if (!host_err) {
@@ -1726,12 +1728,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1726 else 1728 else
1727 err = nfserrno(host_err); 1729 err = nfserrno(host_err);
1728 } 1730 }
1729out_drop_write:
1730 fh_drop_write(tfhp);
1731out_dput: 1731out_dput:
1732 dput(dnew); 1732 dput(dnew);
1733out_unlock: 1733out_unlock:
1734 fh_unlock(ffhp); 1734 fh_unlock(ffhp);
1735 fh_drop_write(tfhp);
1735out: 1736out:
1736 return err; 1737 return err;
1737 1738
@@ -1774,6 +1775,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1774 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) 1775 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
1775 goto out; 1776 goto out;
1776 1777
1778 host_err = fh_want_write(ffhp);
1779 if (host_err) {
1780 err = nfserrno(host_err);
1781 goto out;
1782 }
1783
1777 /* cannot use fh_lock as we need deadlock protective ordering 1784 /* cannot use fh_lock as we need deadlock protective ordering
1778 * so do it by hand */ 1785 * so do it by hand */
1779 trap = lock_rename(tdentry, fdentry); 1786 trap = lock_rename(tdentry, fdentry);
@@ -1804,17 +1811,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1804 host_err = -EXDEV; 1811 host_err = -EXDEV;
1805 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) 1812 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1806 goto out_dput_new; 1813 goto out_dput_new;
1807 host_err = fh_want_write(ffhp);
1808 if (host_err)
1809 goto out_dput_new;
1810 1814
1811 host_err = nfsd_break_lease(odentry->d_inode); 1815 host_err = nfsd_break_lease(odentry->d_inode);
1812 if (host_err) 1816 if (host_err)
1813 goto out_drop_write; 1817 goto out_dput_new;
1814 if (ndentry->d_inode) { 1818 if (ndentry->d_inode) {
1815 host_err = nfsd_break_lease(ndentry->d_inode); 1819 host_err = nfsd_break_lease(ndentry->d_inode);
1816 if (host_err) 1820 if (host_err)
1817 goto out_drop_write; 1821 goto out_dput_new;
1818 } 1822 }
1819 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1823 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1820 if (!host_err) { 1824 if (!host_err) {
@@ -1822,8 +1826,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1822 if (!host_err) 1826 if (!host_err)
1823 host_err = commit_metadata(ffhp); 1827 host_err = commit_metadata(ffhp);
1824 } 1828 }
1825out_drop_write:
1826 fh_drop_write(ffhp);
1827 out_dput_new: 1829 out_dput_new:
1828 dput(ndentry); 1830 dput(ndentry);
1829 out_dput_old: 1831 out_dput_old:
@@ -1839,6 +1841,7 @@ out_drop_write:
1839 fill_post_wcc(tfhp); 1841 fill_post_wcc(tfhp);
1840 unlock_rename(tdentry, fdentry); 1842 unlock_rename(tdentry, fdentry);
1841 ffhp->fh_locked = tfhp->fh_locked = 0; 1843 ffhp->fh_locked = tfhp->fh_locked = 0;
1844 fh_drop_write(ffhp);
1842 1845
1843out: 1846out:
1844 return err; 1847 return err;
@@ -1864,6 +1867,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1864 if (err) 1867 if (err)
1865 goto out; 1868 goto out;
1866 1869
1870 host_err = fh_want_write(fhp);
1871 if (host_err)
1872 goto out_nfserr;
1873
1867 fh_lock_nested(fhp, I_MUTEX_PARENT); 1874 fh_lock_nested(fhp, I_MUTEX_PARENT);
1868 dentry = fhp->fh_dentry; 1875 dentry = fhp->fh_dentry;
1869 dirp = dentry->d_inode; 1876 dirp = dentry->d_inode;
@@ -1882,21 +1889,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1882 if (!type) 1889 if (!type)
1883 type = rdentry->d_inode->i_mode & S_IFMT; 1890 type = rdentry->d_inode->i_mode & S_IFMT;
1884 1891
1885 host_err = fh_want_write(fhp);
1886 if (host_err)
1887 goto out_put;
1888
1889 host_err = nfsd_break_lease(rdentry->d_inode); 1892 host_err = nfsd_break_lease(rdentry->d_inode);
1890 if (host_err) 1893 if (host_err)
1891 goto out_drop_write; 1894 goto out_put;
1892 if (type != S_IFDIR) 1895 if (type != S_IFDIR)
1893 host_err = vfs_unlink(dirp, rdentry); 1896 host_err = vfs_unlink(dirp, rdentry);
1894 else 1897 else
1895 host_err = vfs_rmdir(dirp, rdentry); 1898 host_err = vfs_rmdir(dirp, rdentry);
1896 if (!host_err) 1899 if (!host_err)
1897 host_err = commit_metadata(fhp); 1900 host_err = commit_metadata(fhp);
1898out_drop_write:
1899 fh_drop_write(fhp);
1900out_put: 1901out_put:
1901 dput(rdentry); 1902 dput(rdentry);
1902 1903
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index ec0611b2b738..359594c393d2 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
110 110
111static inline int fh_want_write(struct svc_fh *fh) 111static inline int fh_want_write(struct svc_fh *fh)
112{ 112{
113 return mnt_want_write(fh->fh_export->ex_path.mnt); 113 int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
114
115 if (!ret)
116 fh->fh_want_write = 1;
117 return ret;
114} 118}
115 119
116static inline void fh_drop_write(struct svc_fh *fh) 120static inline void fh_drop_write(struct svc_fh *fh)
117{ 121{
118 mnt_drop_write(fh->fh_export->ex_path.mnt); 122 if (fh->fh_want_write) {
123 fh->fh_want_write = 0;
124 mnt_drop_write(fh->fh_export->ex_path.mnt);
125 }
119} 126}
120 127
121#endif /* LINUX_NFSD_VFS_H */ 128#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 62cebc8e1a1f..a4d56ac02e6c 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
69 struct page *page = vmf->page; 69 struct page *page = vmf->page;
70 struct inode *inode = vma->vm_file->f_dentry->d_inode; 70 struct inode *inode = vma->vm_file->f_dentry->d_inode;
71 struct nilfs_transaction_info ti; 71 struct nilfs_transaction_info ti;
72 int ret; 72 int ret = 0;
73 73
74 if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) 74 if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
75 return VM_FAULT_SIGBUS; /* -ENOSPC */ 75 return VM_FAULT_SIGBUS; /* -ENOSPC */
76 76
77 sb_start_pagefault(inode->i_sb);
77 lock_page(page); 78 lock_page(page);
78 if (page->mapping != inode->i_mapping || 79 if (page->mapping != inode->i_mapping ||
79 page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { 80 page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
80 unlock_page(page); 81 unlock_page(page);
81 return VM_FAULT_NOPAGE; /* make the VM retry the fault */ 82 ret = -EFAULT; /* make the VM retry the fault */
83 goto out;
82 } 84 }
83 85
84 /* 86 /*
@@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
112 ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); 114 ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
113 /* never returns -ENOMEM, but may return -ENOSPC */ 115 /* never returns -ENOMEM, but may return -ENOSPC */
114 if (unlikely(ret)) 116 if (unlikely(ret))
115 return VM_FAULT_SIGBUS; 117 goto out;
116 118
117 ret = block_page_mkwrite(vma, vmf, nilfs_get_block); 119 ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
118 if (ret != VM_FAULT_LOCKED) { 120 if (ret) {
119 nilfs_transaction_abort(inode->i_sb); 121 nilfs_transaction_abort(inode->i_sb);
120 return ret; 122 goto out;
121 } 123 }
122 nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); 124 nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
123 nilfs_transaction_commit(inode->i_sb); 125 nilfs_transaction_commit(inode->i_sb);
124 126
125 mapped: 127 mapped:
126 wait_on_page_writeback(page); 128 wait_on_page_writeback(page);
127 return VM_FAULT_LOCKED; 129 out:
130 sb_end_pagefault(inode->i_sb);
131 return block_page_mkwrite_return(ret);
128} 132}
129 133
130static const struct vm_operations_struct nilfs_file_vm_ops = { 134static const struct vm_operations_struct nilfs_file_vm_ops = {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 0b6387c67e6c..fdb180769485 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
660 goto out_free; 660 goto out_free;
661 } 661 }
662 662
663 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
664
665 ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); 663 ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]);
666 if (ret < 0) 664 if (ret < 0)
667 printk(KERN_ERR "NILFS: GC failed during preparation: " 665 printk(KERN_ERR "NILFS: GC failed during preparation: "
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 88e11fb346b6..a5752a589932 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb,
189 if (ret > 0) 189 if (ret > 0)
190 return 0; 190 return 0;
191 191
192 vfs_check_frozen(sb, SB_FREEZE_WRITE); 192 sb_start_intwrite(sb);
193 193
194 nilfs = sb->s_fs_info; 194 nilfs = sb->s_fs_info;
195 down_read(&nilfs->ns_segctor_sem); 195 down_read(&nilfs->ns_segctor_sem);
@@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb,
205 current->journal_info = ti->ti_save; 205 current->journal_info = ti->ti_save;
206 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 206 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
207 kmem_cache_free(nilfs_transaction_cachep, ti); 207 kmem_cache_free(nilfs_transaction_cachep, ti);
208 sb_end_intwrite(sb);
208 return ret; 209 return ret;
209} 210}
210 211
@@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb)
246 err = nilfs_construct_segment(sb); 247 err = nilfs_construct_segment(sb);
247 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 248 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
248 kmem_cache_free(nilfs_transaction_cachep, ti); 249 kmem_cache_free(nilfs_transaction_cachep, ti);
250 sb_end_intwrite(sb);
249 return err; 251 return err;
250} 252}
251 253
@@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb)
264 current->journal_info = ti->ti_save; 266 current->journal_info = ti->ti_save;
265 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 267 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
266 kmem_cache_free(nilfs_transaction_cachep, ti); 268 kmem_cache_free(nilfs_transaction_cachep, ti);
269 sb_end_intwrite(sb);
267} 270}
268 271
269void nilfs_relax_pressure_in_lock(struct super_block *sb) 272void nilfs_relax_pressure_in_lock(struct super_block *sb)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7389d2d5e51d..1ecf46448f85 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2084 if (err) 2084 if (err)
2085 return err; 2085 return err;
2086 pos = *ppos; 2086 pos = *ppos;
2087 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2088 /* We can write back this queue in page reclaim. */ 2087 /* We can write back this queue in page reclaim. */
2089 current->backing_dev_info = mapping->backing_dev_info; 2088 current->backing_dev_info = mapping->backing_dev_info;
2090 written = 0; 2089 written = 0;
@@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2119 2118
2120 BUG_ON(iocb->ki_pos != pos); 2119 BUG_ON(iocb->ki_pos != pos);
2121 2120
2121 sb_start_write(inode->i_sb);
2122 mutex_lock(&inode->i_mutex); 2122 mutex_lock(&inode->i_mutex);
2123 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); 2123 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2124 mutex_unlock(&inode->i_mutex); 2124 mutex_unlock(&inode->i_mutex);
@@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2127 if (err < 0) 2127 if (err < 0)
2128 ret = err; 2128 ret = err;
2129 } 2129 }
2130 sb_end_write(inode->i_sb);
2130 return ret; 2131 return ret;
2131} 2132}
2132 2133
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7602783d7f41..46a1f6d75104 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1971{ 1971{
1972 struct inode *inode = file->f_path.dentry->d_inode; 1972 struct inode *inode = file->f_path.dentry->d_inode;
1973 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1973 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1974 int ret;
1974 1975
1975 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && 1976 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
1976 !ocfs2_writes_unwritten_extents(osb)) 1977 !ocfs2_writes_unwritten_extents(osb))
@@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1985 if (!(file->f_mode & FMODE_WRITE)) 1986 if (!(file->f_mode & FMODE_WRITE))
1986 return -EBADF; 1987 return -EBADF;
1987 1988
1988 return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); 1989 ret = mnt_want_write_file(file);
1990 if (ret)
1991 return ret;
1992 ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
1993 mnt_drop_write_file(file);
1994 return ret;
1989} 1995}
1990 1996
1991static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, 1997static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
@@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2261 if (iocb->ki_left == 0) 2267 if (iocb->ki_left == 0)
2262 return 0; 2268 return 0;
2263 2269
2264 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 2270 sb_start_write(inode->i_sb);
2265 2271
2266 appending = file->f_flags & O_APPEND ? 1 : 0; 2272 appending = file->f_flags & O_APPEND ? 1 : 0;
2267 direct_io = file->f_flags & O_DIRECT ? 1 : 0; 2273 direct_io = file->f_flags & O_DIRECT ? 1 : 0;
@@ -2436,6 +2442,7 @@ out_sems:
2436 ocfs2_iocb_clear_sem_locked(iocb); 2442 ocfs2_iocb_clear_sem_locked(iocb);
2437 2443
2438 mutex_unlock(&inode->i_mutex); 2444 mutex_unlock(&inode->i_mutex);
2445 sb_end_write(inode->i_sb);
2439 2446
2440 if (written) 2447 if (written)
2441 ret = written; 2448 ret = written;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index d96f7f81d8dd..f20edcbfe700 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
928 if (get_user(new_clusters, (int __user *)arg)) 928 if (get_user(new_clusters, (int __user *)arg))
929 return -EFAULT; 929 return -EFAULT;
930 930
931 return ocfs2_group_extend(inode, new_clusters); 931 status = mnt_want_write_file(filp);
932 if (status)
933 return status;
934 status = ocfs2_group_extend(inode, new_clusters);
935 mnt_drop_write_file(filp);
936 return status;
932 case OCFS2_IOC_GROUP_ADD: 937 case OCFS2_IOC_GROUP_ADD:
933 case OCFS2_IOC_GROUP_ADD64: 938 case OCFS2_IOC_GROUP_ADD64:
934 if (!capable(CAP_SYS_RESOURCE)) 939 if (!capable(CAP_SYS_RESOURCE))
@@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
937 if (copy_from_user(&input, (int __user *) arg, sizeof(input))) 942 if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
938 return -EFAULT; 943 return -EFAULT;
939 944
940 return ocfs2_group_add(inode, &input); 945 status = mnt_want_write_file(filp);
946 if (status)
947 return status;
948 status = ocfs2_group_add(inode, &input);
949 mnt_drop_write_file(filp);
950 return status;
941 case OCFS2_IOC_REFLINK: 951 case OCFS2_IOC_REFLINK:
942 if (copy_from_user(&args, argp, sizeof(args))) 952 if (copy_from_user(&args, argp, sizeof(args)))
943 return -EFAULT; 953 return -EFAULT;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 0a42ae96dca7..2dd36af79e26 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
355 if (journal_current_handle()) 355 if (journal_current_handle())
356 return jbd2_journal_start(journal, max_buffs); 356 return jbd2_journal_start(journal, max_buffs);
357 357
358 sb_start_intwrite(osb->sb);
359
358 down_read(&osb->journal->j_trans_barrier); 360 down_read(&osb->journal->j_trans_barrier);
359 361
360 handle = jbd2_journal_start(journal, max_buffs); 362 handle = jbd2_journal_start(journal, max_buffs);
361 if (IS_ERR(handle)) { 363 if (IS_ERR(handle)) {
362 up_read(&osb->journal->j_trans_barrier); 364 up_read(&osb->journal->j_trans_barrier);
365 sb_end_intwrite(osb->sb);
363 366
364 mlog_errno(PTR_ERR(handle)); 367 mlog_errno(PTR_ERR(handle));
365 368
@@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
388 if (ret < 0) 391 if (ret < 0)
389 mlog_errno(ret); 392 mlog_errno(ret);
390 393
391 if (!nested) 394 if (!nested) {
392 up_read(&journal->j_trans_barrier); 395 up_read(&journal->j_trans_barrier);
396 sb_end_intwrite(osb->sb);
397 }
393 398
394 return ret; 399 return ret;
395} 400}
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 9cd41083e991..d150372fd81d 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
136 sigset_t oldset; 136 sigset_t oldset;
137 int ret; 137 int ret;
138 138
139 sb_start_pagefault(inode->i_sb);
139 ocfs2_block_signals(&oldset); 140 ocfs2_block_signals(&oldset);
140 141
141 /* 142 /*
@@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
165 166
166out: 167out:
167 ocfs2_unblock_signals(&oldset); 168 ocfs2_unblock_signals(&oldset);
169 sb_end_pagefault(inode->i_sb);
168 return ret; 170 return ret;
169} 171}
170 172
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9f32d7cbb7a3..30a055049e16 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4466,20 +4466,11 @@ int ocfs2_reflink_ioctl(struct inode *inode,
4466 goto out_dput; 4466 goto out_dput;
4467 } 4467 }
4468 4468
4469 error = mnt_want_write(new_path.mnt);
4470 if (error) {
4471 mlog_errno(error);
4472 goto out_dput;
4473 }
4474
4475 error = ocfs2_vfs_reflink(old_path.dentry, 4469 error = ocfs2_vfs_reflink(old_path.dentry,
4476 new_path.dentry->d_inode, 4470 new_path.dentry->d_inode,
4477 new_dentry, preserve); 4471 new_dentry, preserve);
4478 mnt_drop_write(new_path.mnt);
4479out_dput: 4472out_dput:
4480 dput(new_dentry); 4473 done_path_create(&new_path, new_dentry);
4481 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
4482 path_put(&new_path);
4483out: 4474out:
4484 path_put(&old_path); 4475 path_put(&old_path);
4485 4476
diff --git a/fs/open.c b/fs/open.c
index 1e914b397e12..f3d96e7e7b19 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
164 if (IS_APPEND(inode)) 164 if (IS_APPEND(inode))
165 goto out_putf; 165 goto out_putf;
166 166
167 sb_start_write(inode->i_sb);
167 error = locks_verify_truncate(inode, file, length); 168 error = locks_verify_truncate(inode, file, length);
168 if (!error) 169 if (!error)
169 error = security_path_truncate(&file->f_path); 170 error = security_path_truncate(&file->f_path);
170 if (!error) 171 if (!error)
171 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
173 sb_end_write(inode->i_sb);
172out_putf: 174out_putf:
173 fput(file); 175 fput(file);
174out: 176out:
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
266 if (!file->f_op->fallocate) 268 if (!file->f_op->fallocate)
267 return -EOPNOTSUPP; 269 return -EOPNOTSUPP;
268 270
269 return file->f_op->fallocate(file, mode, offset, len); 271 sb_start_write(inode->i_sb);
272 ret = file->f_op->fallocate(file, mode, offset, len);
273 sb_end_write(inode->i_sb);
274 return ret;
270} 275}
271 276
272SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
@@ -620,7 +625,7 @@ static inline int __get_file_write_access(struct inode *inode,
620 /* 625 /*
621 * Balanced in __fput() 626 * Balanced in __fput()
622 */ 627 */
623 error = mnt_want_write(mnt); 628 error = __mnt_want_write(mnt);
624 if (error) 629 if (error)
625 put_write_access(inode); 630 put_write_access(inode);
626 } 631 }
@@ -654,6 +659,7 @@ static int do_dentry_open(struct file *f,
654 if (unlikely(f->f_flags & O_PATH)) 659 if (unlikely(f->f_flags & O_PATH))
655 f->f_mode = FMODE_PATH; 660 f->f_mode = FMODE_PATH;
656 661
662 path_get(&f->f_path);
657 inode = f->f_path.dentry->d_inode; 663 inode = f->f_path.dentry->d_inode;
658 if (f->f_mode & FMODE_WRITE) { 664 if (f->f_mode & FMODE_WRITE) {
659 error = __get_file_write_access(inode, f->f_path.mnt); 665 error = __get_file_write_access(inode, f->f_path.mnt);
@@ -739,9 +745,7 @@ int finish_open(struct file *file, struct dentry *dentry,
739 int error; 745 int error;
740 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ 746 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
741 747
742 mntget(file->f_path.mnt); 748 file->f_path.dentry = dentry;
743 file->f_path.dentry = dget(dentry);
744
745 error = do_dentry_open(file, open, current_cred()); 749 error = do_dentry_open(file, open, current_cred());
746 if (!error) 750 if (!error)
747 *opened |= FILE_OPENED; 751 *opened |= FILE_OPENED;
@@ -784,7 +788,6 @@ struct file *dentry_open(const struct path *path, int flags,
784 788
785 f->f_flags = flags; 789 f->f_flags = flags;
786 f->f_path = *path; 790 f->f_path = *path;
787 path_get(&f->f_path);
788 error = do_dentry_open(f, NULL, cred); 791 error = do_dentry_open(f, NULL, cred);
789 if (!error) { 792 if (!error) {
790 error = open_check_o_direct(f); 793 error = open_check_o_direct(f);
diff --git a/fs/pipe.c b/fs/pipe.c
index 95cbd6b227e6..8d85d7068c1e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1016,18 +1016,16 @@ fail_inode:
1016 return NULL; 1016 return NULL;
1017} 1017}
1018 1018
1019struct file *create_write_pipe(int flags) 1019int create_pipe_files(struct file **res, int flags)
1020{ 1020{
1021 int err; 1021 int err;
1022 struct inode *inode; 1022 struct inode *inode = get_pipe_inode();
1023 struct file *f; 1023 struct file *f;
1024 struct path path; 1024 struct path path;
1025 struct qstr name = { .name = "" }; 1025 static struct qstr name = { .name = "" };
1026 1026
1027 err = -ENFILE;
1028 inode = get_pipe_inode();
1029 if (!inode) 1027 if (!inode)
1030 goto err; 1028 return -ENFILE;
1031 1029
1032 err = -ENOMEM; 1030 err = -ENOMEM;
1033 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); 1031 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
@@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags)
1041 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); 1039 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1042 if (!f) 1040 if (!f)
1043 goto err_dentry; 1041 goto err_dentry;
1044 f->f_mapping = inode->i_mapping;
1045 1042
1046 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); 1043 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
1047 f->f_version = 0;
1048 1044
1049 return f; 1045 res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
1046 if (!res[0])
1047 goto err_file;
1048
1049 path_get(&path);
1050 res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1051 res[1] = f;
1052 return 0;
1050 1053
1051 err_dentry: 1054err_file:
1055 put_filp(f);
1056err_dentry:
1052 free_pipe_info(inode); 1057 free_pipe_info(inode);
1053 path_put(&path); 1058 path_put(&path);
1054 return ERR_PTR(err); 1059 return err;
1055 1060
1056 err_inode: 1061err_inode:
1057 free_pipe_info(inode); 1062 free_pipe_info(inode);
1058 iput(inode); 1063 iput(inode);
1059 err: 1064 return err;
1060 return ERR_PTR(err);
1061}
1062
1063void free_write_pipe(struct file *f)
1064{
1065 free_pipe_info(f->f_dentry->d_inode);
1066 path_put(&f->f_path);
1067 put_filp(f);
1068}
1069
1070struct file *create_read_pipe(struct file *wrf, int flags)
1071{
1072 /* Grab pipe from the writer */
1073 struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
1074 &read_pipefifo_fops);
1075 if (!f)
1076 return ERR_PTR(-ENFILE);
1077
1078 path_get(&wrf->f_path);
1079 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1080
1081 return f;
1082} 1065}
1083 1066
1084int do_pipe_flags(int *fd, int flags) 1067int do_pipe_flags(int *fd, int flags)
1085{ 1068{
1086 struct file *fw, *fr; 1069 struct file *files[2];
1087 int error; 1070 int error;
1088 int fdw, fdr; 1071 int fdw, fdr;
1089 1072
1090 if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) 1073 if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
1091 return -EINVAL; 1074 return -EINVAL;
1092 1075
1093 fw = create_write_pipe(flags); 1076 error = create_pipe_files(files, flags);
1094 if (IS_ERR(fw)) 1077 if (error)
1095 return PTR_ERR(fw); 1078 return error;
1096 fr = create_read_pipe(fw, flags);
1097 error = PTR_ERR(fr);
1098 if (IS_ERR(fr))
1099 goto err_write_pipe;
1100 1079
1101 error = get_unused_fd_flags(flags); 1080 error = get_unused_fd_flags(flags);
1102 if (error < 0) 1081 if (error < 0)
@@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags)
1109 fdw = error; 1088 fdw = error;
1110 1089
1111 audit_fd_pair(fdr, fdw); 1090 audit_fd_pair(fdr, fdw);
1112 fd_install(fdr, fr); 1091 fd_install(fdr, files[0]);
1113 fd_install(fdw, fw); 1092 fd_install(fdw, files[1]);
1114 fd[0] = fdr; 1093 fd[0] = fdr;
1115 fd[1] = fdw; 1094 fd[1] = fdw;
1116 1095
@@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags)
1119 err_fdr: 1098 err_fdr:
1120 put_unused_fd(fdr); 1099 put_unused_fd(fdr);
1121 err_read_pipe: 1100 err_read_pipe:
1122 path_put(&fr->f_path); 1101 fput(files[0]);
1123 put_filp(fr); 1102 fput(files[1]);
1124 err_write_pipe:
1125 free_write_pipe(fw);
1126 return error; 1103 return error;
1127} 1104}
1128 1105
diff --git a/fs/splice.c b/fs/splice.c
index 7bf08fa22ec9..41514dd89462 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
996 }; 996 };
997 ssize_t ret; 997 ssize_t ret;
998 998
999 sb_start_write(inode->i_sb);
1000
999 pipe_lock(pipe); 1001 pipe_lock(pipe);
1000 1002
1001 splice_from_pipe_begin(&sd); 1003 splice_from_pipe_begin(&sd);
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1034 *ppos += ret; 1036 *ppos += ret;
1035 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 1037 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
1036 } 1038 }
1039 sb_end_write(inode->i_sb);
1037 1040
1038 return ret; 1041 return ret;
1039} 1042}
diff --git a/fs/super.c b/fs/super.c
index 4bf714459a4b..b05cf47463d0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,12 +33,19 @@
33#include <linux/rculist_bl.h> 33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h> 34#include <linux/cleancache.h>
35#include <linux/fsnotify.h> 35#include <linux/fsnotify.h>
36#include <linux/lockdep.h>
36#include "internal.h" 37#include "internal.h"
37 38
38 39
39LIST_HEAD(super_blocks); 40LIST_HEAD(super_blocks);
40DEFINE_SPINLOCK(sb_lock); 41DEFINE_SPINLOCK(sb_lock);
41 42
43static char *sb_writers_name[SB_FREEZE_LEVELS] = {
44 "sb_writers",
45 "sb_pagefaults",
46 "sb_internal",
47};
48
42/* 49/*
43 * One thing we have to be careful of with a per-sb shrinker is that we don't 50 * One thing we have to be careful of with a per-sb shrinker is that we don't
44 * drop the last active reference to the superblock from within the shrinker. 51 * drop the last active reference to the superblock from within the shrinker.
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
102 return total_objects; 109 return total_objects;
103} 110}
104 111
112static int init_sb_writers(struct super_block *s, struct file_system_type *type)
113{
114 int err;
115 int i;
116
117 for (i = 0; i < SB_FREEZE_LEVELS; i++) {
118 err = percpu_counter_init(&s->s_writers.counter[i], 0);
119 if (err < 0)
120 goto err_out;
121 lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
122 &type->s_writers_key[i], 0);
123 }
124 init_waitqueue_head(&s->s_writers.wait);
125 init_waitqueue_head(&s->s_writers.wait_unfrozen);
126 return 0;
127err_out:
128 while (--i >= 0)
129 percpu_counter_destroy(&s->s_writers.counter[i]);
130 return err;
131}
132
133static void destroy_sb_writers(struct super_block *s)
134{
135 int i;
136
137 for (i = 0; i < SB_FREEZE_LEVELS; i++)
138 percpu_counter_destroy(&s->s_writers.counter[i]);
139}
140
105/** 141/**
106 * alloc_super - create new superblock 142 * alloc_super - create new superblock
107 * @type: filesystem type superblock should belong to 143 * @type: filesystem type superblock should belong to
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
117 153
118 if (s) { 154 if (s) {
119 if (security_sb_alloc(s)) { 155 if (security_sb_alloc(s)) {
156 /*
157 * We cannot call security_sb_free() without
158 * security_sb_alloc() succeeding. So bail out manually
159 */
120 kfree(s); 160 kfree(s);
121 s = NULL; 161 s = NULL;
122 goto out; 162 goto out;
123 } 163 }
124#ifdef CONFIG_SMP 164#ifdef CONFIG_SMP
125 s->s_files = alloc_percpu(struct list_head); 165 s->s_files = alloc_percpu(struct list_head);
126 if (!s->s_files) { 166 if (!s->s_files)
127 security_sb_free(s); 167 goto err_out;
128 kfree(s); 168 else {
129 s = NULL;
130 goto out;
131 } else {
132 int i; 169 int i;
133 170
134 for_each_possible_cpu(i) 171 for_each_possible_cpu(i)
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
137#else 174#else
138 INIT_LIST_HEAD(&s->s_files); 175 INIT_LIST_HEAD(&s->s_files);
139#endif 176#endif
177 if (init_sb_writers(s, type))
178 goto err_out;
140 s->s_flags = flags; 179 s->s_flags = flags;
141 s->s_bdi = &default_backing_dev_info; 180 s->s_bdi = &default_backing_dev_info;
142 INIT_HLIST_NODE(&s->s_instances); 181 INIT_HLIST_NODE(&s->s_instances);
@@ -178,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
178 mutex_init(&s->s_dquot.dqio_mutex); 217 mutex_init(&s->s_dquot.dqio_mutex);
179 mutex_init(&s->s_dquot.dqonoff_mutex); 218 mutex_init(&s->s_dquot.dqonoff_mutex);
180 init_rwsem(&s->s_dquot.dqptr_sem); 219 init_rwsem(&s->s_dquot.dqptr_sem);
181 init_waitqueue_head(&s->s_wait_unfrozen);
182 s->s_maxbytes = MAX_NON_LFS; 220 s->s_maxbytes = MAX_NON_LFS;
183 s->s_op = &default_op; 221 s->s_op = &default_op;
184 s->s_time_gran = 1000000000; 222 s->s_time_gran = 1000000000;
@@ -190,6 +228,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
190 } 228 }
191out: 229out:
192 return s; 230 return s;
231err_out:
232 security_sb_free(s);
233#ifdef CONFIG_SMP
234 if (s->s_files)
235 free_percpu(s->s_files);
236#endif
237 destroy_sb_writers(s);
238 kfree(s);
239 s = NULL;
240 goto out;
193} 241}
194 242
195/** 243/**
@@ -203,6 +251,7 @@ static inline void destroy_super(struct super_block *s)
203#ifdef CONFIG_SMP 251#ifdef CONFIG_SMP
204 free_percpu(s->s_files); 252 free_percpu(s->s_files);
205#endif 253#endif
254 destroy_sb_writers(s);
206 security_sb_free(s); 255 security_sb_free(s);
207 WARN_ON(!list_empty(&s->s_mounts)); 256 WARN_ON(!list_empty(&s->s_mounts));
208 kfree(s->s_subtype); 257 kfree(s->s_subtype);
@@ -651,10 +700,11 @@ struct super_block *get_super_thawed(struct block_device *bdev)
651{ 700{
652 while (1) { 701 while (1) {
653 struct super_block *s = get_super(bdev); 702 struct super_block *s = get_super(bdev);
654 if (!s || s->s_frozen == SB_UNFROZEN) 703 if (!s || s->s_writers.frozen == SB_UNFROZEN)
655 return s; 704 return s;
656 up_read(&s->s_umount); 705 up_read(&s->s_umount);
657 vfs_check_frozen(s, SB_FREEZE_WRITE); 706 wait_event(s->s_writers.wait_unfrozen,
707 s->s_writers.frozen == SB_UNFROZEN);
658 put_super(s); 708 put_super(s);
659 } 709 }
660} 710}
@@ -732,7 +782,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
732 int retval; 782 int retval;
733 int remount_ro; 783 int remount_ro;
734 784
735 if (sb->s_frozen != SB_UNFROZEN) 785 if (sb->s_writers.frozen != SB_UNFROZEN)
736 return -EBUSY; 786 return -EBUSY;
737 787
738#ifdef CONFIG_BLOCK 788#ifdef CONFIG_BLOCK
@@ -1163,6 +1213,120 @@ out:
1163 return ERR_PTR(error); 1213 return ERR_PTR(error);
1164} 1214}
1165 1215
1216/*
1217 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
1218 * instead.
1219 */
1220void __sb_end_write(struct super_block *sb, int level)
1221{
1222 percpu_counter_dec(&sb->s_writers.counter[level-1]);
1223 /*
1224 * Make sure s_writers are updated before we wake up waiters in
1225 * freeze_super().
1226 */
1227 smp_mb();
1228 if (waitqueue_active(&sb->s_writers.wait))
1229 wake_up(&sb->s_writers.wait);
1230 rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
1231}
1232EXPORT_SYMBOL(__sb_end_write);
1233
1234#ifdef CONFIG_LOCKDEP
1235/*
1236 * We want lockdep to tell us about possible deadlocks with freezing but
1237 * it's it bit tricky to properly instrument it. Getting a freeze protection
1238 * works as getting a read lock but there are subtle problems. XFS for example
1239 * gets freeze protection on internal level twice in some cases, which is OK
1240 * only because we already hold a freeze protection also on higher level. Due
1241 * to these cases we have to tell lockdep we are doing trylock when we
1242 * already hold a freeze protection for a higher freeze level.
1243 */
1244static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
1245 unsigned long ip)
1246{
1247 int i;
1248
1249 if (!trylock) {
1250 for (i = 0; i < level - 1; i++)
1251 if (lock_is_held(&sb->s_writers.lock_map[i])) {
1252 trylock = true;
1253 break;
1254 }
1255 }
1256 rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
1257}
1258#endif
1259
1260/*
1261 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
1262 * instead.
1263 */
1264int __sb_start_write(struct super_block *sb, int level, bool wait)
1265{
1266retry:
1267 if (unlikely(sb->s_writers.frozen >= level)) {
1268 if (!wait)
1269 return 0;
1270 wait_event(sb->s_writers.wait_unfrozen,
1271 sb->s_writers.frozen < level);
1272 }
1273
1274#ifdef CONFIG_LOCKDEP
1275 acquire_freeze_lock(sb, level, !wait, _RET_IP_);
1276#endif
1277 percpu_counter_inc(&sb->s_writers.counter[level-1]);
1278 /*
1279 * Make sure counter is updated before we check for frozen.
1280 * freeze_super() first sets frozen and then checks the counter.
1281 */
1282 smp_mb();
1283 if (unlikely(sb->s_writers.frozen >= level)) {
1284 __sb_end_write(sb, level);
1285 goto retry;
1286 }
1287 return 1;
1288}
1289EXPORT_SYMBOL(__sb_start_write);
1290
1291/**
1292 * sb_wait_write - wait until all writers to given file system finish
1293 * @sb: the super for which we wait
1294 * @level: type of writers we wait for (normal vs page fault)
1295 *
1296 * This function waits until there are no writers of given type to given file
1297 * system. Caller of this function should make sure there can be no new writers
1298 * of type @level before calling this function. Otherwise this function can
1299 * livelock.
1300 */
1301static void sb_wait_write(struct super_block *sb, int level)
1302{
1303 s64 writers;
1304
1305 /*
1306 * We just cycle-through lockdep here so that it does not complain
1307 * about returning with lock to userspace
1308 */
1309 rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
1310 rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
1311
1312 do {
1313 DEFINE_WAIT(wait);
1314
1315 /*
1316 * We use a barrier in prepare_to_wait() to separate setting
1317 * of frozen and checking of the counter
1318 */
1319 prepare_to_wait(&sb->s_writers.wait, &wait,
1320 TASK_UNINTERRUPTIBLE);
1321
1322 writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
1323 if (writers)
1324 schedule();
1325
1326 finish_wait(&sb->s_writers.wait, &wait);
1327 } while (writers);
1328}
1329
1166/** 1330/**
1167 * freeze_super - lock the filesystem and force it into a consistent state 1331 * freeze_super - lock the filesystem and force it into a consistent state
1168 * @sb: the super to lock 1332 * @sb: the super to lock
@@ -1170,6 +1334,31 @@ out:
1170 * Syncs the super to make sure the filesystem is consistent and calls the fs's 1334 * Syncs the super to make sure the filesystem is consistent and calls the fs's
1171 * freeze_fs. Subsequent calls to this without first thawing the fs will return 1335 * freeze_fs. Subsequent calls to this without first thawing the fs will return
1172 * -EBUSY. 1336 * -EBUSY.
1337 *
1338 * During this function, sb->s_writers.frozen goes through these values:
1339 *
1340 * SB_UNFROZEN: File system is normal, all writes progress as usual.
1341 *
1342 * SB_FREEZE_WRITE: The file system is in the process of being frozen. New
1343 * writes should be blocked, though page faults are still allowed. We wait for
1344 * all writes to complete and then proceed to the next stage.
1345 *
1346 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
1347 * but internal fs threads can still modify the filesystem (although they
1348 * should not dirty new pages or inodes), writeback can run etc. After waiting
1349 * for all running page faults we sync the filesystem which will clean all
1350 * dirty pages and inodes (no new dirty pages or inodes can be created when
1351 * sync is running).
1352 *
1353 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
1354 * modification are blocked (e.g. XFS preallocation truncation on inode
1355 * reclaim). This is usually implemented by blocking new transactions for
1356 * filesystems that have them and need this additional guard. After all
1357 * internal writers are finished we call ->freeze_fs() to finish filesystem
1358 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
1359 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
1360 *
1361 * sb->s_writers.frozen is protected by sb->s_umount.
1173 */ 1362 */
1174int freeze_super(struct super_block *sb) 1363int freeze_super(struct super_block *sb)
1175{ 1364{
@@ -1177,7 +1366,7 @@ int freeze_super(struct super_block *sb)
1177 1366
1178 atomic_inc(&sb->s_active); 1367 atomic_inc(&sb->s_active);
1179 down_write(&sb->s_umount); 1368 down_write(&sb->s_umount);
1180 if (sb->s_frozen) { 1369 if (sb->s_writers.frozen != SB_UNFROZEN) {
1181 deactivate_locked_super(sb); 1370 deactivate_locked_super(sb);
1182 return -EBUSY; 1371 return -EBUSY;
1183 } 1372 }
@@ -1188,33 +1377,53 @@ int freeze_super(struct super_block *sb)
1188 } 1377 }
1189 1378
1190 if (sb->s_flags & MS_RDONLY) { 1379 if (sb->s_flags & MS_RDONLY) {
1191 sb->s_frozen = SB_FREEZE_TRANS; 1380 /* Nothing to do really... */
1192 smp_wmb(); 1381 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1193 up_write(&sb->s_umount); 1382 up_write(&sb->s_umount);
1194 return 0; 1383 return 0;
1195 } 1384 }
1196 1385
1197 sb->s_frozen = SB_FREEZE_WRITE; 1386 /* From now on, no new normal writers can start */
1387 sb->s_writers.frozen = SB_FREEZE_WRITE;
1388 smp_wmb();
1389
1390 /* Release s_umount to preserve sb_start_write -> s_umount ordering */
1391 up_write(&sb->s_umount);
1392
1393 sb_wait_write(sb, SB_FREEZE_WRITE);
1394
1395 /* Now we go and block page faults... */
1396 down_write(&sb->s_umount);
1397 sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
1198 smp_wmb(); 1398 smp_wmb();
1199 1399
1400 sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
1401
1402 /* All writers are done so after syncing there won't be dirty data */
1200 sync_filesystem(sb); 1403 sync_filesystem(sb);
1201 1404
1202 sb->s_frozen = SB_FREEZE_TRANS; 1405 /* Now wait for internal filesystem counter */
1406 sb->s_writers.frozen = SB_FREEZE_FS;
1203 smp_wmb(); 1407 smp_wmb();
1408 sb_wait_write(sb, SB_FREEZE_FS);
1204 1409
1205 sync_blockdev(sb->s_bdev);
1206 if (sb->s_op->freeze_fs) { 1410 if (sb->s_op->freeze_fs) {
1207 ret = sb->s_op->freeze_fs(sb); 1411 ret = sb->s_op->freeze_fs(sb);
1208 if (ret) { 1412 if (ret) {
1209 printk(KERN_ERR 1413 printk(KERN_ERR
1210 "VFS:Filesystem freeze failed\n"); 1414 "VFS:Filesystem freeze failed\n");
1211 sb->s_frozen = SB_UNFROZEN; 1415 sb->s_writers.frozen = SB_UNFROZEN;
1212 smp_wmb(); 1416 smp_wmb();
1213 wake_up(&sb->s_wait_unfrozen); 1417 wake_up(&sb->s_writers.wait_unfrozen);
1214 deactivate_locked_super(sb); 1418 deactivate_locked_super(sb);
1215 return ret; 1419 return ret;
1216 } 1420 }
1217 } 1421 }
1422 /*
1423 * This is just for debugging purposes so that fs can warn if it
1424 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
1425 */
1426 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1218 up_write(&sb->s_umount); 1427 up_write(&sb->s_umount);
1219 return 0; 1428 return 0;
1220} 1429}
@@ -1231,7 +1440,7 @@ int thaw_super(struct super_block *sb)
1231 int error; 1440 int error;
1232 1441
1233 down_write(&sb->s_umount); 1442 down_write(&sb->s_umount);
1234 if (sb->s_frozen == SB_UNFROZEN) { 1443 if (sb->s_writers.frozen == SB_UNFROZEN) {
1235 up_write(&sb->s_umount); 1444 up_write(&sb->s_umount);
1236 return -EINVAL; 1445 return -EINVAL;
1237 } 1446 }
@@ -1244,16 +1453,15 @@ int thaw_super(struct super_block *sb)
1244 if (error) { 1453 if (error) {
1245 printk(KERN_ERR 1454 printk(KERN_ERR
1246 "VFS:Filesystem thaw failed\n"); 1455 "VFS:Filesystem thaw failed\n");
1247 sb->s_frozen = SB_FREEZE_TRANS;
1248 up_write(&sb->s_umount); 1456 up_write(&sb->s_umount);
1249 return error; 1457 return error;
1250 } 1458 }
1251 } 1459 }
1252 1460
1253out: 1461out:
1254 sb->s_frozen = SB_UNFROZEN; 1462 sb->s_writers.frozen = SB_UNFROZEN;
1255 smp_wmb(); 1463 smp_wmb();
1256 wake_up(&sb->s_wait_unfrozen); 1464 wake_up(&sb->s_writers.wait_unfrozen);
1257 deactivate_locked_super(sb); 1465 deactivate_locked_super(sb);
1258 1466
1259 return 0; 1467 return 0;
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index a4759833d62d..614b2b544880 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
228 ret = 0; 228 ret = 0;
229 if (bb->vm_ops->page_mkwrite) 229 if (bb->vm_ops->page_mkwrite)
230 ret = bb->vm_ops->page_mkwrite(vma, vmf); 230 ret = bb->vm_ops->page_mkwrite(vma, vmf);
231 else
232 file_update_time(file);
231 233
232 sysfs_put_active(attr_sd); 234 sysfs_put_active(attr_sd);
233 return ret; 235 return ret;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 15052ff916ec..e562dd43f41f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -124,6 +124,12 @@ xfs_setfilesize_trans_alloc(
124 ioend->io_append_trans = tp; 124 ioend->io_append_trans = tp;
125 125
126 /* 126 /*
127 * We will pass freeze protection with a transaction. So tell lockdep
128 * we released it.
129 */
130 rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
131 1, _THIS_IP_);
132 /*
127 * We hand off the transaction to the completion thread now, so 133 * We hand off the transaction to the completion thread now, so
128 * clear the flag here. 134 * clear the flag here.
129 */ 135 */
@@ -199,6 +205,15 @@ xfs_end_io(
199 struct xfs_inode *ip = XFS_I(ioend->io_inode); 205 struct xfs_inode *ip = XFS_I(ioend->io_inode);
200 int error = 0; 206 int error = 0;
201 207
208 if (ioend->io_append_trans) {
209 /*
210 * We've got freeze protection passed with the transaction.
211 * Tell lockdep about it.
212 */
213 rwsem_acquire_read(
214 &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
215 0, 1, _THIS_IP_);
216 }
202 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 217 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
203 ioend->io_error = -EIO; 218 ioend->io_error = -EIO;
204 goto done; 219 goto done;
@@ -1425,6 +1440,9 @@ out_trans_cancel:
1425 if (ioend->io_append_trans) { 1440 if (ioend->io_append_trans) {
1426 current_set_flags_nested(&ioend->io_append_trans->t_pflags, 1441 current_set_flags_nested(&ioend->io_append_trans->t_pflags,
1427 PF_FSTRANS); 1442 PF_FSTRANS);
1443 rwsem_acquire_read(
1444 &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
1445 0, 1, _THIS_IP_);
1428 xfs_trans_cancel(ioend->io_append_trans, 0); 1446 xfs_trans_cancel(ioend->io_append_trans, 0);
1429 } 1447 }
1430out_destroy_ioend: 1448out_destroy_ioend:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c4559c6e6f2c..56afcdb2377d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -770,10 +770,12 @@ xfs_file_aio_write(
770 if (ocount == 0) 770 if (ocount == 0)
771 return 0; 771 return 0;
772 772
773 xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); 773 sb_start_write(inode->i_sb);
774 774
775 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 775 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
776 return -EIO; 776 ret = -EIO;
777 goto out;
778 }
777 779
778 if (unlikely(file->f_flags & O_DIRECT)) 780 if (unlikely(file->f_flags & O_DIRECT))
779 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); 781 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
@@ -792,6 +794,8 @@ xfs_file_aio_write(
792 ret = err; 794 ret = err;
793 } 795 }
794 796
797out:
798 sb_end_write(inode->i_sb);
795 return ret; 799 return ret;
796} 800}
797 801
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 1f1535d25a9b..0e0232c3b6d9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -364,9 +364,15 @@ xfs_fssetdm_by_handle(
364 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) 364 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
365 return -XFS_ERROR(EFAULT); 365 return -XFS_ERROR(EFAULT);
366 366
367 error = mnt_want_write_file(parfilp);
368 if (error)
369 return error;
370
367 dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); 371 dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
368 if (IS_ERR(dentry)) 372 if (IS_ERR(dentry)) {
373 mnt_drop_write_file(parfilp);
369 return PTR_ERR(dentry); 374 return PTR_ERR(dentry);
375 }
370 376
371 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { 377 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
372 error = -XFS_ERROR(EPERM); 378 error = -XFS_ERROR(EPERM);
@@ -382,6 +388,7 @@ xfs_fssetdm_by_handle(
382 fsd.fsd_dmstate); 388 fsd.fsd_dmstate);
383 389
384 out: 390 out:
391 mnt_drop_write_file(parfilp);
385 dput(dentry); 392 dput(dentry);
386 return error; 393 return error;
387} 394}
@@ -634,7 +641,11 @@ xfs_ioc_space(
634 if (ioflags & IO_INVIS) 641 if (ioflags & IO_INVIS)
635 attr_flags |= XFS_ATTR_DMI; 642 attr_flags |= XFS_ATTR_DMI;
636 643
644 error = mnt_want_write_file(filp);
645 if (error)
646 return error;
637 error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); 647 error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
648 mnt_drop_write_file(filp);
638 return -error; 649 return -error;
639} 650}
640 651
@@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr(
1163{ 1174{
1164 struct fsxattr fa; 1175 struct fsxattr fa;
1165 unsigned int mask; 1176 unsigned int mask;
1177 int error;
1166 1178
1167 if (copy_from_user(&fa, arg, sizeof(fa))) 1179 if (copy_from_user(&fa, arg, sizeof(fa)))
1168 return -EFAULT; 1180 return -EFAULT;
@@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr(
1171 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1183 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
1172 mask |= FSX_NONBLOCK; 1184 mask |= FSX_NONBLOCK;
1173 1185
1174 return -xfs_ioctl_setattr(ip, &fa, mask); 1186 error = mnt_want_write_file(filp);
1187 if (error)
1188 return error;
1189 error = xfs_ioctl_setattr(ip, &fa, mask);
1190 mnt_drop_write_file(filp);
1191 return -error;
1175} 1192}
1176 1193
1177STATIC int 1194STATIC int
@@ -1196,6 +1213,7 @@ xfs_ioc_setxflags(
1196 struct fsxattr fa; 1213 struct fsxattr fa;
1197 unsigned int flags; 1214 unsigned int flags;
1198 unsigned int mask; 1215 unsigned int mask;
1216 int error;
1199 1217
1200 if (copy_from_user(&flags, arg, sizeof(flags))) 1218 if (copy_from_user(&flags, arg, sizeof(flags)))
1201 return -EFAULT; 1219 return -EFAULT;
@@ -1210,7 +1228,12 @@ xfs_ioc_setxflags(
1210 mask |= FSX_NONBLOCK; 1228 mask |= FSX_NONBLOCK;
1211 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); 1229 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
1212 1230
1213 return -xfs_ioctl_setattr(ip, &fa, mask); 1231 error = mnt_want_write_file(filp);
1232 if (error)
1233 return error;
1234 error = xfs_ioctl_setattr(ip, &fa, mask);
1235 mnt_drop_write_file(filp);
1236 return -error;
1214} 1237}
1215 1238
1216STATIC int 1239STATIC int
@@ -1385,8 +1408,13 @@ xfs_file_ioctl(
1385 if (copy_from_user(&dmi, arg, sizeof(dmi))) 1408 if (copy_from_user(&dmi, arg, sizeof(dmi)))
1386 return -XFS_ERROR(EFAULT); 1409 return -XFS_ERROR(EFAULT);
1387 1410
1411 error = mnt_want_write_file(filp);
1412 if (error)
1413 return error;
1414
1388 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, 1415 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
1389 dmi.fsd_dmstate); 1416 dmi.fsd_dmstate);
1417 mnt_drop_write_file(filp);
1390 return -error; 1418 return -error;
1391 } 1419 }
1392 1420
@@ -1434,7 +1462,11 @@ xfs_file_ioctl(
1434 1462
1435 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) 1463 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
1436 return -XFS_ERROR(EFAULT); 1464 return -XFS_ERROR(EFAULT);
1465 error = mnt_want_write_file(filp);
1466 if (error)
1467 return error;
1437 error = xfs_swapext(&sxp); 1468 error = xfs_swapext(&sxp);
1469 mnt_drop_write_file(filp);
1438 return -error; 1470 return -error;
1439 } 1471 }
1440 1472
@@ -1463,9 +1495,14 @@ xfs_file_ioctl(
1463 if (copy_from_user(&inout, arg, sizeof(inout))) 1495 if (copy_from_user(&inout, arg, sizeof(inout)))
1464 return -XFS_ERROR(EFAULT); 1496 return -XFS_ERROR(EFAULT);
1465 1497
1498 error = mnt_want_write_file(filp);
1499 if (error)
1500 return error;
1501
1466 /* input parameter is passed in resblks field of structure */ 1502 /* input parameter is passed in resblks field of structure */
1467 in = inout.resblks; 1503 in = inout.resblks;
1468 error = xfs_reserve_blocks(mp, &in, &inout); 1504 error = xfs_reserve_blocks(mp, &in, &inout);
1505 mnt_drop_write_file(filp);
1469 if (error) 1506 if (error)
1470 return -error; 1507 return -error;
1471 1508
@@ -1496,7 +1533,11 @@ xfs_file_ioctl(
1496 if (copy_from_user(&in, arg, sizeof(in))) 1533 if (copy_from_user(&in, arg, sizeof(in)))
1497 return -XFS_ERROR(EFAULT); 1534 return -XFS_ERROR(EFAULT);
1498 1535
1536 error = mnt_want_write_file(filp);
1537 if (error)
1538 return error;
1499 error = xfs_growfs_data(mp, &in); 1539 error = xfs_growfs_data(mp, &in);
1540 mnt_drop_write_file(filp);
1500 return -error; 1541 return -error;
1501 } 1542 }
1502 1543
@@ -1506,7 +1547,11 @@ xfs_file_ioctl(
1506 if (copy_from_user(&in, arg, sizeof(in))) 1547 if (copy_from_user(&in, arg, sizeof(in)))
1507 return -XFS_ERROR(EFAULT); 1548 return -XFS_ERROR(EFAULT);
1508 1549
1550 error = mnt_want_write_file(filp);
1551 if (error)
1552 return error;
1509 error = xfs_growfs_log(mp, &in); 1553 error = xfs_growfs_log(mp, &in);
1554 mnt_drop_write_file(filp);
1510 return -error; 1555 return -error;
1511 } 1556 }
1512 1557
@@ -1516,7 +1561,11 @@ xfs_file_ioctl(
1516 if (copy_from_user(&in, arg, sizeof(in))) 1561 if (copy_from_user(&in, arg, sizeof(in)))
1517 return -XFS_ERROR(EFAULT); 1562 return -XFS_ERROR(EFAULT);
1518 1563
1564 error = mnt_want_write_file(filp);
1565 if (error)
1566 return error;
1519 error = xfs_growfs_rt(mp, &in); 1567 error = xfs_growfs_rt(mp, &in);
1568 mnt_drop_write_file(filp);
1520 return -error; 1569 return -error;
1521 } 1570 }
1522 1571
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c4f2da0d2bf5..1244274a5674 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -600,7 +600,11 @@ xfs_file_compat_ioctl(
600 600
601 if (xfs_compat_growfs_data_copyin(&in, arg)) 601 if (xfs_compat_growfs_data_copyin(&in, arg))
602 return -XFS_ERROR(EFAULT); 602 return -XFS_ERROR(EFAULT);
603 error = mnt_want_write_file(filp);
604 if (error)
605 return error;
603 error = xfs_growfs_data(mp, &in); 606 error = xfs_growfs_data(mp, &in);
607 mnt_drop_write_file(filp);
604 return -error; 608 return -error;
605 } 609 }
606 case XFS_IOC_FSGROWFSRT_32: { 610 case XFS_IOC_FSGROWFSRT_32: {
@@ -608,7 +612,11 @@ xfs_file_compat_ioctl(
608 612
609 if (xfs_compat_growfs_rt_copyin(&in, arg)) 613 if (xfs_compat_growfs_rt_copyin(&in, arg))
610 return -XFS_ERROR(EFAULT); 614 return -XFS_ERROR(EFAULT);
615 error = mnt_want_write_file(filp);
616 if (error)
617 return error;
611 error = xfs_growfs_rt(mp, &in); 618 error = xfs_growfs_rt(mp, &in);
619 mnt_drop_write_file(filp);
612 return -error; 620 return -error;
613 } 621 }
614#endif 622#endif
@@ -627,7 +635,11 @@ xfs_file_compat_ioctl(
627 offsetof(struct xfs_swapext, sx_stat)) || 635 offsetof(struct xfs_swapext, sx_stat)) ||
628 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) 636 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
629 return -XFS_ERROR(EFAULT); 637 return -XFS_ERROR(EFAULT);
638 error = mnt_want_write_file(filp);
639 if (error)
640 return error;
630 error = xfs_swapext(&sxp); 641 error = xfs_swapext(&sxp);
642 mnt_drop_write_file(filp);
631 return -error; 643 return -error;
632 } 644 }
633 case XFS_IOC_FSBULKSTAT_32: 645 case XFS_IOC_FSBULKSTAT_32:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 915edf6639f0..973dff6ad935 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -680,9 +680,9 @@ xfs_iomap_write_unwritten(
680 * the same inode that we complete here and might deadlock 680 * the same inode that we complete here and might deadlock
681 * on the iolock. 681 * on the iolock.
682 */ 682 */
683 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 683 sb_start_intwrite(mp->m_super);
684 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); 684 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
685 tp->t_flags |= XFS_TRANS_RESERVE; 685 tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
686 error = xfs_trans_reserve(tp, resblks, 686 error = xfs_trans_reserve(tp, resblks,
687 XFS_WRITE_LOG_RES(mp), 0, 687 XFS_WRITE_LOG_RES(mp), 0,
688 XFS_TRANS_PERM_LOG_RES, 688 XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 711ca51ca3d7..29c2f83d4147 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1551,7 +1551,7 @@ xfs_unmountfs(
1551int 1551int
1552xfs_fs_writable(xfs_mount_t *mp) 1552xfs_fs_writable(xfs_mount_t *mp)
1553{ 1553{
1554 return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) || 1554 return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) ||
1555 (mp->m_flags & XFS_MOUNT_RDONLY)); 1555 (mp->m_flags & XFS_MOUNT_RDONLY));
1556} 1556}
1557 1557
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8724336a9a08..05a05a7b6119 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -311,9 +311,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
311#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ 311#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
312#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ 312#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
313 313
314#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
315#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
316
317/* 314/*
318 * Flags for xfs_mountfs 315 * Flags for xfs_mountfs
319 */ 316 */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 97304f10e78a..96548176db80 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -403,7 +403,7 @@ xfs_sync_worker(
403 if (!(mp->m_super->s_flags & MS_ACTIVE) && 403 if (!(mp->m_super->s_flags & MS_ACTIVE) &&
404 !(mp->m_flags & XFS_MOUNT_RDONLY)) { 404 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
405 /* dgc: errors ignored here */ 405 /* dgc: errors ignored here */
406 if (mp->m_super->s_frozen == SB_UNFROZEN && 406 if (mp->m_super->s_writers.frozen == SB_UNFROZEN &&
407 xfs_log_need_covered(mp)) 407 xfs_log_need_covered(mp))
408 error = xfs_fs_log_dummy(mp); 408 error = xfs_fs_log_dummy(mp);
409 else 409 else
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdf324508c5e..06ed520a767f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -576,8 +576,12 @@ xfs_trans_alloc(
576 xfs_mount_t *mp, 576 xfs_mount_t *mp,
577 uint type) 577 uint type)
578{ 578{
579 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 579 xfs_trans_t *tp;
580 return _xfs_trans_alloc(mp, type, KM_SLEEP); 580
581 sb_start_intwrite(mp->m_super);
582 tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
583 tp->t_flags |= XFS_TRANS_FREEZE_PROT;
584 return tp;
581} 585}
582 586
583xfs_trans_t * 587xfs_trans_t *
@@ -588,6 +592,7 @@ _xfs_trans_alloc(
588{ 592{
589 xfs_trans_t *tp; 593 xfs_trans_t *tp;
590 594
595 WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
591 atomic_inc(&mp->m_active_trans); 596 atomic_inc(&mp->m_active_trans);
592 597
593 tp = kmem_zone_zalloc(xfs_trans_zone, memflags); 598 tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
@@ -611,6 +616,8 @@ xfs_trans_free(
611 xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); 616 xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
612 617
613 atomic_dec(&tp->t_mountp->m_active_trans); 618 atomic_dec(&tp->t_mountp->m_active_trans);
619 if (tp->t_flags & XFS_TRANS_FREEZE_PROT)
620 sb_end_intwrite(tp->t_mountp->m_super);
614 xfs_trans_free_dqinfo(tp); 621 xfs_trans_free_dqinfo(tp);
615 kmem_zone_free(xfs_trans_zone, tp); 622 kmem_zone_free(xfs_trans_zone, tp);
616} 623}
@@ -643,7 +650,11 @@ xfs_trans_dup(
643 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 650 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
644 ASSERT(tp->t_ticket != NULL); 651 ASSERT(tp->t_ticket != NULL);
645 652
646 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); 653 ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
654 (tp->t_flags & XFS_TRANS_RESERVE) |
655 (tp->t_flags & XFS_TRANS_FREEZE_PROT);
656 /* We gave our writer reference to the new transaction */
657 tp->t_flags &= ~XFS_TRANS_FREEZE_PROT;
647 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); 658 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
648 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; 659 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
649 tp->t_blk_res = tp->t_blk_res_used; 660 tp->t_blk_res = tp->t_blk_res_used;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index bc2afd52a0b7..db056544cbb5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -179,6 +179,8 @@ struct xfs_log_item_desc {
179#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ 179#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
180#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ 180#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
181#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ 181#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
182#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
183 count in superblock */
182 184
183/* 185/*
184 * Values for call flags parameter. 186 * Values for call flags parameter.
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 22f292a917a3..36abf2aa7e68 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -130,6 +130,7 @@
130#define AUDIT_LAST_KERN_ANOM_MSG 1799 130#define AUDIT_LAST_KERN_ANOM_MSG 1799
131#define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */ 131#define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */
132#define AUDIT_ANOM_ABEND 1701 /* Process ended abnormally */ 132#define AUDIT_ANOM_ABEND 1701 /* Process ended abnormally */
133#define AUDIT_ANOM_LINK 1702 /* Suspicious use of file links */
133#define AUDIT_INTEGRITY_DATA 1800 /* Data integrity verification */ 134#define AUDIT_INTEGRITY_DATA 1800 /* Data integrity verification */
134#define AUDIT_INTEGRITY_METADATA 1801 /* Metadata integrity verification */ 135#define AUDIT_INTEGRITY_METADATA 1801 /* Metadata integrity verification */
135#define AUDIT_INTEGRITY_STATUS 1802 /* Integrity enable status */ 136#define AUDIT_INTEGRITY_STATUS 1802 /* Integrity enable status */
@@ -687,6 +688,8 @@ extern void audit_log_d_path(struct audit_buffer *ab,
687 const struct path *path); 688 const struct path *path);
688extern void audit_log_key(struct audit_buffer *ab, 689extern void audit_log_key(struct audit_buffer *ab,
689 char *key); 690 char *key);
691extern void audit_log_link_denied(const char *operation,
692 struct path *link);
690extern void audit_log_lost(const char *message); 693extern void audit_log_lost(const char *message);
691#ifdef CONFIG_SECURITY 694#ifdef CONFIG_SECURITY
692extern void audit_log_secctx(struct audit_buffer *ab, u32 secid); 695extern void audit_log_secctx(struct audit_buffer *ab, u32 secid);
@@ -716,6 +719,7 @@ extern int audit_enabled;
716#define audit_log_untrustedstring(a,s) do { ; } while (0) 719#define audit_log_untrustedstring(a,s) do { ; } while (0)
717#define audit_log_d_path(b, p, d) do { ; } while (0) 720#define audit_log_d_path(b, p, d) do { ; } while (0)
718#define audit_log_key(b, k) do { ; } while (0) 721#define audit_log_key(b, k) do { ; } while (0)
722#define audit_log_link_denied(o, l) do { ; } while (0)
719#define audit_log_secctx(b,s) do { ; } while (0) 723#define audit_log_secctx(b,s) do { ; } while (0)
720#define audit_enabled 0 724#define audit_enabled 0
721#endif 725#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ba5c8715523..38dba16c4176 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -414,6 +414,7 @@ struct inodes_stat_t {
414#include <linux/shrinker.h> 414#include <linux/shrinker.h>
415#include <linux/migrate_mode.h> 415#include <linux/migrate_mode.h>
416#include <linux/uidgid.h> 416#include <linux/uidgid.h>
417#include <linux/lockdep.h>
417 418
418#include <asm/byteorder.h> 419#include <asm/byteorder.h>
419 420
@@ -440,6 +441,8 @@ extern unsigned long get_max_files(void);
440extern int sysctl_nr_open; 441extern int sysctl_nr_open;
441extern struct inodes_stat_t inodes_stat; 442extern struct inodes_stat_t inodes_stat;
442extern int leases_enable, lease_break_time; 443extern int leases_enable, lease_break_time;
444extern int sysctl_protected_symlinks;
445extern int sysctl_protected_hardlinks;
443 446
444struct buffer_head; 447struct buffer_head;
445typedef int (get_block_t)(struct inode *inode, sector_t iblock, 448typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -1445,6 +1448,8 @@ extern void f_delown(struct file *filp);
1445extern pid_t f_getown(struct file *filp); 1448extern pid_t f_getown(struct file *filp);
1446extern int send_sigurg(struct fown_struct *fown); 1449extern int send_sigurg(struct fown_struct *fown);
1447 1450
1451struct mm_struct;
1452
1448/* 1453/*
1449 * Umount options 1454 * Umount options
1450 */ 1455 */
@@ -1458,6 +1463,31 @@ extern int send_sigurg(struct fown_struct *fown);
1458extern struct list_head super_blocks; 1463extern struct list_head super_blocks;
1459extern spinlock_t sb_lock; 1464extern spinlock_t sb_lock;
1460 1465
1466/* Possible states of 'frozen' field */
1467enum {
1468 SB_UNFROZEN = 0, /* FS is unfrozen */
1469 SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */
1470 SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */
1471 SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop
1472 * internal threads if needed) */
1473 SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */
1474};
1475
1476#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
1477
1478struct sb_writers {
1479 /* Counters for counting writers at each level */
1480 struct percpu_counter counter[SB_FREEZE_LEVELS];
1481 wait_queue_head_t wait; /* queue for waiting for
1482 writers / faults to finish */
1483 int frozen; /* Is sb frozen? */
1484 wait_queue_head_t wait_unfrozen; /* queue for waiting for
1485 sb to be thawed */
1486#ifdef CONFIG_DEBUG_LOCK_ALLOC
1487 struct lockdep_map lock_map[SB_FREEZE_LEVELS];
1488#endif
1489};
1490
1461struct super_block { 1491struct super_block {
1462 struct list_head s_list; /* Keep this first */ 1492 struct list_head s_list; /* Keep this first */
1463 dev_t s_dev; /* search index; _not_ kdev_t */ 1493 dev_t s_dev; /* search index; _not_ kdev_t */
@@ -1505,8 +1535,7 @@ struct super_block {
1505 struct hlist_node s_instances; 1535 struct hlist_node s_instances;
1506 struct quota_info s_dquot; /* Diskquota specific options */ 1536 struct quota_info s_dquot; /* Diskquota specific options */
1507 1537
1508 int s_frozen; 1538 struct sb_writers s_writers;
1509 wait_queue_head_t s_wait_unfrozen;
1510 1539
1511 char s_id[32]; /* Informational name */ 1540 char s_id[32]; /* Informational name */
1512 u8 s_uuid[16]; /* UUID */ 1541 u8 s_uuid[16]; /* UUID */
@@ -1561,14 +1590,117 @@ extern struct timespec current_fs_time(struct super_block *sb);
1561/* 1590/*
1562 * Snapshotting support. 1591 * Snapshotting support.
1563 */ 1592 */
1564enum {
1565 SB_UNFROZEN = 0,
1566 SB_FREEZE_WRITE = 1,
1567 SB_FREEZE_TRANS = 2,
1568};
1569 1593
1570#define vfs_check_frozen(sb, level) \ 1594void __sb_end_write(struct super_block *sb, int level);
1571 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) 1595int __sb_start_write(struct super_block *sb, int level, bool wait);
1596
1597/**
1598 * sb_end_write - drop write access to a superblock
1599 * @sb: the super we wrote to
1600 *
1601 * Decrement number of writers to the filesystem. Wake up possible waiters
1602 * wanting to freeze the filesystem.
1603 */
1604static inline void sb_end_write(struct super_block *sb)
1605{
1606 __sb_end_write(sb, SB_FREEZE_WRITE);
1607}
1608
1609/**
1610 * sb_end_pagefault - drop write access to a superblock from a page fault
1611 * @sb: the super we wrote to
1612 *
1613 * Decrement number of processes handling write page fault to the filesystem.
1614 * Wake up possible waiters wanting to freeze the filesystem.
1615 */
1616static inline void sb_end_pagefault(struct super_block *sb)
1617{
1618 __sb_end_write(sb, SB_FREEZE_PAGEFAULT);
1619}
1620
1621/**
1622 * sb_end_intwrite - drop write access to a superblock for internal fs purposes
1623 * @sb: the super we wrote to
1624 *
1625 * Decrement fs-internal number of writers to the filesystem. Wake up possible
1626 * waiters wanting to freeze the filesystem.
1627 */
1628static inline void sb_end_intwrite(struct super_block *sb)
1629{
1630 __sb_end_write(sb, SB_FREEZE_FS);
1631}
1632
1633/**
1634 * sb_start_write - get write access to a superblock
1635 * @sb: the super we write to
1636 *
1637 * When a process wants to write data or metadata to a file system (i.e. dirty
1638 * a page or an inode), it should embed the operation in a sb_start_write() -
1639 * sb_end_write() pair to get exclusion against file system freezing. This
1640 * function increments number of writers preventing freezing. If the file
1641 * system is already frozen, the function waits until the file system is
1642 * thawed.
1643 *
1644 * Since freeze protection behaves as a lock, users have to preserve
1645 * ordering of freeze protection and other filesystem locks. Generally,
1646 * freeze protection should be the outermost lock. In particular, we have:
1647 *
1648 * sb_start_write
1649 * -> i_mutex (write path, truncate, directory ops, ...)
1650 * -> s_umount (freeze_super, thaw_super)
1651 */
1652static inline void sb_start_write(struct super_block *sb)
1653{
1654 __sb_start_write(sb, SB_FREEZE_WRITE, true);
1655}
1656
1657static inline int sb_start_write_trylock(struct super_block *sb)
1658{
1659 return __sb_start_write(sb, SB_FREEZE_WRITE, false);
1660}
1661
1662/**
1663 * sb_start_pagefault - get write access to a superblock from a page fault
1664 * @sb: the super we write to
1665 *
1666 * When a process starts handling write page fault, it should embed the
1667 * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
1668 * exclusion against file system freezing. This is needed since the page fault
1669 * is going to dirty a page. This function increments number of running page
1670 * faults preventing freezing. If the file system is already frozen, the
1671 * function waits until the file system is thawed.
1672 *
1673 * Since page fault freeze protection behaves as a lock, users have to preserve
1674 * ordering of freeze protection and other filesystem locks. It is advised to
1675 * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
1676 * handling code implies lock dependency:
1677 *
1678 * mmap_sem
1679 * -> sb_start_pagefault
1680 */
1681static inline void sb_start_pagefault(struct super_block *sb)
1682{
1683 __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
1684}
1685
1686/**
1687 * sb_start_intwrite - get write access to a superblock for internal fs purposes
1688 * @sb: the super we write to
1689 *
1690 * This is the third level of protection against filesystem freezing. It is
1691 * free for use by a filesystem. The only requirement is that it must rank
1692 * below sb_start_pagefault.
1693 *
1694 * For example filesystem can call sb_start_intwrite() when starting a
1695 * transaction which somewhat eases handling of freezing for internal sources
1696 * of filesystem changes (internal fs threads, discarding preallocation on file
1697 * close, etc.).
1698 */
1699static inline void sb_start_intwrite(struct super_block *sb)
1700{
1701 __sb_start_write(sb, SB_FREEZE_FS, true);
1702}
1703
1572 1704
1573extern bool inode_owner_or_capable(const struct inode *inode); 1705extern bool inode_owner_or_capable(const struct inode *inode);
1574 1706
@@ -1892,6 +2024,7 @@ struct file_system_type {
1892 struct lock_class_key s_lock_key; 2024 struct lock_class_key s_lock_key;
1893 struct lock_class_key s_umount_key; 2025 struct lock_class_key s_umount_key;
1894 struct lock_class_key s_vfs_rename_key; 2026 struct lock_class_key s_vfs_rename_key;
2027 struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
1895 2028
1896 struct lock_class_key i_lock_key; 2029 struct lock_class_key i_lock_key;
1897 struct lock_class_key i_mutex_key; 2030 struct lock_class_key i_mutex_key;
@@ -2334,9 +2467,6 @@ static inline void i_readcount_inc(struct inode *inode)
2334} 2467}
2335#endif 2468#endif
2336extern int do_pipe_flags(int *, int); 2469extern int do_pipe_flags(int *, int);
2337extern struct file *create_read_pipe(struct file *f, int flags);
2338extern struct file *create_write_pipe(int flags);
2339extern void free_write_pipe(struct file *);
2340 2470
2341extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2471extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2342extern struct file * open_exec(const char *); 2472extern struct file * open_exec(const char *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bd079a1b0fdc..311be906b57d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1441,6 +1441,7 @@ extern void truncate_inode_pages_range(struct address_space *,
1441 1441
1442/* generic vm_area_ops exported for stackable file systems */ 1442/* generic vm_area_ops exported for stackable file systems */
1443extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); 1443extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
1444extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1444 1445
1445/* mm/page-writeback.c */ 1446/* mm/page-writeback.c */
1446int write_one_page(struct page *page, int wait); 1447int write_one_page(struct page *page, int wait);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index d2ef8b34b967..4bf19d8174ed 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -67,6 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *);
67 67
68extern struct dentry *kern_path_create(int, const char *, struct path *, int); 68extern struct dentry *kern_path_create(int, const char *, struct path *, int);
69extern struct dentry *user_path_create(int, const char __user *, struct path *, int); 69extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
70extern void done_path_create(struct path *, struct dentry *);
70extern struct dentry *kern_path_locked(const char *, struct path *); 71extern struct dentry *kern_path_locked(const char *, struct path *);
71extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 72extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
72 const char *, unsigned int, struct path *); 73 const char *, unsigned int, struct path *);
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index ce4743a26015..fa63048fecff 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -143,6 +143,7 @@ typedef struct svc_fh {
143 int fh_maxsize; /* max size for fh_handle */ 143 int fh_maxsize; /* max size for fh_handle */
144 144
145 unsigned char fh_locked; /* inode locked by us */ 145 unsigned char fh_locked; /* inode locked by us */
146 unsigned char fh_want_write; /* remount protection taken */
146 147
147#ifdef CONFIG_NFSD_V3 148#ifdef CONFIG_NFSD_V3
148 unsigned char fh_post_saved; /* post-op attrs saved */ 149 unsigned char fh_post_saved; /* post-op attrs saved */
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index e11d1c0fc60f..ad1a427b5267 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -160,4 +160,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
160long pipe_fcntl(struct file *, unsigned int, unsigned long arg); 160long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
161struct pipe_inode_info *get_pipe_info(struct file *file); 161struct pipe_inode_info *get_pipe_info(struct file *file);
162 162
163int create_pipe_files(struct file **, int);
164
163#endif 165#endif
diff --git a/kernel/audit.c b/kernel/audit.c
index 4a3f28d2ca65..ea3b7b6191c7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1456,6 +1456,27 @@ void audit_log_key(struct audit_buffer *ab, char *key)
1456} 1456}
1457 1457
1458/** 1458/**
1459 * audit_log_link_denied - report a link restriction denial
1460 * @operation: specific link opreation
1461 * @link: the path that triggered the restriction
1462 */
1463void audit_log_link_denied(const char *operation, struct path *link)
1464{
1465 struct audit_buffer *ab;
1466
1467 ab = audit_log_start(current->audit_context, GFP_KERNEL,
1468 AUDIT_ANOM_LINK);
1469 audit_log_format(ab, "op=%s action=denied", operation);
1470 audit_log_format(ab, " pid=%d comm=", current->pid);
1471 audit_log_untrustedstring(ab, current->comm);
1472 audit_log_d_path(ab, " path=", link);
1473 audit_log_format(ab, " dev=");
1474 audit_log_untrustedstring(ab, link->dentry->d_inode->i_sb->s_id);
1475 audit_log_format(ab, " ino=%lu", link->dentry->d_inode->i_ino);
1476 audit_log_end(ab);
1477}
1478
1479/**
1459 * audit_log_end - end one audit record 1480 * audit_log_end - end one audit record
1460 * @ab: the audit_buffer 1481 * @ab: the audit_buffer
1461 * 1482 *
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6502d35a25ba..87174ef59161 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1498,6 +1498,24 @@ static struct ctl_table fs_table[] = {
1498#endif 1498#endif
1499#endif 1499#endif
1500 { 1500 {
1501 .procname = "protected_symlinks",
1502 .data = &sysctl_protected_symlinks,
1503 .maxlen = sizeof(int),
1504 .mode = 0600,
1505 .proc_handler = proc_dointvec_minmax,
1506 .extra1 = &zero,
1507 .extra2 = &one,
1508 },
1509 {
1510 .procname = "protected_hardlinks",
1511 .data = &sysctl_protected_hardlinks,
1512 .maxlen = sizeof(int),
1513 .mode = 0600,
1514 .proc_handler = proc_dointvec_minmax,
1515 .extra1 = &zero,
1516 .extra2 = &one,
1517 },
1518 {
1501 .procname = "suid_dumpable", 1519 .procname = "suid_dumpable",
1502 .data = &suid_dumpable, 1520 .data = &suid_dumpable,
1503 .maxlen = sizeof(int), 1521 .maxlen = sizeof(int),
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index f8a3f1a829b8..ba6085d9c741 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -12,7 +12,7 @@
12 12
13#ifdef CONFIG_HOTPLUG_CPU 13#ifdef CONFIG_HOTPLUG_CPU
14static LIST_HEAD(percpu_counters); 14static LIST_HEAD(percpu_counters);
15static DEFINE_MUTEX(percpu_counters_lock); 15static DEFINE_SPINLOCK(percpu_counters_lock);
16#endif 16#endif
17 17
18#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER 18#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
@@ -123,9 +123,9 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
123 123
124#ifdef CONFIG_HOTPLUG_CPU 124#ifdef CONFIG_HOTPLUG_CPU
125 INIT_LIST_HEAD(&fbc->list); 125 INIT_LIST_HEAD(&fbc->list);
126 mutex_lock(&percpu_counters_lock); 126 spin_lock(&percpu_counters_lock);
127 list_add(&fbc->list, &percpu_counters); 127 list_add(&fbc->list, &percpu_counters);
128 mutex_unlock(&percpu_counters_lock); 128 spin_unlock(&percpu_counters_lock);
129#endif 129#endif
130 return 0; 130 return 0;
131} 131}
@@ -139,9 +139,9 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
139 debug_percpu_counter_deactivate(fbc); 139 debug_percpu_counter_deactivate(fbc);
140 140
141#ifdef CONFIG_HOTPLUG_CPU 141#ifdef CONFIG_HOTPLUG_CPU
142 mutex_lock(&percpu_counters_lock); 142 spin_lock(&percpu_counters_lock);
143 list_del(&fbc->list); 143 list_del(&fbc->list);
144 mutex_unlock(&percpu_counters_lock); 144 spin_unlock(&percpu_counters_lock);
145#endif 145#endif
146 free_percpu(fbc->counters); 146 free_percpu(fbc->counters);
147 fbc->counters = NULL; 147 fbc->counters = NULL;
@@ -170,7 +170,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
170 return NOTIFY_OK; 170 return NOTIFY_OK;
171 171
172 cpu = (unsigned long)hcpu; 172 cpu = (unsigned long)hcpu;
173 mutex_lock(&percpu_counters_lock); 173 spin_lock(&percpu_counters_lock);
174 list_for_each_entry(fbc, &percpu_counters, list) { 174 list_for_each_entry(fbc, &percpu_counters, list) {
175 s32 *pcount; 175 s32 *pcount;
176 unsigned long flags; 176 unsigned long flags;
@@ -181,7 +181,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
181 *pcount = 0; 181 *pcount = 0;
182 raw_spin_unlock_irqrestore(&fbc->lock, flags); 182 raw_spin_unlock_irqrestore(&fbc->lock, flags);
183 } 183 }
184 mutex_unlock(&percpu_counters_lock); 184 spin_unlock(&percpu_counters_lock);
185#endif 185#endif
186 return NOTIFY_OK; 186 return NOTIFY_OK;
187} 187}
diff --git a/mm/filemap.c b/mm/filemap.c
index a4a5260b0279..fa5ca304148e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1712,8 +1712,35 @@ page_not_uptodate:
1712} 1712}
1713EXPORT_SYMBOL(filemap_fault); 1713EXPORT_SYMBOL(filemap_fault);
1714 1714
1715int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1716{
1717 struct page *page = vmf->page;
1718 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1719 int ret = VM_FAULT_LOCKED;
1720
1721 sb_start_pagefault(inode->i_sb);
1722 file_update_time(vma->vm_file);
1723 lock_page(page);
1724 if (page->mapping != inode->i_mapping) {
1725 unlock_page(page);
1726 ret = VM_FAULT_NOPAGE;
1727 goto out;
1728 }
1729 /*
1730 * We mark the page dirty already here so that when freeze is in
1731 * progress, we are guaranteed that writeback during freezing will
1732 * see the dirty page and writeprotect it again.
1733 */
1734 set_page_dirty(page);
1735out:
1736 sb_end_pagefault(inode->i_sb);
1737 return ret;
1738}
1739EXPORT_SYMBOL(filemap_page_mkwrite);
1740
1715const struct vm_operations_struct generic_file_vm_ops = { 1741const struct vm_operations_struct generic_file_vm_ops = {
1716 .fault = filemap_fault, 1742 .fault = filemap_fault,
1743 .page_mkwrite = filemap_page_mkwrite,
1717}; 1744};
1718 1745
1719/* This is used for a general mmap of a disk file */ 1746/* This is used for a general mmap of a disk file */
@@ -2407,8 +2434,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2407 count = ocount; 2434 count = ocount;
2408 pos = *ppos; 2435 pos = *ppos;
2409 2436
2410 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2411
2412 /* We can write back this queue in page reclaim */ 2437 /* We can write back this queue in page reclaim */
2413 current->backing_dev_info = mapping->backing_dev_info; 2438 current->backing_dev_info = mapping->backing_dev_info;
2414 written = 0; 2439 written = 0;
@@ -2507,6 +2532,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2507 2532
2508 BUG_ON(iocb->ki_pos != pos); 2533 BUG_ON(iocb->ki_pos != pos);
2509 2534
2535 sb_start_write(inode->i_sb);
2510 mutex_lock(&inode->i_mutex); 2536 mutex_lock(&inode->i_mutex);
2511 blk_start_plug(&plug); 2537 blk_start_plug(&plug);
2512 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 2538 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
@@ -2520,6 +2546,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2520 ret = err; 2546 ret = err;
2521 } 2547 }
2522 blk_finish_plug(&plug); 2548 blk_finish_plug(&plug);
2549 sb_end_write(inode->i_sb);
2523 return ret; 2550 return ret;
2524} 2551}
2525EXPORT_SYMBOL(generic_file_aio_write); 2552EXPORT_SYMBOL(generic_file_aio_write);
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 213ca1f53409..13e013b1270c 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -304,6 +304,7 @@ out:
304 304
305static const struct vm_operations_struct xip_file_vm_ops = { 305static const struct vm_operations_struct xip_file_vm_ops = {
306 .fault = xip_file_fault, 306 .fault = xip_file_fault,
307 .page_mkwrite = filemap_page_mkwrite,
307}; 308};
308 309
309int xip_file_mmap(struct file * file, struct vm_area_struct * vma) 310int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
@@ -401,6 +402,8 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
401 loff_t pos; 402 loff_t pos;
402 ssize_t ret; 403 ssize_t ret;
403 404
405 sb_start_write(inode->i_sb);
406
404 mutex_lock(&inode->i_mutex); 407 mutex_lock(&inode->i_mutex);
405 408
406 if (!access_ok(VERIFY_READ, buf, len)) { 409 if (!access_ok(VERIFY_READ, buf, len)) {
@@ -411,8 +414,6 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
411 pos = *ppos; 414 pos = *ppos;
412 count = len; 415 count = len;
413 416
414 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
415
416 /* We can write back this queue in page reclaim */ 417 /* We can write back this queue in page reclaim */
417 current->backing_dev_info = mapping->backing_dev_info; 418 current->backing_dev_info = mapping->backing_dev_info;
418 419
@@ -436,6 +437,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
436 current->backing_dev_info = NULL; 437 current->backing_dev_info = NULL;
437 out_up: 438 out_up:
438 mutex_unlock(&inode->i_mutex); 439 mutex_unlock(&inode->i_mutex);
440 sb_end_write(inode->i_sb);
439 return ret; 441 return ret;
440} 442}
441EXPORT_SYMBOL_GPL(xip_file_write); 443EXPORT_SYMBOL_GPL(xip_file_write);
diff --git a/mm/memory.c b/mm/memory.c
index 482f089765ff..57361708d1a5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2650,6 +2650,9 @@ reuse:
2650 if (!page_mkwrite) { 2650 if (!page_mkwrite) {
2651 wait_on_page_locked(dirty_page); 2651 wait_on_page_locked(dirty_page);
2652 set_page_dirty_balance(dirty_page, page_mkwrite); 2652 set_page_dirty_balance(dirty_page, page_mkwrite);
2653 /* file_update_time outside page_lock */
2654 if (vma->vm_file)
2655 file_update_time(vma->vm_file);
2653 } 2656 }
2654 put_page(dirty_page); 2657 put_page(dirty_page);
2655 if (page_mkwrite) { 2658 if (page_mkwrite) {
@@ -2667,10 +2670,6 @@ reuse:
2667 } 2670 }
2668 } 2671 }
2669 2672
2670 /* file_update_time outside page_lock */
2671 if (vma->vm_file)
2672 file_update_time(vma->vm_file);
2673
2674 return ret; 2673 return ret;
2675 } 2674 }
2676 2675
@@ -3339,12 +3338,13 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3339 3338
3340 if (dirty_page) { 3339 if (dirty_page) {
3341 struct address_space *mapping = page->mapping; 3340 struct address_space *mapping = page->mapping;
3341 int dirtied = 0;
3342 3342
3343 if (set_page_dirty(dirty_page)) 3343 if (set_page_dirty(dirty_page))
3344 page_mkwrite = 1; 3344 dirtied = 1;
3345 unlock_page(dirty_page); 3345 unlock_page(dirty_page);
3346 put_page(dirty_page); 3346 put_page(dirty_page);
3347 if (page_mkwrite && mapping) { 3347 if ((dirtied || page_mkwrite) && mapping) {
3348 /* 3348 /*
3349 * Some device drivers do not set page.mapping but still 3349 * Some device drivers do not set page.mapping but still
3350 * dirty their pages 3350 * dirty their pages
@@ -3353,7 +3353,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3353 } 3353 }
3354 3354
3355 /* file_update_time outside page_lock */ 3355 /* file_update_time outside page_lock */
3356 if (vma->vm_file) 3356 if (vma->vm_file && !page_mkwrite)
3357 file_update_time(vma->vm_file); 3357 file_update_time(vma->vm_file);
3358 } else { 3358 } else {
3359 unlock_page(vmf.page); 3359 unlock_page(vmf.page);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 79981d97bc9c..e4768c180da2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -823,6 +823,34 @@ fail:
823 return NULL; 823 return NULL;
824} 824}
825 825
826static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
827{
828 struct dentry *dentry;
829 struct path path;
830 int err = 0;
831 /*
832 * Get the parent directory, calculate the hash for last
833 * component.
834 */
835 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
836 err = PTR_ERR(dentry);
837 if (IS_ERR(dentry))
838 return err;
839
840 /*
841 * All right, let's create it.
842 */
843 err = security_path_mknod(&path, dentry, mode, 0);
844 if (!err) {
845 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
846 if (!err) {
847 res->mnt = mntget(path.mnt);
848 res->dentry = dget(dentry);
849 }
850 }
851 done_path_create(&path, dentry);
852 return err;
853}
826 854
827static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 855static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
828{ 856{
@@ -831,8 +859,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
831 struct unix_sock *u = unix_sk(sk); 859 struct unix_sock *u = unix_sk(sk);
832 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 860 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
833 char *sun_path = sunaddr->sun_path; 861 char *sun_path = sunaddr->sun_path;
834 struct dentry *dentry = NULL;
835 struct path path;
836 int err; 862 int err;
837 unsigned int hash; 863 unsigned int hash;
838 struct unix_address *addr; 864 struct unix_address *addr;
@@ -869,43 +895,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
869 atomic_set(&addr->refcnt, 1); 895 atomic_set(&addr->refcnt, 1);
870 896
871 if (sun_path[0]) { 897 if (sun_path[0]) {
872 umode_t mode; 898 struct path path;
873 err = 0; 899 umode_t mode = S_IFSOCK |
874 /*
875 * Get the parent directory, calculate the hash for last
876 * component.
877 */
878 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
879 err = PTR_ERR(dentry);
880 if (IS_ERR(dentry))
881 goto out_mknod_parent;
882
883 /*
884 * All right, let's create it.
885 */
886 mode = S_IFSOCK |
887 (SOCK_INODE(sock)->i_mode & ~current_umask()); 900 (SOCK_INODE(sock)->i_mode & ~current_umask());
888 err = mnt_want_write(path.mnt); 901 err = unix_mknod(sun_path, mode, &path);
889 if (err) 902 if (err) {
890 goto out_mknod_dput; 903 if (err == -EEXIST)
891 err = security_path_mknod(&path, dentry, mode, 0); 904 err = -EADDRINUSE;
892 if (err) 905 unix_release_addr(addr);
893 goto out_mknod_drop_write; 906 goto out_up;
894 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); 907 }
895out_mknod_drop_write:
896 mnt_drop_write(path.mnt);
897 if (err)
898 goto out_mknod_dput;
899 mutex_unlock(&path.dentry->d_inode->i_mutex);
900 dput(path.dentry);
901 path.dentry = dentry;
902
903 addr->hash = UNIX_HASH_SIZE; 908 addr->hash = UNIX_HASH_SIZE;
904 } 909 hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
905 910 spin_lock(&unix_table_lock);
906 spin_lock(&unix_table_lock); 911 u->path = path;
907 912 list = &unix_socket_table[hash];
908 if (!sun_path[0]) { 913 } else {
914 spin_lock(&unix_table_lock);
909 err = -EADDRINUSE; 915 err = -EADDRINUSE;
910 if (__unix_find_socket_byname(net, sunaddr, addr_len, 916 if (__unix_find_socket_byname(net, sunaddr, addr_len,
911 sk->sk_type, hash)) { 917 sk->sk_type, hash)) {
@@ -914,9 +920,6 @@ out_mknod_drop_write:
914 } 920 }
915 921
916 list = &unix_socket_table[addr->hash]; 922 list = &unix_socket_table[addr->hash];
917 } else {
918 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
919 u->path = path;
920 } 923 }
921 924
922 err = 0; 925 err = 0;
@@ -930,16 +933,6 @@ out_up:
930 mutex_unlock(&u->readlock); 933 mutex_unlock(&u->readlock);
931out: 934out:
932 return err; 935 return err;
933
934out_mknod_dput:
935 dput(dentry);
936 mutex_unlock(&path.dentry->d_inode->i_mutex);
937 path_put(&path);
938out_mknod_parent:
939 if (err == -EEXIST)
940 err = -EADDRINUSE;
941 unix_release_addr(addr);
942 goto out_up;
943} 936}
944 937
945static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 938static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
diff --git a/sound/sound_firmware.c b/sound/sound_firmware.c
index 7e96249536b4..37711a5d0d6b 100644
--- a/sound/sound_firmware.c
+++ b/sound/sound_firmware.c
@@ -23,14 +23,14 @@ static int do_mod_firmware_load(const char *fn, char **fp)
23 if (l <= 0 || l > 131072) 23 if (l <= 0 || l > 131072)
24 { 24 {
25 printk(KERN_INFO "Invalid firmware '%s'\n", fn); 25 printk(KERN_INFO "Invalid firmware '%s'\n", fn);
26 filp_close(filp, current->files); 26 filp_close(filp, NULL);
27 return 0; 27 return 0;
28 } 28 }
29 dp = vmalloc(l); 29 dp = vmalloc(l);
30 if (dp == NULL) 30 if (dp == NULL)
31 { 31 {
32 printk(KERN_INFO "Out of memory loading '%s'.\n", fn); 32 printk(KERN_INFO "Out of memory loading '%s'.\n", fn);
33 filp_close(filp, current->files); 33 filp_close(filp, NULL);
34 return 0; 34 return 0;
35 } 35 }
36 pos = 0; 36 pos = 0;
@@ -38,10 +38,10 @@ static int do_mod_firmware_load(const char *fn, char **fp)
38 { 38 {
39 printk(KERN_INFO "Failed to read '%s'.\n", fn); 39 printk(KERN_INFO "Failed to read '%s'.\n", fn);
40 vfree(dp); 40 vfree(dp);
41 filp_close(filp, current->files); 41 filp_close(filp, NULL);
42 return 0; 42 return 0;
43 } 43 }
44 filp_close(filp, current->files); 44 filp_close(filp, NULL);
45 *fp = dp; 45 *fp = dp;
46 return (int) l; 46 return (int) l;
47} 47}