author	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2010-05-19 13:12:41 -0400
committer	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2010-05-19 13:12:41 -0400
commit	8d0bc2b456103a34c11e01305cd1aed1cde579e5 (patch)
tree	5e1e6ad55cc9e2b5c5617f6f320114b8cff9e3f3 /fs
parent	30ba3ead05763b172acaa65ae1be71af2a878940 (diff)
parent	e40152ee1e1c7a63f4777791863215e3faa37a86 (diff)
Merge commit 'v2.6.34' into next
Diffstat (limited to 'fs')
-rw-r--r--	fs/9p/v9fs.c	| 10
-rw-r--r--	fs/9p/v9fs.h	| 2
-rw-r--r--	fs/9p/vfs_super.c	| 1
-rw-r--r--	fs/afs/internal.h	| 2
-rw-r--r--	fs/afs/mntpt.c	| 24
-rw-r--r--	fs/afs/super.c	| 1
-rw-r--r--	fs/afs/volume.c	| 7
-rw-r--r--	fs/autofs4/root.c	| 5
-rw-r--r--	fs/binfmt_elf_fdpic.c	| 7
-rw-r--r--	fs/binfmt_flat.c	| 2
-rw-r--r--	fs/block_dev.c	| 17
-rw-r--r--	fs/btrfs/disk-io.c	| 12
-rw-r--r--	fs/btrfs/ioctl.c	| 5
-rw-r--r--	fs/cachefiles/internal.h	| 1
-rw-r--r--	fs/cachefiles/namei.c	| 98
-rw-r--r--	fs/cachefiles/security.c	| 4
-rw-r--r--	fs/ceph/addr.c	| 70
-rw-r--r--	fs/ceph/auth.c	| 1
-rw-r--r--	fs/ceph/auth_none.h	| 2
-rw-r--r--	fs/ceph/auth_x.c	| 32
-rw-r--r--	fs/ceph/caps.c	| 63
-rw-r--r--	fs/ceph/dir.c	| 16
-rw-r--r--	fs/ceph/file.c	| 3
-rw-r--r--	fs/ceph/inode.c	| 18
-rw-r--r--	fs/ceph/mds_client.c	| 34
-rw-r--r--	fs/ceph/messenger.c	| 48
-rw-r--r--	fs/ceph/messenger.h	| 1
-rw-r--r--	fs/ceph/osd_client.c	| 26
-rw-r--r--	fs/ceph/osd_client.h	| 3
-rw-r--r--	fs/ceph/osdmap.c	| 209
-rw-r--r--	fs/ceph/osdmap.h	| 3
-rw-r--r--	fs/ceph/rados.h	| 7
-rw-r--r--	fs/ceph/snap.c	| 50
-rw-r--r--	fs/ceph/super.c	| 30
-rw-r--r--	fs/ceph/super.h	| 4
-rw-r--r--	fs/cifs/cifs_fs_sb.h	| 3
-rw-r--r--	fs/cifs/cifsfs.c	| 10
-rw-r--r--	fs/cifs/cifsglob.h	| 1
-rw-r--r--	fs/cifs/inode.c	| 21
-rw-r--r--	fs/coda/inode.c	| 8
-rw-r--r--	fs/compat.c	| 2
-rw-r--r--	fs/compat_ioctl.c	| 3
-rw-r--r--	fs/configfs/dir.c	| 4
-rw-r--r--	fs/ecryptfs/crypto.c	| 37
-rw-r--r--	fs/ecryptfs/ecryptfs_kernel.h	| 15
-rw-r--r--	fs/ecryptfs/inode.c	| 129
-rw-r--r--	fs/ecryptfs/main.c	| 10
-rw-r--r--	fs/ecryptfs/mmap.c	| 38
-rw-r--r--	fs/ecryptfs/super.c	| 2
-rw-r--r--	fs/exec.c	| 2
-rw-r--r--	fs/exofs/exofs.h	| 2
-rw-r--r--	fs/exofs/super.c	| 8
-rw-r--r--	fs/ext4/extents.c	| 1
-rw-r--r--	fs/ext4/inode.c	| 3
-rw-r--r--	fs/ext4/mballoc.c	| 21
-rw-r--r--	fs/ioctl.c	| 92
-rw-r--r--	fs/jfs/inode.c	| 2
-rw-r--r--	fs/jfs/jfs_dmap.c	| 16
-rw-r--r--	fs/jfs/jfs_dmap.h	| 6
-rw-r--r--	fs/jfs/jfs_inode.h	| 1
-rw-r--r--	fs/jfs/namei.c	| 4
-rw-r--r--	fs/jfs/resize.c	| 6
-rw-r--r--	fs/jfs/super.c	| 13
-rw-r--r--	fs/jfs/symlink.c	| 14
-rw-r--r--	fs/logfs/gc.c	| 8
-rw-r--r--	fs/logfs/journal.c	| 29
-rw-r--r--	fs/logfs/logfs.h	| 15
-rw-r--r--	fs/logfs/readwrite.c	| 75
-rw-r--r--	fs/logfs/segment.c	| 8
-rw-r--r--	fs/logfs/super.c	| 25
-rw-r--r--	fs/namei.c	| 27
-rw-r--r--	fs/namespace.c	| 6
-rw-r--r--	fs/ncpfs/inode.c	| 8
-rw-r--r--	fs/nfs/client.c	| 5
-rw-r--r--	fs/nfs/delegation.c	| 86
-rw-r--r--	fs/nfs/dir.c	| 6
-rw-r--r--	fs/nfs/inode.c	| 8
-rw-r--r--	fs/nfs/nfs4proc.c	| 9
-rw-r--r--	fs/nfs/super.c	| 3
-rw-r--r--	fs/nfs/write.c	| 95
-rw-r--r--	fs/nfsd/nfs4xdr.c	| 8
-rw-r--r--	fs/nilfs2/super.c	| 1
-rw-r--r--	fs/notify/inotify/Kconfig	| 1
-rw-r--r--	fs/notify/inotify/inotify_fsnotify.c	| 2
-rw-r--r--	fs/notify/inotify/inotify_user.c	| 16
-rw-r--r--	fs/ocfs2/buffer_head_io.c	| 2
-rw-r--r--	fs/ocfs2/dlm/dlmast.c	| 5
-rw-r--r--	fs/ocfs2/dlmfs/dlmfs.c	| 14
-rw-r--r--	fs/ocfs2/file.c	| 32
-rw-r--r--	fs/ocfs2/inode.c	| 68
-rw-r--r--	fs/ocfs2/inode.h	| 2
-rw-r--r--	fs/ocfs2/namei.c	| 58
-rw-r--r--	fs/ocfs2/refcounttree.c	| 3
-rw-r--r--	fs/proc/array.c	| 3
-rw-r--r--	fs/proc/base.c	| 2
-rw-r--r--	fs/proc/task_mmu.c	| 19
-rw-r--r--	fs/quota/Kconfig	| 8
-rw-r--r--	fs/quota/dquot.c	| 16
-rw-r--r--	fs/reiserfs/dir.c	| 2
-rw-r--r--	fs/reiserfs/xattr.c	| 19
-rw-r--r--	fs/smbfs/inode.c	| 8
-rw-r--r--	fs/squashfs/block.c	| 5
-rw-r--r--	fs/squashfs/super.c	| 4
-rw-r--r--	fs/squashfs/zlib_wrapper.c	| 3
-rw-r--r--	fs/super.c	| 9
-rw-r--r--	fs/sync.c	| 3
-rw-r--r--	fs/sysv/dir.c	| 2
-rw-r--r--	fs/xfs/linux-2.6/xfs_super.c	| 5
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c	| 116
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.h	| 7
-rw-r--r--	fs/xfs/quota/xfs_qm_syscalls.c	| 3
-rw-r--r--	fs/xfs/xfs_ag.h	| 1
-rw-r--r--	fs/xfs/xfs_dfrag.c	| 22
-rw-r--r--	fs/xfs/xfs_log.c	| 38
-rw-r--r--	fs/xfs/xfs_mount.h	| 1
115 files changed, 1433 insertions(+), 780 deletions(-)
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5c5bc8480070..f8b86e92cd66 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
+	if (rc) {
+		__putname(v9ses->aname);
+		__putname(v9ses->uname);
+		return ERR_PTR(rc);
+	}
+
 	spin_lock(&v9fs_sessionlist_lock);
 	list_add(&v9ses->slist, &v9fs_sessionlist);
 	spin_unlock(&v9fs_sessionlist_lock);
@@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	return fid;
 
 error:
+	bdi_destroy(&v9ses->bdi);
 	return ERR_PTR(retval);
 }
 
@@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 	__putname(v9ses->uname);
 	__putname(v9ses->aname);
 
+	bdi_destroy(&v9ses->bdi);
+
 	spin_lock(&v9fs_sessionlist_lock);
 	list_del(&v9ses->slist);
 	spin_unlock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a0a8d3dd1361..bec4d0bcb458 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,7 @@
  * Boston, MA 02111-1301 USA
  *
  */
+#include <linux/backing-dev.h>
 
 /**
  * enum p9_session_flags - option flags for each 9P session
@@ -102,6 +103,7 @@ struct v9fs_session_info {
 	u32 uid; /* if ACCESS_SINGLE, the uid that has access */
 	struct p9_client *clnt; /* 9p client */
 	struct list_head slist; /* list of sessions registered with v9fs */
+	struct backing_dev_info bdi;
 };
 
 struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 491108bd6e0d..806da5d3b3a0 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	sb->s_blocksize = 1 << sb->s_blocksize_bits;
 	sb->s_magic = V9FS_MAGIC;
 	sb->s_op = &v9fs_super_ops;
+	sb->s_bdi = &v9ses->bdi;
 
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
 		      MS_NOATIME;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c54dad4e6063..a10f2582844f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/fscache.h>
+#include <linux/backing-dev.h>
 
 #include "afs.h"
 #include "afs_vl.h"
@@ -313,6 +314,7 @@ struct afs_volume {
 	unsigned short rjservers;	/* number of servers discarded due to -ENOMEDIUM */
 	struct afs_server *servers[8];	/* servers on which volume resides (ordered) */
 	struct rw_semaphore server_sem;	/* lock for accessing current server */
+	struct backing_dev_info bdi;
 };
 
 /*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5e813a816ce4..b3feddc4f7d6 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 {
 	struct afs_super_info *super;
 	struct vfsmount *mnt;
-	struct page *page = NULL;
+	struct page *page;
 	size_t size;
-	char *buf, *devname = NULL, *options = NULL;
+	char *buf, *devname, *options;
 	int ret;
 
 	_enter("{%s}", mntpt->d_name.name);
@@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	ret = -EINVAL;
 	size = mntpt->d_inode->i_size;
 	if (size > PAGE_SIZE - 1)
-		goto error;
+		goto error_no_devname;
 
 	ret = -ENOMEM;
 	devname = (char *) get_zeroed_page(GFP_KERNEL);
 	if (!devname)
-		goto error;
+		goto error_no_devname;
 
 	options = (char *) get_zeroed_page(GFP_KERNEL);
 	if (!options)
-		goto error;
+		goto error_no_options;
 
 	/* read the contents of the AFS special symlink */
 	page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
-		goto error;
+		goto error_no_page;
 	}
 
 	ret = -EIO;
@@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	return mnt;
 
 error:
-	if (page)
-		page_cache_release(page);
-	if (devname)
-		free_page((unsigned long) devname);
-	if (options)
-		free_page((unsigned long) options);
+	page_cache_release(page);
+error_no_page:
+	free_page((unsigned long) options);
+error_no_options:
+	free_page((unsigned long) devname);
+error_no_devname:
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 14f6431598ad..e932e5a3a0c1 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
 	sb->s_magic = AFS_FS_MAGIC;
 	sb->s_op = &afs_super_ops;
 	sb->s_fs_info = as;
+	sb->s_bdi = &as->volume->bdi;
 
 	/* allocate the root inode and dentry */
 	fid.vid = as->volume->vid;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index a353e69e2391..401eeb21869f 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
 	volume->cell = params->cell;
 	volume->vid = vlocation->vldb.vid[params->type];
 
+	ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
+	if (ret)
+		goto error_bdi;
+
 	init_rwsem(&volume->server_sem);
 
 	/* look up all the applicable server records */
@@ -151,6 +155,8 @@ error:
 	return ERR_PTR(ret);
 
 error_discard:
+	bdi_destroy(&volume->bdi);
+error_bdi:
 	up_write(&params->cell->vl_sem);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume)
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
 		afs_put_server(volume->servers[loop]);
 
+	bdi_destroy(&volume->bdi);
 	kfree(volume);
 
 	_leave(" [destroyed]");
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c606d92..e8e5e63ac950 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 		}
 		/* Trigger mount for path component or follow link */
 	} else if (ino->flags & AUTOFS_INF_PENDING ||
-		   autofs4_need_mount(flags) ||
-		   current->link_count) {
+		   autofs4_need_mount(flags)) {
 		DPRINTK("waiting for mount name=%.*s",
 			dentry->d_name.len, dentry->d_name.name);
 
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	spin_unlock(&dcache_lock);
 	spin_unlock(&sbi->fs_lock);
 
-	status = try_to_fill_dentry(dentry, 0);
+	status = try_to_fill_dentry(dentry, nd->flags);
 	if (status)
 		goto out_error;
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7ab23e006e4c..2c5f9a0e5d72 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 			}
 		} else if (!mm->start_data) {
 			mm->start_data = seg->addr;
-#ifndef CONFIG_MMU
 			mm->end_data = seg->addr + phdr->p_memsz;
-#endif
 		}
-
-#ifdef CONFIG_MMU
-		if (seg->addr + phdr->p_memsz > mm->end_data)
-			mm->end_data = seg->addr + phdr->p_memsz;
-#endif
 	}
 
 	seg++;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e0e769bdca59..49566c1687d8 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
 
 	if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
 		printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)",
-		       (int) r,(int)(start_brk-start_code),(int)text_len);
+		       (int) r,(int)(start_brk-start_data+text_len),(int)text_len);
 		goto failed;
 	}
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2a6d0193f139..6dcee88c2e5d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -406,16 +406,23 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 
 int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
-	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+	struct inode *bd_inode = filp->f_mapping->host;
+	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
 
-	error = sync_blockdev(bdev);
-	if (error)
-		return error;
-
+	/*
+	 * There is no need to serialise calls to blkdev_issue_flush with
+	 * i_mutex and doing so causes performance issues with concurrent
+	 * O_SYNC writers to a block device.
+	 */
+	mutex_unlock(&bd_inode->i_mutex);
+
 	error = blkdev_issue_flush(bdev, NULL);
 	if (error == -EOPNOTSUPP)
 		error = 0;
+
+	mutex_lock(&bd_inode->i_mutex);
+
 	return error;
 }
 EXPORT_SYMBOL(blkdev_fsync);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b8f2c89ccb..feca04197d02 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 
-static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
-
 /*
  * end_io_wq structs are used to do processing in task context when an IO is
  * complete. This is used during reads to verify checksums, and it is used
@@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 {
 	int err;
 
-	bdi->name = "btrfs";
 	bdi->capabilities = BDI_CAP_MAP_COPY;
-	err = bdi_init(bdi);
+	err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
 	if (err)
 		return err;
 
-	err = bdi_register(bdi, NULL, "btrfs-%d",
-			   atomic_inc_return(&btrfs_bdi_num));
-	if (err) {
-		bdi_destroy(bdi);
-		return err;
-	}
-
 	bdi->ra_pages = default_backing_dev_info.ra_pages;
 	bdi->unplug_io_fn = btrfs_unplug_io_fn;
 	bdi->unplug_io_data = info;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e84ef60ffe35..97a97839a867 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		ret = -EBADF;
 		goto out_drop_write;
 	}
+
 	src = src_file->f_dentry->d_inode;
 
 	ret = -EINVAL;
 	if (src == inode)
 		goto out_fput;
 
+	/* the src must be open for reading */
+	if (!(src_file->f_mode & FMODE_READ))
+		goto out_fput;
+
 	ret = -EISDIR;
 	if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
 		goto out_fput;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f9c624..a8cd821226da 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
 	loff_t i_size;			/* object size */
 	unsigned long flags;
 #define CACHEFILES_OBJECT_ACTIVE	0	/* T if marked active */
+#define CACHEFILES_OBJECT_BURIED	1	/* T if preemptively buried */
 	atomic_t usage;			/* object usage count */
 	uint8_t type;			/* object type */
 	uint8_t new;			/* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1ee0d..f4a7840bf42c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
 }
 
 /*
+ * mark the owner of a dentry, if there is one, to indicate that that dentry
+ * has been preemptively deleted
+ * - the caller must hold the i_mutex on the dentry's parent as required to
+ *   call vfs_unlink(), vfs_rmdir() or vfs_rename()
+ */
+static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
+					  struct dentry *dentry)
+{
+	struct cachefiles_object *object;
+	struct rb_node *p;
+
+	_enter(",'%*.*s'",
+	       dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
+
+	write_lock(&cache->active_lock);
+
+	p = cache->active_nodes.rb_node;
+	while (p) {
+		object = rb_entry(p, struct cachefiles_object, active_node);
+		if (object->dentry > dentry)
+			p = p->rb_left;
+		else if (object->dentry < dentry)
+			p = p->rb_right;
+		else
+			goto found_dentry;
+	}
+
+	write_unlock(&cache->active_lock);
+	_leave(" [no owner]");
+	return;
+
+	/* found the dentry for */
+found_dentry:
+	kdebug("preemptive burial: OBJ%x [%s] %p",
+	       object->fscache.debug_id,
+	       fscache_object_states[object->fscache.state],
+	       dentry);
+
+	if (object->fscache.state < FSCACHE_OBJECT_DYING) {
+		printk(KERN_ERR "\n");
+		printk(KERN_ERR "CacheFiles: Error:"
+		       " Can't preemptively bury live object\n");
+		cachefiles_printk_object(object, NULL);
+	} else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+		printk(KERN_ERR "CacheFiles: Error:"
+		       " Object already preemptively buried\n");
+	}
+
+	write_unlock(&cache->active_lock);
+	_leave(" [owner marked]");
+}
+
+/*
  * record the fact that an object is now active
  */
 static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
  */
 static int cachefiles_bury_object(struct cachefiles_cache *cache,
 				  struct dentry *dir,
-				  struct dentry *rep)
+				  struct dentry *rep,
+				  bool preemptive)
 {
 	struct dentry *grave, *trap;
 	char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 	       dir->d_name.len, dir->d_name.len, dir->d_name.name,
 	       rep->d_name.len, rep->d_name.len, rep->d_name.name);
 
+	_debug("remove %p from %p", rep, dir);
+
 	/* non-directories can just be unlinked */
 	if (!S_ISDIR(rep->d_inode->i_mode)) {
 		_debug("unlink stale object");
 		ret = vfs_unlink(dir->d_inode, rep);
 
+		if (preemptive)
+			cachefiles_mark_object_buried(cache, rep);
+
 		mutex_unlock(&dir->d_inode->i_mutex);
 
 		if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
 	if (ret != 0 && ret != -ENOMEM)
 		cachefiles_io_error(cache, "Rename failed with error %d", ret);
 
+	if (preemptive)
+		cachefiles_mark_object_buried(cache, rep);
+
 	unlock_rename(cache->graveyard, dir);
 	dput(grave);
 	_leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 	struct dentry *dir;
 	int ret;
 
-	_enter(",{%p}", object->dentry);
+	_enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
 
 	ASSERT(object->dentry);
 	ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 
-	/* we need to check that our parent is _still_ our parent - it may have
-	 * been renamed */
-	if (dir == object->dentry->d_parent) {
-		ret = cachefiles_bury_object(cache, dir, object->dentry);
-	} else {
-		/* it got moved, presumably by cachefilesd culling it, so it's
-		 * no longer in the key path and we can ignore it */
+	if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+		/* object allocation for the same key preemptively deleted this
+		 * object's file so that it could create its own file */
+		_debug("object preemptively buried");
 		mutex_unlock(&dir->d_inode->i_mutex);
 		ret = 0;
+	} else {
+		/* we need to check that our parent is _still_ our parent - it
+		 * may have been renamed */
+		if (dir == object->dentry->d_parent) {
+			ret = cachefiles_bury_object(cache, dir,
+						     object->dentry, false);
+		} else {
+			/* it got moved, presumably by cachefilesd culling it,
+			 * so it's no longer in the key path and we can ignore
+			 * it */
+			mutex_unlock(&dir->d_inode->i_mutex);
+			ret = 0;
+		}
 	}
 
 	dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
 	const char *name;
 	int ret, nlen;
 
-	_enter("{%p},,%s,", parent->dentry, key);
+	_enter("OBJ%x{%p},OBJ%x,%s,",
+	       parent->fscache.debug_id, parent->dentry,
+	       object->fscache.debug_id, key);
 
 	cache = container_of(parent->fscache.cache,
 			     struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
 	 * mutex) */
 	object->dentry = NULL;
 
-	ret = cachefiles_bury_object(cache, dir, next);
+	ret = cachefiles_bury_object(cache, dir, next, true);
 	dput(next);
 	next = NULL;
 
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
 	/* actually remove the victim (drops the dir mutex) */
 	_debug("bury");
 
-	ret = cachefiles_bury_object(cache, dir, victim);
+	ret = cachefiles_bury_object(cache, dir, victim, false);
 	if (ret < 0)
 		goto error;
 
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb2232..039b5011d83b 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
 /*
  * check the security details of the on-disk cache
  * - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ *   error
  */
 int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
 					struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
 	 * which create files */
 	ret = set_create_files_as(new, root->d_inode);
 	if (ret < 0) {
+		abort_creds(new);
+		cachefiles_begin_secure(cache, _saved_cred);
 		_leave(" = %d [cfa]", ret);
 		return ret;
 	}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa3cd7cc3e40..a9005d862ed4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -337,16 +337,15 @@ out:
 /*
  * Get ref for the oldest snapc for an inode with dirty data... that is, the
  * only snap context we are allowed to write back.
- *
- * Caller holds i_lock.
  */
-static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
+static struct ceph_snap_context *get_oldest_context(struct inode *inode,
 						    u64 *snap_size)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_snap_context *snapc = NULL;
 	struct ceph_cap_snap *capsnap = NULL;
 
+	spin_lock(&inode->i_lock);
 	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
 		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
 		     capsnap->context, capsnap->dirty_pages);
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
 			break;
 		}
 	}
-	if (!snapc && ci->i_snap_realm) {
-		snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
+	if (!snapc && ci->i_head_snapc) {
+		snapc = ceph_get_snap_context(ci->i_head_snapc);
 		dout(" head snapc %p has %d dirty pages\n",
 		     snapc, ci->i_wrbuffer_ref_head);
 	}
-	return snapc;
-}
-
-static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-						    u64 *snap_size)
-{
-	struct ceph_snap_context *snapc = NULL;
-
-	spin_lock(&inode->i_lock);
-	snapc = __get_oldest_context(inode, snap_size);
 	spin_unlock(&inode->i_lock);
 	return snapc;
 }
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	int len = PAGE_CACHE_SIZE;
 	loff_t i_size;
 	int err = 0;
-	struct ceph_snap_context *snapc;
+	struct ceph_snap_context *snapc, *oldest;
 	u64 snap_size = 0;
 	long writeback_stat;
 
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		dout("writepage %p page %p not dirty?\n", inode, page);
 		goto out;
 	}
-	if (snapc != get_oldest_context(inode, &snap_size)) {
+	oldest = get_oldest_context(inode, &snap_size);
+	if (snapc->seq > oldest->seq) {
 		dout("writepage %p page %p snapc %p not writeable - noop\n",
 		     inode, page, (void *)page->private);
 		/* we should only noop if called by kswapd */
 		WARN_ON((current->flags & PF_MEMALLOC) == 0);
+		ceph_put_snap_context(oldest);
 		goto out;
 	}
+	ceph_put_snap_context(oldest);
 
 	/* is this a partial page at end of file? */
 	if (snap_size)
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	ClearPagePrivate(page);
 	end_page_writeback(page);
 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-	ceph_put_snap_context(snapc);
+	ceph_put_snap_context(snapc);	/* page's reference */
 out:
 	return err;
 }
@@ -512,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req,
 	int i;
 	struct ceph_snap_context *snapc = req->r_snapc;
 	struct address_space *mapping = inode->i_mapping;
-	struct writeback_control *wbc = req->r_wbc;
 	__s32 rc = -EIO;
 	u64 bytes = 0;
 	struct ceph_client *client = ceph_inode_to_client(inode);
 	long writeback_stat;
-	unsigned issued = __ceph_caps_issued(ci, NULL);
+	unsigned issued = ceph_caps_issued(ci);
 
 	/* parse reply */
 	replyhead = msg->front.iov_base;
@@ -554,13 +545,9 @@ static void writepages_finish(struct ceph_osd_request *req,
 			clear_bdi_congested(&client->backing_dev_info,
 					    BLK_RW_ASYNC);
 
-		if (i >= wrote) {
-			dout("inode %p skipping page %p\n", inode, page);
-			wbc->pages_skipped++;
-		}
+		ceph_put_snap_context((void *)page->private);
 		page->private = 0;
 		ClearPagePrivate(page);
-		ceph_put_snap_context(snapc);
 		dout("unlocking %d %p\n", i, page);
 		end_page_writeback(page);
 
@@ -618,7 +605,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	int range_whole = 0;
 	int should_loop = 1;
 	pgoff_t max_pages = 0, max_pages_ever = 0;
-	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
+	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
 	struct pagevec pvec;
 	int done = 0;
 	int rc = 0;
@@ -770,9 +757,10 @@ get_more_pages:
 			}
 
 			/* only if matching snap context */
-			if (snapc != (void *)page->private) {
-				dout("page snapc %p != oldest %p\n",
-				     (void *)page->private, snapc);
+			pgsnapc = (void *)page->private;
+			if (pgsnapc->seq > snapc->seq) {
+				dout("page snapc %p %lld > oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
 				unlock_page(page);
 				if (!locked_pages)
 					continue; /* keep looking for snap */
@@ -806,7 +794,6 @@ get_more_pages:
 			alloc_page_vec(client, req);
 			req->r_callback = writepages_finish;
 			req->r_inode = inode;
-			req->r_wbc = wbc;
 		}
 
 		/* note position of first page in pvec */
@@ -914,7 +901,10 @@ static int context_is_writeable_or_written(struct inode *inode,
 				   struct ceph_snap_context *snapc)
 {
 	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
-	return !oldest || snapc->seq <= oldest->seq;
+	int ret = !oldest || snapc->seq <= oldest->seq;
+
+	ceph_put_snap_context(oldest);
+	return ret;
 }
 
 /*
@@ -936,8 +926,8 @@ static int ceph_update_writeable_page(struct file *file,
 	int pos_in_page = pos & ~PAGE_CACHE_MASK;
 	int end_in_page = pos_in_page + len;
 	loff_t i_size;
-	struct ceph_snap_context *snapc;
 	int r;
+	struct ceph_snap_context *snapc, *oldest;
 
 retry_locked:
 	/* writepages currently holds page lock, but if we change that later, */
@@ -947,23 +937,24 @@ retry_locked:
 	BUG_ON(!ci->i_snap_realm);
 	down_read(&mdsc->snap_rwsem);
 	BUG_ON(!ci->i_snap_realm->cached_context);
-	if (page->private &&
-	    (void *)page->private != ci->i_snap_realm->cached_context) {
+	snapc = (void *)page->private;
+	if (snapc && snapc != ci->i_head_snapc) {
 		/*
 		 * this page is already dirty in another (older) snap
 		 * context! is it writeable now?
 		 */
-		snapc = get_oldest_context(inode, NULL);
+		oldest = get_oldest_context(inode, NULL);
 		up_read(&mdsc->snap_rwsem);
 
-		if (snapc != (void *)page->private) {
+		if (snapc->seq > oldest->seq) {
+			ceph_put_snap_context(oldest);
 			dout(" page %p snapc %p not current or oldest\n",
-			     page, (void *)page->private);
+			     page, snapc);
 			/*
 			 * queue for writeback, and wait for snapc to
 			 * be writeable or written
 			 */
-			snapc = ceph_get_snap_context((void *)page->private);
+			snapc = ceph_get_snap_context(snapc);
 			unlock_page(page);
 			ceph_queue_writeback(inode);
 			r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +964,7 @@ retry_locked:
 				return r;
 			return -EAGAIN;
 		}
+		ceph_put_snap_context(oldest);
 
 		/* yay, writeable, do it now (without dropping page lock) */
 		dout(" page %p snapc %p not current, but oldest\n",
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b94b866..818afe72e6c7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 
 #include "types.h"
 #include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c05533a31c..8164df1a08be 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
 #ifndef _FS_CEPH_AUTH_NONE_H
 #define _FS_CEPH_AUTH_NONE_H
 
+#include <linux/slab.h>
+
 #include "auth.h"
 
 /*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4dc8cc..fee5a08da881 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
 #include "auth.h"
 #include "decode.h"
 
-struct kmem_cache *ceph_x_ticketbuf_cachep;
-
 #define TEMP_TICKET_BUF_LEN 256
 
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	char *ticket_buf;
 	u8 struct_v;
 
-	dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC);
+	dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!dbuf)
 		return -ENOMEM;
 
 	ret = -ENOMEM;
-	ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep,
-				      GFP_NOFS | GFP_ATOMIC);
+	ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!ticket_buf)
 		goto out_dbuf;
 
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 
 	ret = 0;
 out:
-	kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf);
+	kfree(ticket_buf);
 out_dbuf:
-	kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf);
+	kfree(dbuf);
 	return ret;
 
 bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
 		remove_ticket_handler(ac, th);
 	}
 
-	kmem_cache_destroy(ceph_x_ticketbuf_cachep);
-
 	kfree(ac->private);
 	ac->private = NULL;
 }
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
 	int ret;
 
 	dout("ceph_x_init %p\n", ac);
+	ret = -ENOMEM;
 	xi = kzalloc(sizeof(*xi), GFP_NOFS);
 	if (!xi)
-		return -ENOMEM;
+		goto out;
 
-	ret = -ENOMEM;
-	ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
-				      TEMP_TICKET_BUF_LEN, 8,
-				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-				      NULL);
-	if (!ceph_x_ticketbuf_cachep)
-		goto done_nomem;
 	ret = -EINVAL;
 	if (!ac->secret) {
 		pr_err("no secret set (for auth_x protocol)\n");
-		goto done_nomem;
+		goto out_nomem;
 	}
 
 	ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
 	if (ret)
-		goto done_nomem;
+		goto out_nomem;
 
 	xi->starting = true;
 	xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
 	ac->ops = &ceph_x_ops;
 	return 0;
 
-done_nomem:
+out_nomem:
 	kfree(xi);
-	if (ceph_x_ticketbuf_cachep)
-		kmem_cache_destroy(ceph_x_ticketbuf_cachep);
+out:
 	return ret;
 }
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 3710e077a857..d9400534b279 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 }
 
 /*
+ * Remove a cap. Take steps to deal with a racing iterate_session_caps.
+ *
  * caller should hold i_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+	int removed = 0;
 
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
 
-	/* remove from inode list */
-	rb_erase(&cap->ci_node, &ci->i_caps);
-	cap->ci = NULL;
-	if (ci->i_auth_cap == cap)
-		ci->i_auth_cap = NULL;
-
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
 	if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 		list_del_init(&cap->session_caps);
 		session->s_nr_caps--;
 		cap->session = NULL;
+		removed = 1;
 	}
+	/* protect backpointer with s_cap_lock: see iterate_session_caps */
+	cap->ci = NULL;
 	spin_unlock(&session->s_cap_lock);
 
-	if (cap->session == NULL)
+	/* remove from inode list */
+	rb_erase(&cap->ci_node, &ci->i_caps);
+	if (ci->i_auth_cap == cap)
+		ci->i_auth_cap = NULL;
+
+	if (removed)
 		ceph_put_cap(cap);
 
 	if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
@@ -1205,6 +1210,12 @@ retry:
 		if (capsnap->dirty_pages || capsnap->writing)
 			continue;
 
+		/*
+		 * if cap writeback already occurred, we should have dropped
+		 * the capsnap in ceph_put_wrbuffer_cap_refs.
+		 */
+		BUG_ON(capsnap->dirty == 0);
+
 		/* pick mds, take s_mutex */
 		mds = __ceph_get_cap_mds(ci, &mseq);
 		if (session && session->s_mds != mds) {
@@ -1855,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
 		} else {
 			pr_err("%p auth cap %p not mds%d ???\n", inode,
 			       cap, session->s_mds);
-			spin_unlock(&inode->i_lock);
 		}
+		spin_unlock(&inode->i_lock);
 	}
 }
 
@@ -2118,8 +2129,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 	}
 	spin_unlock(&inode->i_lock);
 
-	dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
-	     last ? "last" : "");
+	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+	     last ? " last" : "", put ? " put" : "");
 
 	if (last && !flushsnaps)
 		ceph_check_caps(ci, 0, NULL);
@@ -2143,7 +2154,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 {
 	struct inode *inode = &ci->vfs_inode;
 	int last = 0;
-	int last_snap = 0;
+	int complete_capsnap = 0;
+	int drop_capsnap = 0;
 	int found = 0;
 	struct ceph_cap_snap *capsnap = NULL;
 
@@ -2166,19 +2178,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
 			if (capsnap->context == snapc) {
 				found = 1;
-				capsnap->dirty_pages -= nr;
-				last_snap = !capsnap->dirty_pages;
 				break;
 			}
 		}
 		BUG_ON(!found);
+		capsnap->dirty_pages -= nr;
+		if (capsnap->dirty_pages == 0) {
+			complete_capsnap = 1;
+			if (capsnap->dirty == 0)
+				/* cap writeback completed before we created
+				 * the cap_snap; no FLUSHSNAP is needed */
+				drop_capsnap = 1;
+		}
 		dout("put_wrbuffer_cap_refs on %p cap_snap %p "
-		     " snap %lld %d/%d -> %d/%d %s%s\n",
+		     " snap %lld %d/%d -> %d/%d %s%s%s\n",
 		     inode, capsnap, capsnap->context->seq,
 		     ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
 		     ci->i_wrbuffer_ref, capsnap->dirty_pages,
 		     last ? " (wrbuffer last)" : "",
-		     last_snap ? " (capsnap last)" : "");
+		     complete_capsnap ? " (complete capsnap)" : "",
+		     drop_capsnap ? " (drop capsnap)" : "");
+		if (drop_capsnap) {
+			ceph_put_snap_context(capsnap->context);
+			list_del(&capsnap->ci_item);
+			list_del(&capsnap->flushing_item);
+			ceph_put_cap_snap(capsnap);
+		}
 	}
 
 	spin_unlock(&inode->i_lock);
@@ -2186,10 +2211,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 	if (last) {
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 		iput(inode);
-	} else if (last_snap) {
+	} else if (complete_capsnap) {
 		ceph_flush_snaps(ci);
 		wake_up(&ci->i_cap_wq);
 	}
+	if (drop_capsnap)
+		iput(inode);
 }
 
 /*
@@ -2465,8 +2492,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 			break;
 		}
 		WARN_ON(capsnap->dirty_pages || capsnap->writing);
-		dout(" removing cap_snap %p follows %lld\n",
-		     capsnap, follows);
+		dout(" removing %p cap_snap %p follows %lld\n",
+		     inode, capsnap, follows);
 		ceph_put_snap_context(capsnap->context);
 		list_del(&capsnap->ci_item);
 		list_del(&capsnap->flushing_item);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7261dc6c2ead..650d2db5ed26 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -171,11 +171,11 @@ more:
 	spin_lock(&inode->i_lock);
 	spin_lock(&dcache_lock);
 
+	last = dentry;
+
 	if (err < 0)
 		goto out_unlock;
 
-	last = dentry;
-
 	p = p->prev;
 	filp->f_pos++;
 
@@ -312,7 +312,7 @@ more:
 		req->r_readdir_offset = fi->next_offset;
 		req->r_args.readdir.frag = cpu_to_le32(frag);
 		req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-		req->r_num_caps = max_entries;
+		req->r_num_caps = max_entries + 1;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
 		if (err < 0) {
 			ceph_mdsc_put_request(req);
@@ -489,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 		struct inode *inode = ceph_get_snapdir(parent);
 		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
 		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
+		BUG_ON(!d_unhashed(dentry));
 		d_add(dentry, inode);
 		err = 0;
 	}
@@ -879,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * do_request, above). If there is no trace, we need
 		 * to do it here.
 		 */
+
+		/* d_move screws up d_subdirs order */
+		ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+
 		d_move(old_dentry, new_dentry);
+
+		/* ensure target dentry is invalidated, despite
+		   rehashing bug in vfs_rename_dir */
+		new_dentry->d_time = jiffies;
+		ceph_dentry(new_dentry)->lease_shared_gen = 0;
 	}
 	ceph_mdsc_put_request(req);
 	return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5da2c1..ed6f19721d6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
 		 * throw out any page cache pages in this range. this
 		 * may block.
 		 */
-		truncate_inode_pages_range(inode->i_mapping, pos, pos+len);
+		truncate_inode_pages_range(inode->i_mapping, pos,
+					   (pos+len) | (PAGE_CACHE_SIZE-1));
 	} else {
 		pages = alloc_page_vector(num_pages);
 		if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index aca82d55cc53..85b4d2ffdeba 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
 			__ceph_get_fmode(ci, cap_fmode);
 			spin_unlock(&inode->i_lock);
 		}
+	} else if (cap_fmode >= 0) {
+		pr_warning("mds issued no caps on %llx.%llx\n",
+			   ceph_vinop(inode));
+		__ceph_get_fmode(ci, cap_fmode);
 	}
 
 	/* update delegation info? */
@@ -886,6 +890,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	struct inode *in = NULL;
 	struct ceph_mds_reply_inode *ininfo;
 	struct ceph_vino vino;
+	struct ceph_client *client = ceph_sb_to_client(sb);
 	int i = 0;
 	int err = 0;
 
@@ -949,7 +954,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		return err;
 	}
 
-	if (rinfo->head->is_dentry && !req->r_aborted) {
+	/*
+	 * ignore null lease/binding on snapdir ENOENT, or else we
+	 * will have trouble splicing in the virtual snapdir later
+	 */
+	if (rinfo->head->is_dentry && !req->r_aborted &&
+	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
+					       client->mount_args->snapdir_name,
+					       req->r_dentry->d_name.len))) {
 		/*
 		 * lookup link rename : null -> possibly existing inode
 		 * mknod symlink mkdir : null -> new inode
@@ -989,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			     dn, dn->d_name.len, dn->d_name.name);
 			dout("fill_trace doing d_move %p -> %p\n",
 			     req->r_old_dentry, dn);
+
+			/* d_move screws up d_subdirs order */
+			ceph_i_clear(dir, CEPH_I_COMPLETE);
+
 			d_move(req->r_old_dentry, dn);
 			dout(" src %p '%.*s' dst %p '%.*s'\n",
 			     req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47be..24561a557e01 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
 }
 
 /*
- * Helper to safely iterate over all caps associated with a session.
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
  *
- * caller must hold session s_mutex
+ * Caller must hold session s_mutex.
  */
 static int iterate_session_caps(struct ceph_mds_session *session,
 				 int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
 	struct ceph_mds_session *session = NULL;
 	struct ceph_msg *reply;
 	struct rb_node *p;
-	int err;
+	int err = -ENOMEM;
 	struct ceph_pagelist *pagelist;
 
 	pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
 		goto fail;
 	err = iterate_session_caps(session, encode_caps_cb, pagelist);
 	if (err < 0)
-		goto out;
+		goto fail;
 
 	/*
 	 * snaprealms. we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
2213 reply->nr_pages = calc_pages_for(0, pagelist->length); 2214 reply->nr_pages = calc_pages_for(0, pagelist->length);
2214 ceph_con_send(&session->s_con, reply); 2215 ceph_con_send(&session->s_con, reply);
2215 2216
2216 if (session) { 2217 session->s_state = CEPH_MDS_SESSION_OPEN;
2217 session->s_state = CEPH_MDS_SESSION_OPEN; 2218 mutex_unlock(&session->s_mutex);
2218 __wake_requests(mdsc, &session->s_waiting); 2219
2219 } 2220 mutex_lock(&mdsc->mutex);
2221 __wake_requests(mdsc, &session->s_waiting);
2222 mutex_unlock(&mdsc->mutex);
2223
2224 ceph_put_mds_session(session);
2220 2225
2221out:
2222 up_read(&mdsc->snap_rwsem); 2226 up_read(&mdsc->snap_rwsem);
2223 if (session) {
2224 mutex_unlock(&session->s_mutex);
2225 ceph_put_mds_session(session);
2226 }
2227 mutex_lock(&mdsc->mutex); 2227 mutex_lock(&mdsc->mutex);
2228 return; 2228 return;
2229 2229
2230fail: 2230fail:
2231 ceph_msg_put(reply); 2231 ceph_msg_put(reply);
2232 up_read(&mdsc->snap_rwsem);
2233 mutex_unlock(&session->s_mutex);
2234 ceph_put_mds_session(session);
2232fail_nomsg: 2235fail_nomsg:
2233 ceph_pagelist_release(pagelist); 2236 ceph_pagelist_release(pagelist);
2234 kfree(pagelist); 2237 kfree(pagelist);
2235fail_nopagelist: 2238fail_nopagelist:
2236 pr_err("ENOMEM preparing reconnect for mds%d\n", mds); 2239 pr_err("error %d preparing reconnect for mds%d\n", err, mds);
2237 goto out; 2240 mutex_lock(&mdsc->mutex);
2241 return;
2238} 2242}
2239 2243
2240 2244
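
[editor's note] The mds_client.c changes above are almost entirely error-path work: err now starts at -ENOMEM so the final pr_err() can report a real code, an iterate_session_caps() failure unwinds through the fail labels (dropping snap_rwsem, the session mutex, and the session reference), and the success path releases the session mutex before retaking mdsc->mutex to wake waiters. A compact, self-contained sketch of the cascading-label unwind idiom this relies on; the resources and the encode step are placeholders, not the kernel API:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int encode_caps(char *buf) { (void)buf; return 0; }  /* stub step */

    static int do_reconnect(void)
    {
        int err = -ENOMEM;              /* reported if an early allocation fails */
        char *pagelist, *reply;

        pagelist = malloc(64);
        if (!pagelist)
            goto fail_nopagelist;
        reply = malloc(128);
        if (!reply)
            goto fail_nomsg;

        err = encode_caps(reply);       /* any later failure jumps to "fail" */
        if (err < 0)
            goto fail;

        free(reply);                    /* success: release in reverse order */
        free(pagelist);
        return 0;

    fail:
        free(reply);
    fail_nomsg:
        free(pagelist);
    fail_nopagelist:
        fprintf(stderr, "error %d preparing reconnect\n", err);
        return err;
    }

    int main(void)
    {
        return do_reconnect() ? 1 : 0;
    }
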
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 8f1715ffbe4b..cd4fadb6491a 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -30,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
30static char tag_ack = CEPH_MSGR_TAG_ACK; 30static char tag_ack = CEPH_MSGR_TAG_ACK;
31static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 31static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
32 32
33#ifdef CONFIG_LOCKDEP
34static struct lock_class_key socket_class;
35#endif
36
33 37
34static void queue_con(struct ceph_connection *con); 38static void queue_con(struct ceph_connection *con);
35static void con_work(struct work_struct *); 39static void con_work(struct work_struct *);
@@ -228,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
228 con->sock = sock; 232 con->sock = sock;
229 sock->sk->sk_allocation = GFP_NOFS; 233 sock->sk->sk_allocation = GFP_NOFS;
230 234
235#ifdef CONFIG_LOCKDEP
236 lockdep_set_class(&sock->sk->sk_lock, &socket_class);
237#endif
238
231 set_sock_callbacks(sock, con); 239 set_sock_callbacks(sock, con);
232 240
233 dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); 241 dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
@@ -333,6 +341,7 @@ static void reset_connection(struct ceph_connection *con)
333 con->out_msg = NULL; 341 con->out_msg = NULL;
334 } 342 }
335 con->in_seq = 0; 343 con->in_seq = 0;
344 con->in_seq_acked = 0;
336} 345}
337 346
338/* 347/*
@@ -483,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
483 list_move_tail(&m->list_head, &con->out_sent); 492 list_move_tail(&m->list_head, &con->out_sent);
484 } 493 }
485 494
486 m->hdr.seq = cpu_to_le64(++con->out_seq); 495 /*
496 * only assign outgoing seq # if we haven't sent this message
 497 * yet. if it is requeued, resend with its original seq.
498 */
499 if (m->needs_out_seq) {
500 m->hdr.seq = cpu_to_le64(++con->out_seq);
501 m->needs_out_seq = false;
502 }
487 503
488 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", 504 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
489 m, con->out_seq, le16_to_cpu(m->hdr.type), 505 m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1325,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con)
1325 unsigned front_len, middle_len, data_len, data_off; 1341 unsigned front_len, middle_len, data_len, data_off;
1326 int datacrc = con->msgr->nocrc; 1342 int datacrc = con->msgr->nocrc;
1327 int skip; 1343 int skip;
1344 u64 seq;
1328 1345
1329 dout("read_partial_message con %p msg %p\n", con, m); 1346 dout("read_partial_message con %p msg %p\n", con, m);
1330 1347
@@ -1359,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con)
1359 return -EIO; 1376 return -EIO;
1360 data_off = le16_to_cpu(con->in_hdr.data_off); 1377 data_off = le16_to_cpu(con->in_hdr.data_off);
1361 1378
1379 /* verify seq# */
1380 seq = le64_to_cpu(con->in_hdr.seq);
1381 if ((s64)seq - (s64)con->in_seq < 1) {
1382 pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
1383 ENTITY_NAME(con->peer_name),
1384 pr_addr(&con->peer_addr.in_addr),
1385 seq, con->in_seq + 1);
1386 con->in_base_pos = -front_len - middle_len - data_len -
1387 sizeof(m->footer);
1388 con->in_tag = CEPH_MSGR_TAG_READY;
1389 con->in_seq++;
1390 return 0;
1391 } else if ((s64)seq - (s64)con->in_seq > 1) {
1392 pr_err("read_partial_message bad seq %lld expected %lld\n",
1393 seq, con->in_seq + 1);
1394 con->error_msg = "bad message sequence # for incoming message";
1395 return -EBADMSG;
1396 }
1397
1362 /* allocate message? */ 1398 /* allocate message? */
1363 if (!con->in_msg) { 1399 if (!con->in_msg) {
1364 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 1400 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1370,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con)
1370 con->in_base_pos = -front_len - middle_len - data_len - 1406 con->in_base_pos = -front_len - middle_len - data_len -
1371 sizeof(m->footer); 1407 sizeof(m->footer);
1372 con->in_tag = CEPH_MSGR_TAG_READY; 1408 con->in_tag = CEPH_MSGR_TAG_READY;
1409 con->in_seq++;
1373 return 0; 1410 return 0;
1374 } 1411 }
1375 if (IS_ERR(con->in_msg)) { 1412 if (IS_ERR(con->in_msg)) {
@@ -1956,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
1956 1993
1957 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); 1994 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
1958 1995
1996 msg->needs_out_seq = true;
1997
1959 /* queue */ 1998 /* queue */
1960 mutex_lock(&con->mutex); 1999 mutex_lock(&con->mutex);
1961 BUG_ON(!list_empty(&msg->list_head)); 2000 BUG_ON(!list_empty(&msg->list_head));
@@ -2021,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2021 ceph_msg_put(con->in_msg); 2060 ceph_msg_put(con->in_msg);
2022 con->in_msg = NULL; 2061 con->in_msg = NULL;
2023 con->in_tag = CEPH_MSGR_TAG_READY; 2062 con->in_tag = CEPH_MSGR_TAG_READY;
2063 con->in_seq++;
2024 } else { 2064 } else {
2025 dout("con_revoke_pages %p msg %p pages %p no-op\n", 2065 dout("con_revoke_pages %p msg %p pages %p no-op\n",
2026 con, con->in_msg, msg); 2066 con, con->in_msg, msg);
@@ -2054,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
2054 kref_init(&m->kref); 2094 kref_init(&m->kref);
2055 INIT_LIST_HEAD(&m->list_head); 2095 INIT_LIST_HEAD(&m->list_head);
2056 2096
2097 m->hdr.tid = 0;
2057 m->hdr.type = cpu_to_le16(type); 2098 m->hdr.type = cpu_to_le16(type);
2099 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
2100 m->hdr.version = 0;
2058 m->hdr.front_len = cpu_to_le32(front_len); 2101 m->hdr.front_len = cpu_to_le32(front_len);
2059 m->hdr.middle_len = 0; 2102 m->hdr.middle_len = 0;
2060 m->hdr.data_len = cpu_to_le32(page_len); 2103 m->hdr.data_len = cpu_to_le32(page_len);
2061 m->hdr.data_off = cpu_to_le16(page_off); 2104 m->hdr.data_off = cpu_to_le16(page_off);
2062 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 2105 m->hdr.reserved = 0;
2063 m->footer.front_crc = 0; 2106 m->footer.front_crc = 0;
2064 m->footer.middle_crc = 0; 2107 m->footer.middle_crc = 0;
2065 m->footer.data_crc = 0; 2108 m->footer.data_crc = 0;
2109 m->footer.flags = 0;
2066 m->front_max = front_len; 2110 m->front_max = front_len;
2067 m->front_is_vmalloc = false; 2111 m->front_is_vmalloc = false;
2068 m->more_to_follow = false; 2112 m->more_to_follow = false;
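
[editor's note] Several of the messenger.c changes above harden message sequencing: an outgoing message is stamped with a new out_seq only the first time it is queued (the needs_out_seq flag), the new in_seq accounting skips incoming messages that were already consumed before a reconnect, and a gap of more than one in the incoming seq is treated as a fatal protocol error. A small standalone model of the receive-side check, with types and return values simplified from the kernel code:

    #include <stdint.h>
    #include <stdio.h>

    enum seq_action { SEQ_ACCEPT, SEQ_SKIP, SEQ_BAD };

    /* in_seq is the last sequence number accepted on this connection. */
    static enum seq_action check_in_seq(uint64_t hdr_seq, uint64_t in_seq)
    {
        if ((int64_t)hdr_seq - (int64_t)in_seq < 1) {
            /* already seen (e.g. resent after a reconnect): skip it */
            return SEQ_SKIP;
        } else if ((int64_t)hdr_seq - (int64_t)in_seq > 1) {
            /* a message was lost in between: bad sequence, fail the read */
            return SEQ_BAD;
        }
        return SEQ_ACCEPT;              /* exactly in_seq + 1 */
    }

    int main(void)
    {
        printf("%d %d %d\n",
               check_in_seq(5, 5),      /* SEQ_SKIP   */
               check_in_seq(6, 5),      /* SEQ_ACCEPT */
               check_in_seq(8, 5));     /* SEQ_BAD    */
        return 0;
    }
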
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae73cdc..a5caf91cc971 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
86 struct kref kref; 86 struct kref kref;
87 bool front_is_vmalloc; 87 bool front_is_vmalloc;
88 bool more_to_follow; 88 bool more_to_follow;
89 bool needs_out_seq;
89 int front_max; 90 int front_max;
90 91
91 struct ceph_msgpool *pool; 92 struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4dedaace6..3514f71ff85f 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
565{ 565{
566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; 566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
567 struct ceph_pg pgid; 567 struct ceph_pg pgid;
568 int o = -1; 568 int acting[CEPH_PG_MAX_SIZE];
569 int o = -1, num = 0;
569 int err; 570 int err;
570 571
571 dout("map_osds %p tid %lld\n", req, req->r_tid); 572 dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
576 pgid = reqhead->layout.ol_pgid; 577 pgid = reqhead->layout.ol_pgid;
577 req->r_pgid = pgid; 578 req->r_pgid = pgid;
578 579
579 o = ceph_calc_pg_primary(osdc->osdmap, pgid); 580 err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
581 if (err > 0) {
582 o = acting[0];
583 num = err;
584 }
580 585
581 if ((req->r_osd && req->r_osd->o_osd == o && 586 if ((req->r_osd && req->r_osd->o_osd == o &&
582 req->r_sent >= req->r_osd->o_incarnation) || 587 req->r_sent >= req->r_osd->o_incarnation &&
588 req->r_num_pg_osds == num &&
589 memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
583 (req->r_osd == NULL && o == -1)) 590 (req->r_osd == NULL && o == -1))
584 return 0; /* no change */ 591 return 0; /* no change */
585 592
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
587 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, 594 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
588 req->r_osd ? req->r_osd->o_osd : -1); 595 req->r_osd ? req->r_osd->o_osd : -1);
589 596
597 /* record full pg acting set */
598 memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
599 req->r_num_pg_osds = num;
600
590 if (req->r_osd) { 601 if (req->r_osd) {
591 __cancel_request(req); 602 __cancel_request(req);
592 list_del_init(&req->r_osd_item); 603 list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
612 __remove_osd_from_lru(req->r_osd); 623 __remove_osd_from_lru(req->r_osd);
613 list_add(&req->r_osd_item, &req->r_osd->o_requests); 624 list_add(&req->r_osd_item, &req->r_osd->o_requests);
614 } 625 }
615 err = 1; /* osd changed */ 626 err = 1; /* osd or pg changed */
616 627
617out: 628out:
618 return err; 629 return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
779 struct ceph_osd_request *req; 790 struct ceph_osd_request *req;
780 u64 tid; 791 u64 tid;
781 int numops, object_len, flags; 792 int numops, object_len, flags;
793 s32 result;
782 794
783 tid = le64_to_cpu(msg->hdr.tid); 795 tid = le64_to_cpu(msg->hdr.tid);
784 if (msg->front.iov_len < sizeof(*rhead)) 796 if (msg->front.iov_len < sizeof(*rhead))
785 goto bad; 797 goto bad;
786 numops = le32_to_cpu(rhead->num_ops); 798 numops = le32_to_cpu(rhead->num_ops);
787 object_len = le32_to_cpu(rhead->object_len); 799 object_len = le32_to_cpu(rhead->object_len);
800 result = le32_to_cpu(rhead->result);
788 if (msg->front.iov_len != sizeof(*rhead) + object_len + 801 if (msg->front.iov_len != sizeof(*rhead) + object_len +
789 numops * sizeof(struct ceph_osd_op)) 802 numops * sizeof(struct ceph_osd_op))
790 goto bad; 803 goto bad;
791 dout("handle_reply %p tid %llu\n", msg, tid); 804 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
792 805
793 /* lookup */ 806 /* lookup */
794 mutex_lock(&osdc->request_mutex); 807 mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
834 dout("handle_reply tid %llu flags %d\n", tid, flags); 847 dout("handle_reply tid %llu flags %d\n", tid, flags);
835 848
836 /* either this is a read, or we got the safe response */ 849 /* either this is a read, or we got the safe response */
837 if ((flags & CEPH_OSD_FLAG_ONDISK) || 850 if (result < 0 ||
851 (flags & CEPH_OSD_FLAG_ONDISK) ||
838 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 852 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
839 __unregister_request(osdc, req); 853 __unregister_request(osdc, req);
840 854
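
[editor's note] __map_osds() above now records the full acting set for the request's placement group and only re-targets the request when the primary, its incarnation, or the acting set itself changes; handle_reply() additionally unregisters a request as soon as the OSD returns an error result. A short sketch of the acting-set bookkeeping, reusing the CEPH_PG_MAX_SIZE bound introduced in rados.h (the other names are illustrative):

    #include <stdbool.h>
    #include <string.h>

    #define PG_MAX_SIZE 16              /* mirrors CEPH_PG_MAX_SIZE from rados.h */

    struct req_map {
        int pg_osds[PG_MAX_SIZE];       /* acting set recorded at the last map */
        int num_pg_osds;
    };

    /* True when the acting set is unchanged and the request can stay put. */
    bool mapping_unchanged(const struct req_map *req, const int *acting, int num)
    {
        return req->num_pg_osds == num &&
               memcmp(req->pg_osds, acting, sizeof(acting[0]) * num) == 0;
    }

    /* Remember the full acting set so the next osdmap update can be compared. */
    void record_mapping(struct req_map *req, const int *acting, int num)
    {
        memcpy(req->pg_osds, acting, sizeof(acting[0]) * num);
        req->num_pg_osds = num;
    }
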
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b0759911e7c3..ce776989ef6a 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
48 struct list_head r_osd_item; 48 struct list_head r_osd_item;
49 struct ceph_osd *r_osd; 49 struct ceph_osd *r_osd;
50 struct ceph_pg r_pgid; 50 struct ceph_pg r_pgid;
51 int r_pg_osds[CEPH_PG_MAX_SIZE];
52 int r_num_pg_osds;
51 53
52 struct ceph_connection *r_con_filling_msg; 54 struct ceph_connection *r_con_filling_msg;
53 55
@@ -66,7 +68,6 @@ struct ceph_osd_request {
66 struct list_head r_unsafe_item; 68 struct list_head r_unsafe_item;
67 69
68 struct inode *r_inode; /* for use by callbacks */ 70 struct inode *r_inode; /* for use by callbacks */
69 struct writeback_control *r_wbc; /* ditto */
70 71
71 char r_oid[40]; /* object name */ 72 char r_oid[40]; /* object name */
72 int r_oid_len; 73 int r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 21c6623c4b07..cfdd8f4388b7 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -314,71 +314,6 @@ bad:
314 return ERR_PTR(err); 314 return ERR_PTR(err);
315} 315}
316 316
317
318/*
319 * osd map
320 */
321void ceph_osdmap_destroy(struct ceph_osdmap *map)
322{
323 dout("osdmap_destroy %p\n", map);
324 if (map->crush)
325 crush_destroy(map->crush);
326 while (!RB_EMPTY_ROOT(&map->pg_temp)) {
327 struct ceph_pg_mapping *pg =
328 rb_entry(rb_first(&map->pg_temp),
329 struct ceph_pg_mapping, node);
330 rb_erase(&pg->node, &map->pg_temp);
331 kfree(pg);
332 }
333 while (!RB_EMPTY_ROOT(&map->pg_pools)) {
334 struct ceph_pg_pool_info *pi =
335 rb_entry(rb_first(&map->pg_pools),
336 struct ceph_pg_pool_info, node);
337 rb_erase(&pi->node, &map->pg_pools);
338 kfree(pi);
339 }
340 kfree(map->osd_state);
341 kfree(map->osd_weight);
342 kfree(map->osd_addr);
343 kfree(map);
344}
345
346/*
347 * adjust max osd value. reallocate arrays.
348 */
349static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
350{
351 u8 *state;
352 struct ceph_entity_addr *addr;
353 u32 *weight;
354
355 state = kcalloc(max, sizeof(*state), GFP_NOFS);
356 addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
357 weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
358 if (state == NULL || addr == NULL || weight == NULL) {
359 kfree(state);
360 kfree(addr);
361 kfree(weight);
362 return -ENOMEM;
363 }
364
365 /* copy old? */
366 if (map->osd_state) {
367 memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
368 memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
369 memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
370 kfree(map->osd_state);
371 kfree(map->osd_addr);
372 kfree(map->osd_weight);
373 }
374
375 map->osd_state = state;
376 map->osd_weight = weight;
377 map->osd_addr = addr;
378 map->max_osd = max;
379 return 0;
380}
381
382/* 317/*
383 * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid 318 * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
384 * to a set of osds) 319 * to a set of osds)
@@ -482,6 +417,13 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
482 return NULL; 417 return NULL;
483} 418}
484 419
420static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
421{
422 rb_erase(&pi->node, root);
423 kfree(pi->name);
424 kfree(pi);
425}
426
485void __decode_pool(void **p, struct ceph_pg_pool_info *pi) 427void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
486{ 428{
487 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 429 ceph_decode_copy(p, &pi->v, sizeof(pi->v));
@@ -490,6 +432,98 @@ void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
490 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; 432 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
491} 433}
492 434
435static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
436{
437 struct ceph_pg_pool_info *pi;
438 u32 num, len, pool;
439
440 ceph_decode_32_safe(p, end, num, bad);
441 dout(" %d pool names\n", num);
442 while (num--) {
443 ceph_decode_32_safe(p, end, pool, bad);
444 ceph_decode_32_safe(p, end, len, bad);
445 dout(" pool %d len %d\n", pool, len);
446 pi = __lookup_pg_pool(&map->pg_pools, pool);
447 if (pi) {
448 kfree(pi->name);
449 pi->name = kmalloc(len + 1, GFP_NOFS);
450 if (pi->name) {
451 memcpy(pi->name, *p, len);
452 pi->name[len] = '\0';
453 dout(" name is %s\n", pi->name);
454 }
455 }
456 *p += len;
457 }
458 return 0;
459
460bad:
461 return -EINVAL;
462}
463
464/*
465 * osd map
466 */
467void ceph_osdmap_destroy(struct ceph_osdmap *map)
468{
469 dout("osdmap_destroy %p\n", map);
470 if (map->crush)
471 crush_destroy(map->crush);
472 while (!RB_EMPTY_ROOT(&map->pg_temp)) {
473 struct ceph_pg_mapping *pg =
474 rb_entry(rb_first(&map->pg_temp),
475 struct ceph_pg_mapping, node);
476 rb_erase(&pg->node, &map->pg_temp);
477 kfree(pg);
478 }
479 while (!RB_EMPTY_ROOT(&map->pg_pools)) {
480 struct ceph_pg_pool_info *pi =
481 rb_entry(rb_first(&map->pg_pools),
482 struct ceph_pg_pool_info, node);
483 __remove_pg_pool(&map->pg_pools, pi);
484 }
485 kfree(map->osd_state);
486 kfree(map->osd_weight);
487 kfree(map->osd_addr);
488 kfree(map);
489}
490
491/*
492 * adjust max osd value. reallocate arrays.
493 */
494static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
495{
496 u8 *state;
497 struct ceph_entity_addr *addr;
498 u32 *weight;
499
500 state = kcalloc(max, sizeof(*state), GFP_NOFS);
501 addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
502 weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
503 if (state == NULL || addr == NULL || weight == NULL) {
504 kfree(state);
505 kfree(addr);
506 kfree(weight);
507 return -ENOMEM;
508 }
509
510 /* copy old? */
511 if (map->osd_state) {
512 memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
513 memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
514 memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
515 kfree(map->osd_state);
516 kfree(map->osd_addr);
517 kfree(map->osd_weight);
518 }
519
520 map->osd_state = state;
521 map->osd_weight = weight;
522 map->osd_addr = addr;
523 map->max_osd = max;
524 return 0;
525}
526
493/* 527/*
494 * decode a full map. 528 * decode a full map.
495 */ 529 */
@@ -526,7 +560,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
526 ceph_decode_32_safe(p, end, max, bad); 560 ceph_decode_32_safe(p, end, max, bad);
527 while (max--) { 561 while (max--) {
528 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); 562 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
529 pi = kmalloc(sizeof(*pi), GFP_NOFS); 563 pi = kzalloc(sizeof(*pi), GFP_NOFS);
530 if (!pi) 564 if (!pi)
531 goto bad; 565 goto bad;
532 pi->id = ceph_decode_32(p); 566 pi->id = ceph_decode_32(p);
@@ -539,6 +573,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
539 __decode_pool(p, pi); 573 __decode_pool(p, pi);
540 __insert_pg_pool(&map->pg_pools, pi); 574 __insert_pg_pool(&map->pg_pools, pi);
541 } 575 }
576
577 if (version >= 5 && __decode_pool_names(p, end, map) < 0)
578 goto bad;
579
542 ceph_decode_32_safe(p, end, map->pool_max, bad); 580 ceph_decode_32_safe(p, end, map->pool_max, bad);
543 581
544 ceph_decode_32_safe(p, end, map->flags, bad); 582 ceph_decode_32_safe(p, end, map->flags, bad);
@@ -712,7 +750,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
712 } 750 }
713 pi = __lookup_pg_pool(&map->pg_pools, pool); 751 pi = __lookup_pg_pool(&map->pg_pools, pool);
714 if (!pi) { 752 if (!pi) {
715 pi = kmalloc(sizeof(*pi), GFP_NOFS); 753 pi = kzalloc(sizeof(*pi), GFP_NOFS);
716 if (!pi) { 754 if (!pi) {
717 err = -ENOMEM; 755 err = -ENOMEM;
718 goto bad; 756 goto bad;
@@ -722,6 +760,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
722 } 760 }
723 __decode_pool(p, pi); 761 __decode_pool(p, pi);
724 } 762 }
763 if (version >= 5 && __decode_pool_names(p, end, map) < 0)
764 goto bad;
725 765
726 /* old_pool */ 766 /* old_pool */
727 ceph_decode_32_safe(p, end, len, bad); 767 ceph_decode_32_safe(p, end, len, bad);
@@ -730,10 +770,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
730 770
731 ceph_decode_32_safe(p, end, pool, bad); 771 ceph_decode_32_safe(p, end, pool, bad);
732 pi = __lookup_pg_pool(&map->pg_pools, pool); 772 pi = __lookup_pg_pool(&map->pg_pools, pool);
733 if (pi) { 773 if (pi)
734 rb_erase(&pi->node, &map->pg_pools); 774 __remove_pg_pool(&map->pg_pools, pi);
735 kfree(pi);
736 }
737 } 775 }
738 776
739 /* new_up */ 777 /* new_up */
@@ -1003,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1003} 1041}
1004 1042
1005/* 1043/*
1044 * Return acting set for given pgid.
1045 */
1046int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1047 int *acting)
1048{
1049 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1050 int i, o, num = CEPH_PG_MAX_SIZE;
1051
1052 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1053 if (!osds)
1054 return -1;
1055
1056 /* primary is first up osd */
1057 o = 0;
1058 for (i = 0; i < num; i++)
1059 if (ceph_osd_is_up(osdmap, osds[i]))
1060 acting[o++] = osds[i];
1061 return o;
1062}
1063
1064/*
1006 * Return primary osd for given pgid, or -1 if none. 1065 * Return primary osd for given pgid, or -1 if none.
1007 */ 1066 */
1008int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) 1067int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1009{ 1068{
1010 int rawosds[10], *osds; 1069 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1011 int i, num = ARRAY_SIZE(rawosds); 1070 int i, num = CEPH_PG_MAX_SIZE;
1012 1071
1013 osds = calc_pg_raw(osdmap, pgid, rawosds, &num); 1072 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1014 if (!osds) 1073 if (!osds)
@@ -1016,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1016 1075
1017 /* primary is first up osd */ 1076 /* primary is first up osd */
1018 for (i = 0; i < num; i++) 1077 for (i = 0; i < num; i++)
1019 if (ceph_osd_is_up(osdmap, osds[i])) { 1078 if (ceph_osd_is_up(osdmap, osds[i]))
1020 return osds[i]; 1079 return osds[i];
1021 break;
1022 }
1023 return -1; 1080 return -1;
1024} 1081}
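
[editor's note] Beyond moving ceph_osdmap_destroy()/osdmap_set_max_osd() below the new pool-name decoding, the osdmap.c hunk adds ceph_calc_pg_acting(): it filters the raw OSD vector for a PG down to the OSDs that are currently up and returns the count, while ceph_calc_pg_primary() loses a dead break statement but keeps its first-up-OSD behaviour. A standalone model of that filter, with a dummy is_up() predicate standing in for ceph_osd_is_up():

    #include <stdbool.h>

    #define PG_MAX_SIZE 16              /* mirrors CEPH_PG_MAX_SIZE from rados.h */

    /* Placeholder for ceph_osd_is_up(osdmap, osd); illustrative only. */
    static bool is_up(int osd)
    {
        return osd >= 0;
    }

    /* Copy the up OSDs from raw[] into acting[] and return how many there are. */
    int calc_pg_acting(const int *raw, int num, int *acting)
    {
        int i, o = 0;

        for (i = 0; i < num && o < PG_MAX_SIZE; i++)
            if (is_up(raw[i]))
                acting[o++] = raw[i];
        return o;
    }

    /* The primary is simply the first up OSD, or -1 if none is up. */
    int calc_pg_primary(const int *raw, int num)
    {
        int acting[PG_MAX_SIZE];
        int n = calc_pg_acting(raw, num, acting);

        return n ? acting[0] : -1;
    }
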
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 1fb55afb2642..970b547e510d 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -23,6 +23,7 @@ struct ceph_pg_pool_info {
23 int id; 23 int id;
24 struct ceph_pg_pool v; 24 struct ceph_pg_pool v;
25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; 25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
26 char *name;
26}; 27};
27 28
28struct ceph_pg_mapping { 29struct ceph_pg_mapping {
@@ -119,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
119 const char *oid, 120 const char *oid,
120 struct ceph_file_layout *fl, 121 struct ceph_file_layout *fl,
121 struct ceph_osdmap *osdmap); 122 struct ceph_osdmap *osdmap);
123extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
124 int *acting);
122extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, 125extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
123 struct ceph_pg pgid); 126 struct ceph_pg pgid);
124 127
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 26ac8b89a676..fd56451a871f 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -11,8 +11,10 @@
11/* 11/*
12 * osdmap encoding versions 12 * osdmap encoding versions
13 */ 13 */
14#define CEPH_OSDMAP_INC_VERSION 4 14#define CEPH_OSDMAP_INC_VERSION 5
15#define CEPH_OSDMAP_VERSION 4 15#define CEPH_OSDMAP_INC_VERSION_EXT 5
16#define CEPH_OSDMAP_VERSION 5
17#define CEPH_OSDMAP_VERSION_EXT 5
16 18
17/* 19/*
18 * fs id 20 * fs id
@@ -56,6 +58,7 @@ struct ceph_timespec {
56#define CEPH_PG_LAYOUT_LINEAR 2 58#define CEPH_PG_LAYOUT_LINEAR 2
57#define CEPH_PG_LAYOUT_HYBRID 3 59#define CEPH_PG_LAYOUT_HYBRID 3
58 60
61#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
59 62
60/* 63/*
61 * placement group. 64 * placement group.
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e6f9bc57d472..d5114db70453 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -431,8 +431,7 @@ static int dup_array(u64 **dst, __le64 *src, int num)
431 * Caller must hold snap_rwsem for read (i.e., the realm topology won't 431 * Caller must hold snap_rwsem for read (i.e., the realm topology won't
432 * change). 432 * change).
433 */ 433 */
434void ceph_queue_cap_snap(struct ceph_inode_info *ci, 434void ceph_queue_cap_snap(struct ceph_inode_info *ci)
435 struct ceph_snap_context *snapc)
436{ 435{
437 struct inode *inode = &ci->vfs_inode; 436 struct inode *inode = &ci->vfs_inode;
438 struct ceph_cap_snap *capsnap; 437 struct ceph_cap_snap *capsnap;
@@ -451,10 +450,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
451 as no new writes are allowed to start when pending, so any 450 as no new writes are allowed to start when pending, so any
452 writes in progress now were started before the previous 451 writes in progress now were started before the previous
453 cap_snap. lucky us. */ 452 cap_snap. lucky us. */
454 dout("queue_cap_snap %p snapc %p seq %llu used %d" 453 dout("queue_cap_snap %p already pending\n", inode);
455 " already pending\n", inode, snapc, snapc->seq, used);
456 kfree(capsnap); 454 kfree(capsnap);
457 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { 455 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
456 struct ceph_snap_context *snapc = ci->i_head_snapc;
457
458 igrab(inode); 458 igrab(inode);
459 459
460 atomic_set(&capsnap->nref, 1); 460 atomic_set(&capsnap->nref, 1);
@@ -463,7 +463,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
463 INIT_LIST_HEAD(&capsnap->flushing_item); 463 INIT_LIST_HEAD(&capsnap->flushing_item);
464 464
465 capsnap->follows = snapc->seq - 1; 465 capsnap->follows = snapc->seq - 1;
466 capsnap->context = ceph_get_snap_context(snapc);
467 capsnap->issued = __ceph_caps_issued(ci, NULL); 466 capsnap->issued = __ceph_caps_issued(ci, NULL);
468 capsnap->dirty = __ceph_caps_dirty(ci); 467 capsnap->dirty = __ceph_caps_dirty(ci);
469 468
@@ -480,7 +479,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
480 snapshot. */ 479 snapshot. */
481 capsnap->dirty_pages = ci->i_wrbuffer_ref_head; 480 capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
482 ci->i_wrbuffer_ref_head = 0; 481 ci->i_wrbuffer_ref_head = 0;
483 ceph_put_snap_context(ci->i_head_snapc); 482 capsnap->context = snapc;
484 ci->i_head_snapc = NULL; 483 ci->i_head_snapc = NULL;
485 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); 484 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
486 485
@@ -522,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
522 capsnap->ctime = inode->i_ctime; 521 capsnap->ctime = inode->i_ctime;
523 capsnap->time_warp_seq = ci->i_time_warp_seq; 522 capsnap->time_warp_seq = ci->i_time_warp_seq;
524 if (capsnap->dirty_pages) { 523 if (capsnap->dirty_pages) {
525 dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu " 524 dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
526 "still has %d dirty pages\n", inode, capsnap, 525 "still has %d dirty pages\n", inode, capsnap,
527 capsnap->context, capsnap->context->seq, 526 capsnap->context, capsnap->context->seq,
528 capsnap->size, capsnap->dirty_pages); 527 ceph_cap_string(capsnap->dirty), capsnap->size,
528 capsnap->dirty_pages);
529 return 0; 529 return 0;
530 } 530 }
531 dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n", 531 dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
532 inode, capsnap, capsnap->context, 532 inode, capsnap, capsnap->context,
533 capsnap->context->seq, capsnap->size); 533 capsnap->context->seq, ceph_cap_string(capsnap->dirty),
534 capsnap->size);
534 535
535 spin_lock(&mdsc->snap_flush_lock); 536 spin_lock(&mdsc->snap_flush_lock);
536 list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); 537 list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
@@ -602,7 +603,7 @@ more:
602 if (lastinode) 603 if (lastinode)
603 iput(lastinode); 604 iput(lastinode);
604 lastinode = inode; 605 lastinode = inode;
605 ceph_queue_cap_snap(ci, realm->cached_context); 606 ceph_queue_cap_snap(ci);
606 spin_lock(&realm->inodes_with_caps_lock); 607 spin_lock(&realm->inodes_with_caps_lock);
607 } 608 }
608 spin_unlock(&realm->inodes_with_caps_lock); 609 spin_unlock(&realm->inodes_with_caps_lock);
@@ -824,8 +825,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
824 spin_unlock(&realm->inodes_with_caps_lock); 825 spin_unlock(&realm->inodes_with_caps_lock);
825 spin_unlock(&inode->i_lock); 826 spin_unlock(&inode->i_lock);
826 827
827 ceph_queue_cap_snap(ci, 828 ceph_queue_cap_snap(ci);
828 ci->i_snap_realm->cached_context);
829 829
830 iput(inode); 830 iput(inode);
831 continue; 831 continue;
@@ -869,16 +869,20 @@ skip_inode:
869 continue; 869 continue;
870 ci = ceph_inode(inode); 870 ci = ceph_inode(inode);
871 spin_lock(&inode->i_lock); 871 spin_lock(&inode->i_lock);
872 if (!ci->i_snap_realm) 872 if (list_empty(&ci->i_snap_realm_item)) {
873 goto split_skip_inode; 873 struct ceph_snap_realm *oldrealm =
874 ceph_put_snap_realm(mdsc, ci->i_snap_realm); 874 ci->i_snap_realm;
875 spin_lock(&realm->inodes_with_caps_lock); 875
876 list_add(&ci->i_snap_realm_item, 876 dout(" moving %p to split realm %llx %p\n",
877 &realm->inodes_with_caps); 877 inode, realm->ino, realm);
878 ci->i_snap_realm = realm; 878 spin_lock(&realm->inodes_with_caps_lock);
879 spin_unlock(&realm->inodes_with_caps_lock); 879 list_add(&ci->i_snap_realm_item,
880 ceph_get_snap_realm(mdsc, realm); 880 &realm->inodes_with_caps);
881split_skip_inode: 881 ci->i_snap_realm = realm;
882 spin_unlock(&realm->inodes_with_caps_lock);
883 ceph_get_snap_realm(mdsc, realm);
884 ceph_put_snap_realm(mdsc, oldrealm);
885 }
882 spin_unlock(&inode->i_lock); 886 spin_unlock(&inode->i_lock);
883 iput(inode); 887 iput(inode);
884 } 888 }
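
[editor's note] ceph_queue_cap_snap() above loses its snapc argument: the cap_snap now takes over the reference already held in ci->i_head_snapc (the context the in-flight dirty data was written under) instead of grabbing the realm's cached context and dropping the head reference separately, and the realm-split path only moves an inode whose i_snap_realm_item is not already linked. A tiny sketch of the reference hand-off, with placeholder types:

    #include <stddef.h>

    struct snap_context { int nref; };

    struct inode_info {
        struct snap_context *head_snapc;    /* context covering the dirty data */
    };

    struct cap_snap {
        struct snap_context *context;
    };

    /*
     * The cap_snap inherits the inode's head reference outright: no extra
     * get/put pair, and the context can only be the one the data belongs to.
     */
    void capsnap_take_context(struct cap_snap *capsnap, struct inode_info *ci)
    {
        capsnap->context = ci->head_snapc;
        ci->head_snapc = NULL;
    }
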
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02eaa1279..110857ba9269 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
47 */ 47 */
48static void ceph_put_super(struct super_block *s) 48static void ceph_put_super(struct super_block *s)
49{ 49{
50 struct ceph_client *cl = ceph_client(s); 50 struct ceph_client *client = ceph_sb_to_client(s);
51 51
52 dout("put_super\n"); 52 dout("put_super\n");
53 ceph_mdsc_close_sessions(&cl->mdsc); 53 ceph_mdsc_close_sessions(&client->mdsc);
54
55 /*
56 * ensure we release the bdi before put_anon_super releases
57 * the device name.
58 */
59 if (s->s_bdi == &client->backing_dev_info) {
60 bdi_unregister(&client->backing_dev_info);
61 s->s_bdi = NULL;
62 }
63
54 return; 64 return;
55} 65}
56 66
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
636 destroy_workqueue(client->pg_inv_wq); 646 destroy_workqueue(client->pg_inv_wq);
637 destroy_workqueue(client->trunc_wq); 647 destroy_workqueue(client->trunc_wq);
638 648
649 bdi_destroy(&client->backing_dev_info);
650
639 if (client->msgr) 651 if (client->msgr)
640 ceph_messenger_destroy(client->msgr); 652 ceph_messenger_destroy(client->msgr);
641 mempool_destroy(client->wb_pagevec_pool); 653 mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
876{ 888{
877 int err; 889 int err;
878 890
879 sb->s_bdi = &client->backing_dev_info;
880
881 /* set ra_pages based on rsize mount option? */ 891 /* set ra_pages based on rsize mount option? */
882 if (client->mount_args->rsize >= PAGE_CACHE_SIZE) 892 if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
883 client->backing_dev_info.ra_pages = 893 client->backing_dev_info.ra_pages =
884 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) 894 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
885 >> PAGE_SHIFT; 895 >> PAGE_SHIFT;
886 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); 896 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
897 if (!err)
898 sb->s_bdi = &client->backing_dev_info;
887 return err; 899 return err;
888} 900}
889 901
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
957 dout("kill_sb %p\n", s); 969 dout("kill_sb %p\n", s);
958 ceph_mdsc_pre_umount(&client->mdsc); 970 ceph_mdsc_pre_umount(&client->mdsc);
959 kill_anon_super(s); /* will call put_super after sb is r/o */ 971 kill_anon_super(s); /* will call put_super after sb is r/o */
960 if (s->s_bdi == &client->backing_dev_info)
961 bdi_unregister(&client->backing_dev_info);
962 bdi_destroy(&client->backing_dev_info);
963 ceph_destroy_client(client); 972 ceph_destroy_client(client);
964} 973}
965 974
@@ -996,9 +1005,10 @@ static int __init init_ceph(void)
996 if (ret) 1005 if (ret)
997 goto out_icache; 1006 goto out_icache;
998 1007
999 pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n", 1008 pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
1000 CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH, 1009 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
1001 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL); 1010 CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
1011 CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
1002 return 0; 1012 return 0;
1003 1013
1004out_icache: 1014out_icache:
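
[editor's note] The super.c hunks above fix the backing_dev_info lifecycle: sb->s_bdi is published only once bdi_register_dev() succeeds, the bdi is unregistered in put_super() before put_anon_super() releases the device name, and bdi_destroy() moves into ceph_destroy_client(). A minimal sketch of the publish-only-on-success part, with stand-in types rather than the real kernel structures:

    struct bdi    { int registered; };
    struct client { struct bdi bdi; };
    struct sb     { struct bdi *s_bdi; };

    /* Stand-in for bdi_register_dev(); returns 0 on success. */
    static int bdi_register_stub(struct bdi *bdi)
    {
        bdi->registered = 1;
        return 0;
    }

    /* Make the bdi visible to the rest of the VFS only once registration worked. */
    int register_bdi(struct sb *sb, struct client *client)
    {
        int err = bdi_register_stub(&client->bdi);

        if (!err)
            sb->s_bdi = &client->bdi;
        return err;
    }
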
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ca702c67bc66..13513b80d87f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mempool.h> 11#include <linux/mempool.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/slab.h>
13#include <linux/wait.h> 14#include <linux/wait.h>
14#include <linux/writeback.h> 15#include <linux/writeback.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
@@ -715,8 +716,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m,
715extern void ceph_handle_snap(struct ceph_mds_client *mdsc, 716extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
716 struct ceph_mds_session *session, 717 struct ceph_mds_session *session,
717 struct ceph_msg *msg); 718 struct ceph_msg *msg);
718extern void ceph_queue_cap_snap(struct ceph_inode_info *ci, 719extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
719 struct ceph_snap_context *snapc);
720extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, 720extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
721 struct ceph_cap_snap *capsnap); 721 struct ceph_cap_snap *capsnap);
722extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); 722extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 4797787c6a44..246a167cb913 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -18,6 +18,8 @@
18#ifndef _CIFS_FS_SB_H 18#ifndef _CIFS_FS_SB_H
19#define _CIFS_FS_SB_H 19#define _CIFS_FS_SB_H
20 20
21#include <linux/backing-dev.h>
22
21#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ 23#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */
22#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */ 24#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */
23#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ 25#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */
@@ -50,5 +52,6 @@ struct cifs_sb_info {
50#ifdef CONFIG_CIFS_DFS_UPCALL 52#ifdef CONFIG_CIFS_DFS_UPCALL
51 char *mountdata; /* mount options received at mount time */ 53 char *mountdata; /* mount options received at mount time */
52#endif 54#endif
55 struct backing_dev_info bdi;
53}; 56};
54#endif /* _CIFS_FS_SB_H */ 57#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ded66be6597c..ad235d604a0b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data,
103 if (cifs_sb == NULL) 103 if (cifs_sb == NULL)
104 return -ENOMEM; 104 return -ENOMEM;
105 105
106 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
107 if (rc) {
108 kfree(cifs_sb);
109 return rc;
110 }
111
106#ifdef CONFIG_CIFS_DFS_UPCALL 112#ifdef CONFIG_CIFS_DFS_UPCALL
107 /* copy mount params to sb for use in submounts */ 113 /* copy mount params to sb for use in submounts */
108 /* BB: should we move this after the mount so we 114 /* BB: should we move this after the mount so we
@@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data,
115 int len = strlen(data); 121 int len = strlen(data);
116 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); 122 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
117 if (cifs_sb->mountdata == NULL) { 123 if (cifs_sb->mountdata == NULL) {
124 bdi_destroy(&cifs_sb->bdi);
118 kfree(sb->s_fs_info); 125 kfree(sb->s_fs_info);
119 sb->s_fs_info = NULL; 126 sb->s_fs_info = NULL;
120 return -ENOMEM; 127 return -ENOMEM;
@@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data,
135 142
136 sb->s_magic = CIFS_MAGIC_NUMBER; 143 sb->s_magic = CIFS_MAGIC_NUMBER;
137 sb->s_op = &cifs_super_ops; 144 sb->s_op = &cifs_super_ops;
145 sb->s_bdi = &cifs_sb->bdi;
138/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) 146/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
139 sb->s_blocksize = 147 sb->s_blocksize =
140 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ 148 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
@@ -183,6 +191,7 @@ out_mount_failed:
183 } 191 }
184#endif 192#endif
185 unload_nls(cifs_sb->local_nls); 193 unload_nls(cifs_sb->local_nls);
194 bdi_destroy(&cifs_sb->bdi);
186 kfree(cifs_sb); 195 kfree(cifs_sb);
187 } 196 }
188 return rc; 197 return rc;
@@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb)
214#endif 223#endif
215 224
216 unload_nls(cifs_sb->local_nls); 225 unload_nls(cifs_sb->local_nls);
226 bdi_destroy(&cifs_sb->bdi);
217 kfree(cifs_sb); 227 kfree(cifs_sb);
218 228
219 unlock_kernel(); 229 unlock_kernel();
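
[editor's note] The cifs hunks above give each superblock its own backing_dev_info: it is set up with bdi_setup_and_register() early in cifs_read_super(), wired into sb->s_bdi, and torn down with bdi_destroy() on every failure path and in cifs_put_super(). A condensed sketch of that per-mount setup/teardown shape, with stub functions in place of the real bdi API:

    #include <errno.h>
    #include <stdlib.h>

    struct bdi     { int up; };
    struct cifs_sb { struct bdi bdi; char *mountdata; };

    static int  bdi_setup_stub(struct bdi *b)   { b->up = 1; return 0; }
    static void bdi_destroy_stub(struct bdi *b) { b->up = 0; }

    int fill_super(struct cifs_sb *sbi)
    {
        int rc = bdi_setup_stub(&sbi->bdi);     /* set up before anything else */
        if (rc)
            return rc;

        sbi->mountdata = malloc(32);            /* stands in for later setup work */
        if (!sbi->mountdata) {
            bdi_destroy_stub(&sbi->bdi);        /* every error path destroys it */
            return -ENOMEM;
        }
        return 0;
    }

    void put_super(struct cifs_sb *sbi)
    {
        free(sbi->mountdata);
        bdi_destroy_stub(&sbi->bdi);            /* mirrors cifs_put_super() */
    }
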
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ecf0ffbe2b64..0c2fd17439c8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -502,6 +502,7 @@ struct dfs_info3_param {
502#define CIFS_FATTR_DFS_REFERRAL 0x1 502#define CIFS_FATTR_DFS_REFERRAL 0x1
503#define CIFS_FATTR_DELETE_PENDING 0x2 503#define CIFS_FATTR_DELETE_PENDING 0x2
504#define CIFS_FATTR_NEED_REVAL 0x4 504#define CIFS_FATTR_NEED_REVAL 0x4
505#define CIFS_FATTR_INO_COLLISION 0x8
505 506
506struct cifs_fattr { 507struct cifs_fattr {
507 u32 cf_flags; 508 u32 cf_flags;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35ec11716213..29b9ea244c81 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque)
715 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) 715 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
716 return 0; 716 return 0;
717 717
718 /*
719 * uh oh -- it's a directory. We can't use it since hardlinked dirs are
720 * verboten. Disable serverino and return it as if it were found, the
721 * caller can discard it, generate a uniqueid and retry the find
722 */
723 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
724 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
725 cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
726 }
727
718 return 1; 728 return 1;
719} 729}
720 730
@@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
734 unsigned long hash; 744 unsigned long hash;
735 struct inode *inode; 745 struct inode *inode;
736 746
747retry_iget5_locked:
737 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); 748 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
738 749
739 /* hash down to 32-bits on 32-bit arch */ 750 /* hash down to 32-bits on 32-bit arch */
740 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); 751 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
741 752
742 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); 753 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
743
744 /* we have fattrs in hand, update the inode */
745 if (inode) { 754 if (inode) {
755 /* was there a problematic inode number collision? */
756 if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
757 iput(inode);
758 fattr->cf_uniqueid = iunique(sb, ROOT_I);
759 fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
760 goto retry_iget5_locked;
761 }
762
746 cifs_fattr_to_inode(inode, fattr); 763 cifs_fattr_to_inode(inode, fattr);
747 if (sb->s_flags & MS_NOATIME) 764 if (sb->s_flags & MS_NOATIME)
748 inode->i_flags |= S_NOATIME | S_NOCMTIME; 765 inode->i_flags |= S_NOATIME | S_NOCMTIME;
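
[editor's note] The cifs/inode.c hunk above handles server inode-number collisions: when iget5_locked() matches an existing directory inode for the same uniqueid, cifs_find_inode() flags the collision and auto-disables serverino, and cifs_iget() drops the hit, generates a fresh number with iunique(), and retries. A standalone sketch of that flag-and-retry loop; the lookup and id generator are toy stand-ins:

    #include <stdint.h>

    #define FATTR_INO_COLLISION 0x8     /* mirrors CIFS_FATTR_INO_COLLISION */

    struct fattr {
        uint64_t uniqueid;
        unsigned int flags;
    };

    static uint64_t next_ino = 1000;

    /* Toy stand-in for iget5_locked() + cifs_find_inode(): ids below 10 are
     * treated as colliding with an existing directory inode. */
    static void *lookup_inode(struct fattr *f)
    {
        if (f->uniqueid < 10)
            f->flags |= FATTR_INO_COLLISION;
        return &next_ino;               /* any non-NULL cookie will do */
    }

    /* Toy stand-in for iunique(): hands back a number not currently in use. */
    static uint64_t fresh_ino(void)
    {
        return next_ino++;
    }

    void *cifs_iget_model(struct fattr *f)
    {
        void *inode;

    retry:
        inode = lookup_inode(f);
        if (inode && (f->flags & FATTR_INO_COLLISION)) {
            /* drop the colliding hit, pick a fresh id, and look up again */
            f->uniqueid = fresh_ino();
            f->flags &= ~FATTR_INO_COLLISION;
            goto retry;
        }
        return inode;
    }
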
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index a1695dcadd99..d97f9935a028 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
167 return -EBUSY; 167 return -EBUSY;
168 } 168 }
169 169
170 error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
171 if (error)
172 goto bdi_err;
173
170 vc->vc_sb = sb; 174 vc->vc_sb = sb;
171 175
172 sb->s_fs_info = vc; 176 sb->s_fs_info = vc;
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
175 sb->s_blocksize_bits = 12; 179 sb->s_blocksize_bits = 12;
176 sb->s_magic = CODA_SUPER_MAGIC; 180 sb->s_magic = CODA_SUPER_MAGIC;
177 sb->s_op = &coda_super_operations; 181 sb->s_op = &coda_super_operations;
182 sb->s_bdi = &vc->bdi;
178 183
179 /* get root fid from Venus: this needs the root inode */ 184 /* get root fid from Venus: this needs the root inode */
180 error = venus_rootfid(sb, &fid); 185 error = venus_rootfid(sb, &fid);
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
200 return 0; 205 return 0;
201 206
202 error: 207 error:
208 bdi_destroy(&vc->bdi);
209 bdi_err:
203 if (root) 210 if (root)
204 iput(root); 211 iput(root);
205 if (vc) 212 if (vc)
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
210 217
211static void coda_put_super(struct super_block *sb) 218static void coda_put_super(struct super_block *sb)
212{ 219{
220 bdi_destroy(&coda_vcp(sb)->bdi);
213 coda_vcp(sb)->vc_sb = NULL; 221 coda_vcp(sb)->vc_sb = NULL;
214 sb->s_fs_info = NULL; 222 sb->s_fs_info = NULL;
215 223
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc478..05448730f840 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
1531 if (retval < 0) 1531 if (retval < 0)
1532 goto out; 1532 goto out;
1533 1533
1534 current->stack_start = current->mm->start_stack;
1535
1536 /* execve succeeded */ 1534 /* execve succeeded */
1537 current->fs->in_exec = 0; 1535 current->fs->in_exec = 0;
1538 current->in_execve = 0; 1536 current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c32a1b6a856b..641640dc7ae5 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -102,7 +102,6 @@
102#include <linux/nbd.h> 102#include <linux/nbd.h>
103#include <linux/random.h> 103#include <linux/random.h>
104#include <linux/filter.h> 104#include <linux/filter.h>
105#include <linux/pktcdvd.h>
106 105
107#include <linux/hiddev.h> 106#include <linux/hiddev.h>
108 107
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
1126COMPATIBLE_IOCTL(PPGETPHASE) 1125COMPATIBLE_IOCTL(PPGETPHASE)
1127COMPATIBLE_IOCTL(PPGETFLAGS) 1126COMPATIBLE_IOCTL(PPGETFLAGS)
1128COMPATIBLE_IOCTL(PPSETFLAGS) 1127COMPATIBLE_IOCTL(PPSETFLAGS)
1129/* pktcdvd */
1130COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
1131/* Big A */ 1128/* Big A */
1132/* sparc only */ 1129/* sparc only */
1133/* Big Q for sound/OSS */ 1130/* Big Q for sound/OSS */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e48b52205aa..0b502f80c691 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group)
645 645
646 configfs_detach_group(sd->s_element); 646 configfs_detach_group(sd->s_element);
647 child->d_inode->i_flags |= S_DEAD; 647 child->d_inode->i_flags |= S_DEAD;
648 dont_mount(child);
648 649
649 mutex_unlock(&child->d_inode->i_mutex); 650 mutex_unlock(&child->d_inode->i_mutex);
650 651
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item,
840 mutex_lock(&dentry->d_inode->i_mutex); 841 mutex_lock(&dentry->d_inode->i_mutex);
841 configfs_remove_dir(item); 842 configfs_remove_dir(item);
842 dentry->d_inode->i_flags |= S_DEAD; 843 dentry->d_inode->i_flags |= S_DEAD;
844 dont_mount(dentry);
843 mutex_unlock(&dentry->d_inode->i_mutex); 845 mutex_unlock(&dentry->d_inode->i_mutex);
844 d_delete(dentry); 846 d_delete(dentry);
845 } 847 }
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item,
882 if (ret) { 884 if (ret) {
883 configfs_detach_item(item); 885 configfs_detach_item(item);
884 dentry->d_inode->i_flags |= S_DEAD; 886 dentry->d_inode->i_flags |= S_DEAD;
887 dont_mount(dentry);
885 } 888 }
886 configfs_adjust_dir_dirent_depth_after_populate(sd); 889 configfs_adjust_dir_dirent_depth_after_populate(sd);
887 mutex_unlock(&dentry->d_inode->i_mutex); 890 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1725 mutex_unlock(&configfs_symlink_mutex); 1728 mutex_unlock(&configfs_symlink_mutex);
1726 configfs_detach_group(&group->cg_item); 1729 configfs_detach_group(&group->cg_item);
1727 dentry->d_inode->i_flags |= S_DEAD; 1730 dentry->d_inode->i_flags |= S_DEAD;
1731 dont_mount(dentry);
1728 mutex_unlock(&dentry->d_inode->i_mutex); 1732 mutex_unlock(&dentry->d_inode->i_mutex);
1729 1733
1730 d_delete(dentry); 1734 d_delete(dentry);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index efb2b9400391..1cc087635a5e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -382,8 +382,8 @@ out:
382static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, 382static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
383 struct ecryptfs_crypt_stat *crypt_stat) 383 struct ecryptfs_crypt_stat *crypt_stat)
384{ 384{
385 (*offset) = (crypt_stat->num_header_bytes_at_front 385 (*offset) = ecryptfs_lower_header_size(crypt_stat)
386 + (crypt_stat->extent_size * extent_num)); 386 + (crypt_stat->extent_size * extent_num);
387} 387}
388 388
389/** 389/**
@@ -835,13 +835,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
835 set_extent_mask_and_shift(crypt_stat); 835 set_extent_mask_and_shift(crypt_stat);
836 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; 836 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
837 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 837 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
838 crypt_stat->num_header_bytes_at_front = 0; 838 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
839 else { 839 else {
840 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) 840 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
841 crypt_stat->num_header_bytes_at_front = 841 crypt_stat->metadata_size =
842 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; 842 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
843 else 843 else
844 crypt_stat->num_header_bytes_at_front = PAGE_CACHE_SIZE; 844 crypt_stat->metadata_size = PAGE_CACHE_SIZE;
845 } 845 }
846} 846}
847 847
@@ -1108,9 +1108,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1109} 1109}
1110 1110
1111static void 1111void ecryptfs_write_crypt_stat_flags(char *page_virt,
1112write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, 1112 struct ecryptfs_crypt_stat *crypt_stat,
1113 size_t *written) 1113 size_t *written)
1114{ 1114{
1115 u32 flags = 0; 1115 u32 flags = 0;
1116 int i; 1116 int i;
@@ -1238,8 +1238,7 @@ ecryptfs_write_header_metadata(char *virt,
1238 1238
1239 header_extent_size = (u32)crypt_stat->extent_size; 1239 header_extent_size = (u32)crypt_stat->extent_size;
1240 num_header_extents_at_front = 1240 num_header_extents_at_front =
1241 (u16)(crypt_stat->num_header_bytes_at_front 1241 (u16)(crypt_stat->metadata_size / crypt_stat->extent_size);
1242 / crypt_stat->extent_size);
1243 put_unaligned_be32(header_extent_size, virt); 1242 put_unaligned_be32(header_extent_size, virt);
1244 virt += 4; 1243 virt += 4;
1245 put_unaligned_be16(num_header_extents_at_front, virt); 1244 put_unaligned_be16(num_header_extents_at_front, virt);
@@ -1292,7 +1291,8 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
1292 offset = ECRYPTFS_FILE_SIZE_BYTES; 1291 offset = ECRYPTFS_FILE_SIZE_BYTES;
1293 write_ecryptfs_marker((page_virt + offset), &written); 1292 write_ecryptfs_marker((page_virt + offset), &written);
1294 offset += written; 1293 offset += written;
1295 write_ecryptfs_flags((page_virt + offset), crypt_stat, &written); 1294 ecryptfs_write_crypt_stat_flags((page_virt + offset), crypt_stat,
1295 &written);
1296 offset += written; 1296 offset += written;
1297 ecryptfs_write_header_metadata((page_virt + offset), crypt_stat, 1297 ecryptfs_write_header_metadata((page_virt + offset), crypt_stat,
1298 &written); 1298 &written);
@@ -1382,7 +1382,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1382 rc = -EINVAL; 1382 rc = -EINVAL;
1383 goto out; 1383 goto out;
1384 } 1384 }
1385 virt_len = crypt_stat->num_header_bytes_at_front; 1385 virt_len = crypt_stat->metadata_size;
1386 order = get_order(virt_len); 1386 order = get_order(virt_len);
1387 /* Released in this function */ 1387 /* Released in this function */
1388 virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order); 1388 virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
@@ -1428,16 +1428,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1428 header_extent_size = get_unaligned_be32(virt); 1428 header_extent_size = get_unaligned_be32(virt);
1429 virt += sizeof(__be32); 1429 virt += sizeof(__be32);
1430 num_header_extents_at_front = get_unaligned_be16(virt); 1430 num_header_extents_at_front = get_unaligned_be16(virt);
1431 crypt_stat->num_header_bytes_at_front = 1431 crypt_stat->metadata_size = (((size_t)num_header_extents_at_front
1432 (((size_t)num_header_extents_at_front 1432 * (size_t)header_extent_size));
1433 * (size_t)header_extent_size));
1434 (*bytes_read) = (sizeof(__be32) + sizeof(__be16)); 1433 (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
1435 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) 1434 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1436 && (crypt_stat->num_header_bytes_at_front 1435 && (crypt_stat->metadata_size
1437 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { 1436 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
1438 rc = -EINVAL; 1437 rc = -EINVAL;
1439 printk(KERN_WARNING "Invalid header size: [%zd]\n", 1438 printk(KERN_WARNING "Invalid header size: [%zd]\n",
1440 crypt_stat->num_header_bytes_at_front); 1439 crypt_stat->metadata_size);
1441 } 1440 }
1442 return rc; 1441 return rc;
1443} 1442}
@@ -1452,8 +1451,7 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1452 */ 1451 */
1453static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) 1452static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1454{ 1453{
1455 crypt_stat->num_header_bytes_at_front = 1454 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
1456 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
1457} 1455}
1458 1456
1459/** 1457/**
@@ -1607,6 +1605,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1607 ecryptfs_dentry, 1605 ecryptfs_dentry,
1608 ECRYPTFS_VALIDATE_HEADER_SIZE); 1606 ECRYPTFS_VALIDATE_HEADER_SIZE);
1609 if (rc) { 1607 if (rc) {
1608 memset(page_virt, 0, PAGE_CACHE_SIZE);
1610 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); 1609 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
1611 if (rc) { 1610 if (rc) {
1612 printk(KERN_DEBUG "Valid eCryptfs headers not found in " 1611 printk(KERN_DEBUG "Valid eCryptfs headers not found in "
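
[editor's note] The ecryptfs/crypto.c changes above rename num_header_bytes_at_front to metadata_size and route lower-file offset math through ecryptfs_lower_header_size(), which is zero when the metadata lives in an xattr; ecryptfs_read_metadata() also re-zeroes the scratch page before falling back to the xattr region. A small model of the offset calculation, with the flag and sizes as illustrative constants:

    #include <stddef.h>

    #define METADATA_IN_XATTR 0x1       /* stands in for ECRYPTFS_METADATA_IN_XATTR */

    struct crypt_stat {
        unsigned int flags;
        size_t metadata_size;           /* bytes reserved for headers */
        size_t extent_size;             /* data extent size, e.g. 4096 */
    };

    /* Header bytes actually stored at the front of the lower file. */
    static size_t lower_header_size(const struct crypt_stat *cs)
    {
        if (cs->flags & METADATA_IN_XATTR)
            return 0;                   /* headers live in the xattr instead */
        return cs->metadata_size;
    }

    /* Byte offset of a given data extent in the lower (encrypted) file. */
    long long lower_offset_for_extent(const struct crypt_stat *cs,
                                      long long extent_num)
    {
        return (long long)lower_header_size(cs)
               + (long long)cs->extent_size * extent_num;
    }
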
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 542f625312f3..bfc2e0f78f00 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -35,6 +35,7 @@
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h> 37#include <linux/nsproxy.h>
38#include <linux/backing-dev.h>
38 39
39/* Version verification for shared data structures w/ userspace */ 40/* Version verification for shared data structures w/ userspace */
40#define ECRYPTFS_VERSION_MAJOR 0x00 41#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -273,7 +274,7 @@ struct ecryptfs_crypt_stat {
273 u32 flags; 274 u32 flags;
274 unsigned int file_version; 275 unsigned int file_version;
275 size_t iv_bytes; 276 size_t iv_bytes;
276 size_t num_header_bytes_at_front; 277 size_t metadata_size;
277 size_t extent_size; /* Data extent size; default is 4096 */ 278 size_t extent_size; /* Data extent size; default is 4096 */
278 size_t key_size; 279 size_t key_size;
279 size_t extent_shift; 280 size_t extent_shift;
@@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat {
393struct ecryptfs_sb_info { 394struct ecryptfs_sb_info {
394 struct super_block *wsi_sb; 395 struct super_block *wsi_sb;
395 struct ecryptfs_mount_crypt_stat mount_crypt_stat; 396 struct ecryptfs_mount_crypt_stat mount_crypt_stat;
397 struct backing_dev_info bdi;
396}; 398};
397 399
398/* file private data. */ 400/* file private data. */
@@ -464,6 +466,14 @@ struct ecryptfs_daemon {
464 466
465extern struct mutex ecryptfs_daemon_hash_mux; 467extern struct mutex ecryptfs_daemon_hash_mux;
466 468
469static inline size_t
470ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat)
471{
472 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
473 return 0;
474 return crypt_stat->metadata_size;
475}
476
467static inline struct ecryptfs_file_info * 477static inline struct ecryptfs_file_info *
468ecryptfs_file_to_private(struct file *file) 478ecryptfs_file_to_private(struct file *file)
469{ 479{
@@ -651,6 +661,9 @@ int ecryptfs_decrypt_page(struct page *page);
651int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); 661int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
652int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); 662int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
653int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); 663int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
664void ecryptfs_write_crypt_stat_flags(char *page_virt,
665 struct ecryptfs_crypt_stat *crypt_stat,
666 size_t *written);
654int ecryptfs_read_and_validate_header_region(char *data, 667int ecryptfs_read_and_validate_header_region(char *data,
655 struct inode *ecryptfs_inode); 668 struct inode *ecryptfs_inode);
656int ecryptfs_read_and_validate_xattr_region(char *page_virt, 669int ecryptfs_read_and_validate_xattr_region(char *page_virt,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index d3362faf3852..e2d4418affac 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -324,6 +324,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
324 rc = ecryptfs_read_and_validate_header_region(page_virt, 324 rc = ecryptfs_read_and_validate_header_region(page_virt,
325 ecryptfs_dentry->d_inode); 325 ecryptfs_dentry->d_inode);
326 if (rc) { 326 if (rc) {
327 memset(page_virt, 0, PAGE_CACHE_SIZE);
327 rc = ecryptfs_read_and_validate_xattr_region(page_virt, 328 rc = ecryptfs_read_and_validate_xattr_region(page_virt,
328 ecryptfs_dentry); 329 ecryptfs_dentry);
329 if (rc) { 330 if (rc) {
@@ -336,7 +337,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
336 ecryptfs_dentry->d_sb)->mount_crypt_stat; 337 ecryptfs_dentry->d_sb)->mount_crypt_stat;
337 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { 338 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
338 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 339 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
339 file_size = (crypt_stat->num_header_bytes_at_front 340 file_size = (crypt_stat->metadata_size
340 + i_size_read(lower_dentry->d_inode)); 341 + i_size_read(lower_dentry->d_inode));
341 else 342 else
342 file_size = i_size_read(lower_dentry->d_inode); 343 file_size = i_size_read(lower_dentry->d_inode);
@@ -388,9 +389,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
388 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); 389 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
389 if (IS_ERR(lower_dentry)) { 390 if (IS_ERR(lower_dentry)) {
390 rc = PTR_ERR(lower_dentry); 391 rc = PTR_ERR(lower_dentry);
391 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 392 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
392 "lower_dentry = [%s]\n", __func__, rc, 393 "[%d] on lower_dentry = [%s]\n", __func__, rc,
393 ecryptfs_dentry->d_name.name); 394 encrypted_and_encoded_name);
394 goto out_d_drop; 395 goto out_d_drop;
395 } 396 }
396 if (lower_dentry->d_inode) 397 if (lower_dentry->d_inode)
@@ -417,9 +418,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
417 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); 418 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
418 if (IS_ERR(lower_dentry)) { 419 if (IS_ERR(lower_dentry)) {
419 rc = PTR_ERR(lower_dentry); 420 rc = PTR_ERR(lower_dentry);
420 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 421 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
421 "lower_dentry = [%s]\n", __func__, rc, 422 "[%d] on lower_dentry = [%s]\n", __func__, rc,
422 encrypted_and_encoded_name); 423 encrypted_and_encoded_name);
423 goto out_d_drop; 424 goto out_d_drop;
424 } 425 }
425lookup_and_interpose: 426lookup_and_interpose:
@@ -456,8 +457,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
456 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); 457 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
457 if (rc) 458 if (rc)
458 goto out_lock; 459 goto out_lock;
459 fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); 460 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
460 fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); 461 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
461 old_dentry->d_inode->i_nlink = 462 old_dentry->d_inode->i_nlink =
462 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; 463 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
463 i_size_write(new_dentry->d_inode, file_size_save); 464 i_size_write(new_dentry->d_inode, file_size_save);
@@ -648,38 +649,17 @@ out_lock:
648 return rc; 649 return rc;
649} 650}
650 651
651static int 652static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf,
652ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) 653 size_t *bufsiz)
653{ 654{
655 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
654 char *lower_buf; 656 char *lower_buf;
655 size_t lower_bufsiz; 657 size_t lower_bufsiz = PATH_MAX;
656 struct dentry *lower_dentry;
657 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
658 char *plaintext_name;
659 size_t plaintext_name_size;
660 mm_segment_t old_fs; 658 mm_segment_t old_fs;
661 int rc; 659 int rc;
662 660
663 lower_dentry = ecryptfs_dentry_to_lower(dentry);
664 if (!lower_dentry->d_inode->i_op->readlink) {
665 rc = -EINVAL;
666 goto out;
667 }
668 mount_crypt_stat = &ecryptfs_superblock_to_private(
669 dentry->d_sb)->mount_crypt_stat;
670 /*
671 * If the lower filename is encrypted, it will result in a significantly
672 * longer name. If needed, truncate the name after decode and decrypt.
673 */
674 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
675 lower_bufsiz = PATH_MAX;
676 else
677 lower_bufsiz = bufsiz;
678 /* Released in this function */
679 lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); 661 lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
680 if (lower_buf == NULL) { 662 if (!lower_buf) {
681 printk(KERN_ERR "%s: Out of memory whilst attempting to "
682 "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
683 rc = -ENOMEM; 663 rc = -ENOMEM;
684 goto out; 664 goto out;
685 } 665 }
@@ -689,29 +669,31 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
689 (char __user *)lower_buf, 669 (char __user *)lower_buf,
690 lower_bufsiz); 670 lower_bufsiz);
691 set_fs(old_fs); 671 set_fs(old_fs);
692 if (rc >= 0) { 672 if (rc < 0)
693 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, 673 goto out;
694 &plaintext_name_size, 674 lower_bufsiz = rc;
695 dentry, lower_buf, 675 rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry,
696 rc); 676 lower_buf, lower_bufsiz);
697 if (rc) { 677out:
698 printk(KERN_ERR "%s: Error attempting to decode and "
699 "decrypt filename; rc = [%d]\n", __func__,
700 rc);
701 goto out_free_lower_buf;
702 }
703 /* Check for bufsiz <= 0 done in sys_readlinkat() */
704 rc = copy_to_user(buf, plaintext_name,
705 min((size_t) bufsiz, plaintext_name_size));
706 if (rc)
707 rc = -EFAULT;
708 else
709 rc = plaintext_name_size;
710 kfree(plaintext_name);
711 fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
712 }
713out_free_lower_buf:
714 kfree(lower_buf); 678 kfree(lower_buf);
679 return rc;
680}
681
682static int
683ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
684{
685 char *kbuf;
686 size_t kbufsiz, copied;
687 int rc;
688
689 rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz);
690 if (rc)
691 goto out;
692 copied = min_t(size_t, bufsiz, kbufsiz);
693 rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied;
694 kfree(kbuf);
695 fsstack_copy_attr_atime(dentry->d_inode,
696 ecryptfs_dentry_to_lower(dentry)->d_inode);
715out: 697out:
716 return rc; 698 return rc;
717} 699}
@@ -769,7 +751,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
769{ 751{
770 loff_t lower_size; 752 loff_t lower_size;
771 753
772 lower_size = crypt_stat->num_header_bytes_at_front; 754 lower_size = ecryptfs_lower_header_size(crypt_stat);
773 if (upper_size != 0) { 755 if (upper_size != 0) {
774 loff_t num_extents; 756 loff_t num_extents;
775 757
@@ -1016,6 +998,28 @@ out:
1016 return rc; 998 return rc;
1017} 999}
1018 1000
1001int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
1002 struct kstat *stat)
1003{
1004 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
1005 int rc = 0;
1006
1007 mount_crypt_stat = &ecryptfs_superblock_to_private(
1008 dentry->d_sb)->mount_crypt_stat;
1009 generic_fillattr(dentry->d_inode, stat);
1010 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
1011 char *target;
1012 size_t targetsiz;
1013
1014 rc = ecryptfs_readlink_lower(dentry, &target, &targetsiz);
1015 if (!rc) {
1016 kfree(target);
1017 stat->size = targetsiz;
1018 }
1019 }
1020 return rc;
1021}
1022
1019int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1023int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1020 struct kstat *stat) 1024 struct kstat *stat)
1021{ 1025{
@@ -1040,7 +1044,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
1040 1044
1041 lower_dentry = ecryptfs_dentry_to_lower(dentry); 1045 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1042 if (!lower_dentry->d_inode->i_op->setxattr) { 1046 if (!lower_dentry->d_inode->i_op->setxattr) {
1043 rc = -ENOSYS; 1047 rc = -EOPNOTSUPP;
1044 goto out; 1048 goto out;
1045 } 1049 }
1046 mutex_lock(&lower_dentry->d_inode->i_mutex); 1050 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1058,7 +1062,7 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
1058 int rc = 0; 1062 int rc = 0;
1059 1063
1060 if (!lower_dentry->d_inode->i_op->getxattr) { 1064 if (!lower_dentry->d_inode->i_op->getxattr) {
1061 rc = -ENOSYS; 1065 rc = -EOPNOTSUPP;
1062 goto out; 1066 goto out;
1063 } 1067 }
1064 mutex_lock(&lower_dentry->d_inode->i_mutex); 1068 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1085,7 +1089,7 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
1085 1089
1086 lower_dentry = ecryptfs_dentry_to_lower(dentry); 1090 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1087 if (!lower_dentry->d_inode->i_op->listxattr) { 1091 if (!lower_dentry->d_inode->i_op->listxattr) {
1088 rc = -ENOSYS; 1092 rc = -EOPNOTSUPP;
1089 goto out; 1093 goto out;
1090 } 1094 }
1091 mutex_lock(&lower_dentry->d_inode->i_mutex); 1095 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1102,7 +1106,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
1102 1106
1103 lower_dentry = ecryptfs_dentry_to_lower(dentry); 1107 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1104 if (!lower_dentry->d_inode->i_op->removexattr) { 1108 if (!lower_dentry->d_inode->i_op->removexattr) {
1105 rc = -ENOSYS; 1109 rc = -EOPNOTSUPP;
1106 goto out; 1110 goto out;
1107 } 1111 }
1108 mutex_lock(&lower_dentry->d_inode->i_mutex); 1112 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1133,6 +1137,7 @@ const struct inode_operations ecryptfs_symlink_iops = {
1133 .put_link = ecryptfs_put_link, 1137 .put_link = ecryptfs_put_link,
1134 .permission = ecryptfs_permission, 1138 .permission = ecryptfs_permission,
1135 .setattr = ecryptfs_setattr, 1139 .setattr = ecryptfs_setattr,
1140 .getattr = ecryptfs_getattr_link,
1136 .setxattr = ecryptfs_setxattr, 1141 .setxattr = ecryptfs_setxattr,
1137 .getxattr = ecryptfs_getxattr, 1142 .getxattr = ecryptfs_getxattr,
1138 .listxattr = ecryptfs_listxattr, 1143 .listxattr = ecryptfs_listxattr,
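The readlink rework above splits the work in two: ecryptfs_readlink_lower() reads the lower symlink into a PATH_MAX kernel buffer, decodes and decrypts it, and hands back the buffer plus its length, while ecryptfs_readlink() merely clamps to the caller's buffer and copies out, returning -EFAULT or the number of bytes copied. A minimal userspace sketch of that fill-then-clamp-and-copy pattern follows; the helper names are invented, and malloc()/memcpy() stand in for the decrypt step and copy_to_user().

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for the decode+decrypt step: just duplicates the name. */
static int readlink_lower_sketch(const char *lower_target, char **buf, size_t *bufsiz)
{
	size_t n = strlen(lower_target);
	char *k = malloc(n);

	if (!k)
		return -ENOMEM;
	memcpy(k, lower_target, n);	/* decryption would happen here */
	*buf = k;
	*bufsiz = n;
	return 0;
}

/* Clamp to the caller's buffer, mimicking readlink(2) truncation rules. */
static int readlink_sketch(const char *lower_target, char *ubuf, size_t ubufsiz)
{
	char *kbuf;
	size_t kbufsiz, copied;
	int rc = readlink_lower_sketch(lower_target, &kbuf, &kbufsiz);

	if (rc)
		return rc;
	copied = ubufsiz < kbufsiz ? ubufsiz : kbufsiz;
	memcpy(ubuf, kbuf, copied);	/* copy_to_user() in the kernel */
	free(kbuf);
	return (int)copied;
}

int main(void)
{
	char out[8];
	int n = readlink_sketch("decrypted-target-name", out, sizeof(out));

	if (n < 0)
		return 1;
	printf("returned %d bytes: %.*s\n", n, n, out);
	return 0;
}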
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index af1a8f01ebac..760983d0f25e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache;
497static int 497static int
498ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) 498ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
499{ 499{
500 struct ecryptfs_sb_info *esi;
500 int rc = 0; 501 int rc = 0;
501 502
502 /* Released in ecryptfs_put_super() */ 503 /* Released in ecryptfs_put_super() */
503 ecryptfs_set_superblock_private(sb, 504 ecryptfs_set_superblock_private(sb,
504 kmem_cache_zalloc(ecryptfs_sb_info_cache, 505 kmem_cache_zalloc(ecryptfs_sb_info_cache,
505 GFP_KERNEL)); 506 GFP_KERNEL));
506 if (!ecryptfs_superblock_to_private(sb)) { 507 esi = ecryptfs_superblock_to_private(sb);
508 if (!esi) {
507 ecryptfs_printk(KERN_WARNING, "Out of memory\n"); 509 ecryptfs_printk(KERN_WARNING, "Out of memory\n");
508 rc = -ENOMEM; 510 rc = -ENOMEM;
509 goto out; 511 goto out;
510 } 512 }
513
514 rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
515 if (rc)
516 goto out;
517
518 sb->s_bdi = &esi->bdi;
511 sb->s_op = &ecryptfs_sops; 519 sb->s_op = &ecryptfs_sops;
512 /* Released through deactivate_super(sb) from get_sb_nodev */ 520 /* Released through deactivate_super(sb) from get_sb_nodev */
513 sb->s_root = d_alloc(NULL, &(const struct qstr) { 521 sb->s_root = d_alloc(NULL, &(const struct qstr) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index d491237c98e7..2ee9a3a7b68c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -83,6 +83,19 @@ out:
83 return rc; 83 return rc;
84} 84}
85 85
86static void strip_xattr_flag(char *page_virt,
87 struct ecryptfs_crypt_stat *crypt_stat)
88{
89 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
90 size_t written;
91
92 crypt_stat->flags &= ~ECRYPTFS_METADATA_IN_XATTR;
93 ecryptfs_write_crypt_stat_flags(page_virt, crypt_stat,
94 &written);
95 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
96 }
97}
98
86/** 99/**
87 * Header Extent: 100 * Header Extent:
88 * Octets 0-7: Unencrypted file size (big-endian) 101 * Octets 0-7: Unencrypted file size (big-endian)
@@ -98,19 +111,6 @@ out:
98 * (big-endian) 111 * (big-endian)
99 * Octet 26: Begin RFC 2440 authentication token packet set 112 * Octet 26: Begin RFC 2440 authentication token packet set
100 */ 113 */
101static void set_header_info(char *page_virt,
102 struct ecryptfs_crypt_stat *crypt_stat)
103{
104 size_t written;
105 size_t save_num_header_bytes_at_front =
106 crypt_stat->num_header_bytes_at_front;
107
108 crypt_stat->num_header_bytes_at_front =
109 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
110 ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written);
111 crypt_stat->num_header_bytes_at_front =
112 save_num_header_bytes_at_front;
113}
114 114
115/** 115/**
116 * ecryptfs_copy_up_encrypted_with_header 116 * ecryptfs_copy_up_encrypted_with_header
@@ -136,8 +136,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
136 * num_extents_per_page) 136 * num_extents_per_page)
137 + extent_num_in_page); 137 + extent_num_in_page);
138 size_t num_header_extents_at_front = 138 size_t num_header_extents_at_front =
139 (crypt_stat->num_header_bytes_at_front 139 (crypt_stat->metadata_size / crypt_stat->extent_size);
140 / crypt_stat->extent_size);
141 140
142 if (view_extent_num < num_header_extents_at_front) { 141 if (view_extent_num < num_header_extents_at_front) {
143 /* This is a header extent */ 142 /* This is a header extent */
@@ -147,9 +146,14 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
147 memset(page_virt, 0, PAGE_CACHE_SIZE); 146 memset(page_virt, 0, PAGE_CACHE_SIZE);
148 /* TODO: Support more than one header extent */ 147 /* TODO: Support more than one header extent */
149 if (view_extent_num == 0) { 148 if (view_extent_num == 0) {
149 size_t written;
150
150 rc = ecryptfs_read_xattr_region( 151 rc = ecryptfs_read_xattr_region(
151 page_virt, page->mapping->host); 152 page_virt, page->mapping->host);
152 set_header_info(page_virt, crypt_stat); 153 strip_xattr_flag(page_virt + 16, crypt_stat);
154 ecryptfs_write_header_metadata(page_virt + 20,
155 crypt_stat,
156 &written);
153 } 157 }
154 kunmap_atomic(page_virt, KM_USER0); 158 kunmap_atomic(page_virt, KM_USER0);
155 flush_dcache_page(page); 159 flush_dcache_page(page);
@@ -162,7 +166,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
162 /* This is an encrypted data extent */ 166 /* This is an encrypted data extent */
163 loff_t lower_offset = 167 loff_t lower_offset =
164 ((view_extent_num * crypt_stat->extent_size) 168 ((view_extent_num * crypt_stat->extent_size)
165 - crypt_stat->num_header_bytes_at_front); 169 - crypt_stat->metadata_size);
166 170
167 rc = ecryptfs_read_lower_page_segment( 171 rc = ecryptfs_read_lower_page_segment(
168 page, (lower_offset >> PAGE_CACHE_SHIFT), 172 page, (lower_offset >> PAGE_CACHE_SHIFT),
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index fcef41c1d2cf..0c0ae491d231 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -86,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
86 if (lower_dentry->d_inode) { 86 if (lower_dentry->d_inode) {
87 fput(inode_info->lower_file); 87 fput(inode_info->lower_file);
88 inode_info->lower_file = NULL; 88 inode_info->lower_file = NULL;
89 d_drop(lower_dentry);
90 } 89 }
91 } 90 }
92 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 91 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
@@ -123,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb)
123 lock_kernel(); 122 lock_kernel();
124 123
125 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); 124 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
125 bdi_destroy(&sb_info->bdi);
126 kmem_cache_free(ecryptfs_sb_info_cache, sb_info); 126 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
127 ecryptfs_set_superblock_private(sb, NULL); 127 ecryptfs_set_superblock_private(sb, NULL);
128 128
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b9..e6e94c626c2c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
1387 if (retval < 0) 1387 if (retval < 0)
1388 goto out; 1388 goto out;
1389 1389
1390 current->stack_start = current->mm->start_stack;
1391
1392 /* execve succeeded */ 1390 /* execve succeeded */
1393 current->fs->in_exec = 0; 1391 current->fs->in_exec = 0;
1394 current->in_execve = 0; 1392 current->in_execve = 0;
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 8442e353309f..22721b2fd890 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -35,6 +35,7 @@
35 35
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/backing-dev.h>
38#include "common.h" 39#include "common.h"
39 40
40/* FIXME: Remove once pnfs hits mainline 41/* FIXME: Remove once pnfs hits mainline
@@ -84,6 +85,7 @@ struct exofs_sb_info {
84 u32 s_next_generation; /* next gen # to use */ 85 u32 s_next_generation; /* next gen # to use */
85 atomic_t s_curr_pending; /* number of pending commands */ 86 atomic_t s_curr_pending; /* number of pending commands */
86 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ 87 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
88 struct backing_dev_info bdi; /* register our bdi with VFS */
87 89
88 struct pnfs_osd_data_map data_map; /* Default raid to use 90 struct pnfs_osd_data_map data_map; /* Default raid to use
89 * FIXME: Needed ? 91 * FIXME: Needed ?
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 18e57ea1e5b4..03149b9a5178 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb)
302 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], 302 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
303 sbi->layout.s_pid); 303 sbi->layout.s_pid);
304 304
305 bdi_destroy(&sbi->bdi);
305 exofs_free_sbi(sbi); 306 exofs_free_sbi(sbi);
306 sb->s_fs_info = NULL; 307 sb->s_fs_info = NULL;
307} 308}
@@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
546 if (!sbi) 547 if (!sbi)
547 return -ENOMEM; 548 return -ENOMEM;
548 549
550 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
551 if (ret)
552 goto free_bdi;
553
549 /* use mount options to fill superblock */ 554 /* use mount options to fill superblock */
550 od = osduld_path_lookup(opts->dev_name); 555 od = osduld_path_lookup(opts->dev_name);
551 if (IS_ERR(od)) { 556 if (IS_ERR(od)) {
@@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
612 } 617 }
613 618
614 /* set up operation vectors */ 619 /* set up operation vectors */
620 sb->s_bdi = &sbi->bdi;
615 sb->s_fs_info = sbi; 621 sb->s_fs_info = sbi;
616 sb->s_op = &exofs_sops; 622 sb->s_op = &exofs_sops;
617 sb->s_export_op = &exofs_export_ops; 623 sb->s_export_op = &exofs_export_ops;
@@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
643 return 0; 649 return 0;
644 650
645free_sbi: 651free_sbi:
652 bdi_destroy(&sbi->bdi);
653free_bdi:
646 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 654 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
647 opts->dev_name, sbi->layout.s_pid, ret); 655 opts->dev_name, sbi->layout.s_pid, ret);
648 exofs_free_sbi(sbi); 656 exofs_free_sbi(sbi);
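Both eCryptfs and exofs now carry a per-mount backing_dev_info: bdi_setup_and_register() during fill_super, sb->s_bdi pointed at it, and bdi_destroy() in put_super and on every failure path reached after registration. The exofs hunk also shows the usual goto-unwind ordering, where each acquired resource is released in reverse order on error. The sketch below illustrates only that unwind idiom, with invented resource names and labels that intentionally do not match the patch's.

#include <stdio.h>

static int alloc_sbi(void)    { puts("sbi allocated");  return 0; }
static void free_sbi(void)    { puts("sbi freed");      }
static int register_bdi(void) { puts("bdi registered"); return 0; }
static void destroy_bdi(void) { puts("bdi destroyed");  }
static int lookup_dev(void)   { puts("device lookup");  return -1; /* simulate failure */ }

static int fill_super_sketch(void)
{
	int ret = alloc_sbi();

	if (ret)
		return ret;
	ret = register_bdi();
	if (ret)
		goto out_sbi;
	ret = lookup_dev();
	if (ret)
		goto out_bdi;
	return 0;

out_bdi:
	destroy_bdi();	/* undo in reverse order of setup */
out_sbi:
	free_sbi();
	return ret;
}

int main(void)
{
	return fill_super_sketch() ? 1 : 0;
}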
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 94c8ee81f5e1..236b834b4ca8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
3879 physical += offset; 3879 physical += offset;
3880 length = EXT4_SB(inode->i_sb)->s_inode_size - offset; 3880 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
3881 flags |= FIEMAP_EXTENT_DATA_INLINE; 3881 flags |= FIEMAP_EXTENT_DATA_INLINE;
3882 brelse(iloc.bh);
3882 } else { /* external block */ 3883 } else { /* external block */
3883 physical = EXT4_I(inode)->i_file_acl << blockbits; 3884 physical = EXT4_I(inode)->i_file_acl << blockbits;
3884 length = inode->i_sb->s_blocksize; 3885 length = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5381802d6052..81d605412844 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5375,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5375 } else { 5375 } else {
5376 struct ext4_iloc iloc; 5376 struct ext4_iloc iloc;
5377 5377
5378 err = ext4_get_inode_loc(inode, &iloc); 5378 err = __ext4_get_inode_loc(inode, &iloc, 0);
5379 if (err) 5379 if (err)
5380 return err; 5380 return err;
5381 if (wbc->sync_mode == WB_SYNC_ALL) 5381 if (wbc->sync_mode == WB_SYNC_ALL)
@@ -5386,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5386 (unsigned long long)iloc.bh->b_blocknr); 5386 (unsigned long long)iloc.bh->b_blocknr);
5387 err = -EIO; 5387 err = -EIO;
5388 } 5388 }
5389 brelse(iloc.bh);
5389 } 5390 }
5390 return err; 5391 return err;
5391} 5392}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bde9d0b170c2..b423a364dca3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2535,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2535 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2535 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2536 entry->count, entry->group, entry); 2536 entry->count, entry->group, entry);
2537 2537
2538 if (test_opt(sb, DISCARD)) {
2539 ext4_fsblk_t discard_block;
2540
2541 discard_block = entry->start_blk +
2542 ext4_group_first_block_no(sb, entry->group);
2543 trace_ext4_discard_blocks(sb,
2544 (unsigned long long)discard_block,
2545 entry->count);
2546 sb_issue_discard(sb, discard_block, entry->count);
2547 }
2548
2538 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2549 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2539 /* we expect to find existing buddy because it's pinned */ 2550 /* we expect to find existing buddy because it's pinned */
2540 BUG_ON(err != 0); 2551 BUG_ON(err != 0);
@@ -2556,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2556 page_cache_release(e4b.bd_bitmap_page); 2567 page_cache_release(e4b.bd_bitmap_page);
2557 } 2568 }
2558 ext4_unlock_group(sb, entry->group); 2569 ext4_unlock_group(sb, entry->group);
2559 if (test_opt(sb, DISCARD)) {
2560 ext4_fsblk_t discard_block;
2561
2562 discard_block = entry->start_blk +
2563 ext4_group_first_block_no(sb, entry->group);
2564 trace_ext4_discard_blocks(sb,
2565 (unsigned long long)discard_block,
2566 entry->count);
2567 sb_issue_discard(sb, discard_block, entry->count);
2568 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2570 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2571 ext4_mb_release_desc(&e4b);
2571 } 2572 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c751106c2e5..7faefb4da939 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
228 228
229#ifdef CONFIG_BLOCK 229#ifdef CONFIG_BLOCK
230 230
231#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) 231static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
232#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); 232{
233 return (offset >> inode->i_blkbits);
234}
235
236static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
237{
238 return (blk << inode->i_blkbits);
239}
233 240
234/** 241/**
235 * __generic_block_fiemap - FIEMAP for block based inodes (no locking) 242 * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
236 * @inode - the inode to map 243 * @inode: the inode to map
237 * @arg - the pointer to userspace where we copy everything to 244 * @fieinfo: the fiemap info struct that will be passed back to userspace
238 * @get_block - the fs's get_block function 245 * @start: where to start mapping in the inode
246 * @len: how much space to map
247 * @get_block: the fs's get_block function
239 * 248 *
240 * This does FIEMAP for block based inodes. Basically it will just loop 249 * This does FIEMAP for block based inodes. Basically it will just loop
241 * through get_block until we hit the number of extents we want to map, or we 250 * through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
250 */ 259 */
251 260
252int __generic_block_fiemap(struct inode *inode, 261int __generic_block_fiemap(struct inode *inode,
253 struct fiemap_extent_info *fieinfo, u64 start, 262 struct fiemap_extent_info *fieinfo, loff_t start,
254 u64 len, get_block_t *get_block) 263 loff_t len, get_block_t *get_block)
255{ 264{
256 struct buffer_head tmp; 265 struct buffer_head map_bh;
257 unsigned long long start_blk; 266 sector_t start_blk, last_blk;
258 long long length = 0, map_len = 0; 267 loff_t isize = i_size_read(inode);
259 u64 logical = 0, phys = 0, size = 0; 268 u64 logical = 0, phys = 0, size = 0;
260 u32 flags = FIEMAP_EXTENT_MERGED; 269 u32 flags = FIEMAP_EXTENT_MERGED;
261 int ret = 0, past_eof = 0, whole_file = 0; 270 bool past_eof = false, whole_file = false;
271 int ret = 0;
262 272
263 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) 273 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
274 if (ret)
264 return ret; 275 return ret;
265 276
266 start_blk = logical_to_blk(inode, start); 277 /*
267 278 * Either the i_mutex or other appropriate locking needs to be held
268 length = (long long)min_t(u64, len, i_size_read(inode)); 279 * since we expect isize to not change at all through the duration of
269 if (length < len) 280 * this call.
270 whole_file = 1; 281 */
282 if (len >= isize) {
283 whole_file = true;
284 len = isize;
285 }
271 286
272 map_len = length; 287 start_blk = logical_to_blk(inode, start);
288 last_blk = logical_to_blk(inode, start + len - 1);
273 289
274 do { 290 do {
275 /* 291 /*
276 * we set b_size to the total size we want so it will map as 292 * we set b_size to the total size we want so it will map as
277 * many contiguous blocks as possible at once 293 * many contiguous blocks as possible at once
278 */ 294 */
279 memset(&tmp, 0, sizeof(struct buffer_head)); 295 memset(&map_bh, 0, sizeof(struct buffer_head));
280 tmp.b_size = map_len; 296 map_bh.b_size = len;
281 297
282 ret = get_block(inode, start_blk, &tmp, 0); 298 ret = get_block(inode, start_blk, &map_bh, 0);
283 if (ret) 299 if (ret)
284 break; 300 break;
285 301
286 /* HOLE */ 302 /* HOLE */
287 if (!buffer_mapped(&tmp)) { 303 if (!buffer_mapped(&map_bh)) {
288 length -= blk_to_logical(inode, 1);
289 start_blk++; 304 start_blk++;
290 305
291 /* 306 /*
292 * we want to handle the case where there is an 307 * We want to handle the case where there is an
293 * allocated block at the front of the file, and then 308 * allocated block at the front of the file, and then
294 * nothing but holes up to the end of the file properly, 309 * nothing but holes up to the end of the file properly,
295 * to make sure that extent at the front gets properly 310 * to make sure that extent at the front gets properly
296 * marked with FIEMAP_EXTENT_LAST 311 * marked with FIEMAP_EXTENT_LAST
297 */ 312 */
298 if (!past_eof && 313 if (!past_eof &&
299 blk_to_logical(inode, start_blk) >= 314 blk_to_logical(inode, start_blk) >= isize)
300 blk_to_logical(inode, 0)+i_size_read(inode))
301 past_eof = 1; 315 past_eof = 1;
302 316
303 /* 317 /*
304 * first hole after going past the EOF, this is our 318 * First hole after going past the EOF, this is our
305 * last extent 319 * last extent
306 */ 320 */
307 if (past_eof && size) { 321 if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
309 ret = fiemap_fill_next_extent(fieinfo, logical, 323 ret = fiemap_fill_next_extent(fieinfo, logical,
310 phys, size, 324 phys, size,
311 flags); 325 flags);
312 break; 326 } else if (size) {
327 ret = fiemap_fill_next_extent(fieinfo, logical,
328 phys, size, flags);
329 size = 0;
313 } 330 }
314 331
315 /* if we have holes up to/past EOF then we're done */ 332 /* if we have holes up to/past EOF then we're done */
316 if (length <= 0 || past_eof) 333 if (start_blk > last_blk || past_eof || ret)
317 break; 334 break;
318 } else { 335 } else {
319 /* 336 /*
320 * we have gone over the length of what we wanted to 337 * We have gone over the length of what we wanted to
321 * map, and it wasn't the entire file, so add the extent 338 * map, and it wasn't the entire file, so add the extent
322 * we got last time and exit. 339 * we got last time and exit.
323 * 340 *
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
331 * are good to go, just add the extent to the fieinfo 348 * are good to go, just add the extent to the fieinfo
332 * and break 349 * and break
333 */ 350 */
334 if (length <= 0 && !whole_file) { 351 if (start_blk > last_blk && !whole_file) {
335 ret = fiemap_fill_next_extent(fieinfo, logical, 352 ret = fiemap_fill_next_extent(fieinfo, logical,
336 phys, size, 353 phys, size,
337 flags); 354 flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
351 } 368 }
352 369
353 logical = blk_to_logical(inode, start_blk); 370 logical = blk_to_logical(inode, start_blk);
354 phys = blk_to_logical(inode, tmp.b_blocknr); 371 phys = blk_to_logical(inode, map_bh.b_blocknr);
355 size = tmp.b_size; 372 size = map_bh.b_size;
356 flags = FIEMAP_EXTENT_MERGED; 373 flags = FIEMAP_EXTENT_MERGED;
357 374
358 length -= tmp.b_size;
359 start_blk += logical_to_blk(inode, size); 375 start_blk += logical_to_blk(inode, size);
360 376
361 /* 377 /*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
363 * soon as we find a hole that the last extent we found 379 * soon as we find a hole that the last extent we found
364 * is marked with FIEMAP_EXTENT_LAST 380 * is marked with FIEMAP_EXTENT_LAST
365 */ 381 */
366 if (!past_eof && 382 if (!past_eof && logical + size >= isize)
367 logical+size >= 383 past_eof = true;
368 blk_to_logical(inode, 0)+i_size_read(inode))
369 past_eof = 1;
370 } 384 }
371 cond_resched(); 385 cond_resched();
372 } while (1); 386 } while (1);
373 387
374 /* if ret is 1 then we just hit the end of the extent array */ 388 /* If ret is 1 then we just hit the end of the extent array */
375 if (ret == 1) 389 if (ret == 1)
376 ret = 0; 390 ret = 0;
377 391
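The fiemap rework replaces the old conversion macros (the removed logical_to_blk() macro even ended in a stray semicolon) with typed inline helpers, and drops the signed byte countdown in favour of comparing start_blk against a last_blk derived from the length clamped to i_size. A standalone sketch of the block/byte conversions, assuming a 4 KiB block size (i_blkbits = 12):

#include <stdint.h>
#include <stdio.h>

/* i_blkbits for a 4 KiB block size. */
#define BLKBITS 12

static inline uint64_t logical_to_blk(uint64_t offset) { return offset >> BLKBITS; }
static inline uint64_t blk_to_logical(uint64_t blk)    { return blk << BLKBITS;    }

int main(void)
{
	uint64_t start = 5000, len = 20000;	/* byte range to map */
	uint64_t start_blk = logical_to_blk(start);
	uint64_t last_blk  = logical_to_blk(start + len - 1);

	/* Loop condition used by the rework: stop once start_blk > last_blk. */
	printf("blocks %llu..%llu, block %llu starts at byte %llu\n",
	       (unsigned long long)start_blk, (unsigned long long)last_blk,
	       (unsigned long long)start_blk,
	       (unsigned long long)blk_to_logical(start_blk));
	return 0;
}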
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9dd126276c9f..ed9ba6fe04f5 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
61 inode->i_op = &page_symlink_inode_operations; 61 inode->i_op = &page_symlink_inode_operations;
62 inode->i_mapping->a_ops = &jfs_aops; 62 inode->i_mapping->a_ops = &jfs_aops;
63 } else { 63 } else {
64 inode->i_op = &jfs_symlink_inode_operations; 64 inode->i_op = &jfs_fast_symlink_inode_operations;
65 /* 65 /*
66 * The inline data should be null-terminated, but 66 * The inline data should be null-terminated, but
67 * don't let on-disk corruption crash the kernel 67 * don't let on-disk corruption crash the kernel
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 6c4dfcbf3f55..9e2f6a721668 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -196,7 +196,7 @@ int dbMount(struct inode *ipbmap)
196 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); 196 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
197 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); 197 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
198 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); 198 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
199 bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); 199 bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
200 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); 200 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
201 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); 201 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
202 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); 202 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
@@ -288,7 +288,7 @@ int dbSync(struct inode *ipbmap)
288 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); 288 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
289 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); 289 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
290 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); 290 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
291 dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); 291 dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
292 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); 292 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
293 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); 293 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
294 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); 294 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
@@ -1441,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1441 * tree index of this allocation group within the control page. 1441 * tree index of this allocation group within the control page.
1442 */ 1442 */
1443 agperlev = 1443 agperlev =
1444 (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; 1444 (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
1445 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); 1445 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
1446 1446
1447 /* dmap control page trees fan-out by 4 and a single allocation 1447 /* dmap control page trees fan-out by 4 and a single allocation
@@ -1460,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1460 * the subtree to find the leftmost leaf that describes this 1460 * the subtree to find the leftmost leaf that describes this
1461 * free space. 1461 * free space.
1462 */ 1462 */
1463 for (k = bmp->db_agheigth; k > 0; k--) { 1463 for (k = bmp->db_agheight; k > 0; k--) {
1464 for (n = 0, m = (ti << 2) + 1; n < 4; n++) { 1464 for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
1465 if (l2nb <= dcp->stree[m + n]) { 1465 if (l2nb <= dcp->stree[m + n]) {
1466 ti = m + n; 1466 ti = m + n;
@@ -3607,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
3607 } 3607 }
3608 3608
3609 /* 3609 /*
3610 * compute db_aglevel, db_agheigth, db_width, db_agstart: 3610 * compute db_aglevel, db_agheight, db_width, db_agstart:
3611 * an ag is covered in aglevel dmapctl summary tree, 3611 * an ag is covered in aglevel dmapctl summary tree,
3612 * at agheight level height (from leaf) with agwidth number of nodes 3612 * at agheight level height (from leaf) with agwidth number of nodes
3613 * each, which starts at agstart index node of the smmary tree node 3613 * each, which starts at agstart index node of the smmary tree node
@@ -3616,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap)
3616 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); 3616 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
3617 l2nl = 3617 l2nl =
3618 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); 3618 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
3619 bmp->db_agheigth = l2nl >> 1; 3619 bmp->db_agheight = l2nl >> 1;
3620 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); 3620 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
3621 for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; 3621 for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
3622 i--) { 3622 i--) {
3623 bmp->db_agstart += n; 3623 bmp->db_agstart += n;
3624 n <<= 2; 3624 n <<= 2;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 1a6eb41569bc..6dcb906c55d8 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -210,7 +210,7 @@ struct dbmap_disk {
210 __le32 dn_maxag; /* 4: max active alloc group number */ 210 __le32 dn_maxag; /* 4: max active alloc group number */
211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */ 211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */
212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ 212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */
213 __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ 213 __le32 dn_agheight; /* 4: height in dmapctl of the AG */
214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ 214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */
215 __le32 dn_agstart; /* 4: start tree index at AG height */ 215 __le32 dn_agstart; /* 4: start tree index at AG height */
216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ 216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
@@ -229,7 +229,7 @@ struct dbmap {
229 int dn_maxag; /* max active alloc group number */ 229 int dn_maxag; /* max active alloc group number */
230 int dn_agpref; /* preferred alloc group (hint) */ 230 int dn_agpref; /* preferred alloc group (hint) */
231 int dn_aglevel; /* dmapctl level holding the AG */ 231 int dn_aglevel; /* dmapctl level holding the AG */
232 int dn_agheigth; /* height in dmapctl of the AG */ 232 int dn_agheight; /* height in dmapctl of the AG */
233 int dn_agwidth; /* width in dmapctl of the AG */ 233 int dn_agwidth; /* width in dmapctl of the AG */
234 int dn_agstart; /* start tree index at AG height */ 234 int dn_agstart; /* start tree index at AG height */
235 int dn_agl2size; /* l2 num of blks per alloc group */ 235 int dn_agl2size; /* l2 num of blks per alloc group */
@@ -255,7 +255,7 @@ struct bmap {
255#define db_agsize db_bmap.dn_agsize 255#define db_agsize db_bmap.dn_agsize
256#define db_agl2size db_bmap.dn_agl2size 256#define db_agl2size db_bmap.dn_agl2size
257#define db_agwidth db_bmap.dn_agwidth 257#define db_agwidth db_bmap.dn_agwidth
258#define db_agheigth db_bmap.dn_agheigth 258#define db_agheight db_bmap.dn_agheight
259#define db_agstart db_bmap.dn_agstart 259#define db_agstart db_bmap.dn_agstart
260#define db_numag db_bmap.dn_numag 260#define db_numag db_bmap.dn_numag
261#define db_maxlevel db_bmap.dn_maxlevel 261#define db_maxlevel db_bmap.dn_maxlevel
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 79e2c79661df..9e6bda30a6e8 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations;
48extern const struct inode_operations jfs_file_inode_operations; 48extern const struct inode_operations jfs_file_inode_operations;
49extern const struct file_operations jfs_file_operations; 49extern const struct file_operations jfs_file_operations;
50extern const struct inode_operations jfs_symlink_inode_operations; 50extern const struct inode_operations jfs_symlink_inode_operations;
51extern const struct inode_operations jfs_fast_symlink_inode_operations;
51extern const struct dentry_operations jfs_ci_dentry_operations; 52extern const struct dentry_operations jfs_ci_dentry_operations;
52#endif /* _H_JFS_INODE */ 53#endif /* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 4a3e9f39c21d..a9cf8e8675be 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
956 */ 956 */
957 957
958 if (ssize <= IDATASIZE) { 958 if (ssize <= IDATASIZE) {
959 ip->i_op = &jfs_symlink_inode_operations; 959 ip->i_op = &jfs_fast_symlink_inode_operations;
960 960
961 i_fastsymlink = JFS_IP(ip)->i_inline; 961 i_fastsymlink = JFS_IP(ip)->i_inline;
962 memcpy(i_fastsymlink, name, ssize); 962 memcpy(i_fastsymlink, name, ssize);
@@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
978 else { 978 else {
979 jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); 979 jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
980 980
981 ip->i_op = &page_symlink_inode_operations; 981 ip->i_op = &jfs_symlink_inode_operations;
982 ip->i_mapping->a_ops = &jfs_aops; 982 ip->i_mapping->a_ops = &jfs_aops;
983 983
984 /* 984 /*
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 7f24a0bb08ca..1aba0039f1c9 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
81 struct inode *iplist[1]; 81 struct inode *iplist[1];
82 struct jfs_superblock *j_sb, *j_sb2; 82 struct jfs_superblock *j_sb, *j_sb2;
83 uint old_agsize; 83 uint old_agsize;
84 int agsizechanged = 0;
84 struct buffer_head *bh, *bh2; 85 struct buffer_head *bh, *bh2;
85 86
86 /* If the volume hasn't grown, get out now */ 87 /* If the volume hasn't grown, get out now */
@@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
333 */ 334 */
334 if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) 335 if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
335 goto error_out; 336 goto error_out;
337
338 agsizechanged |= (bmp->db_agsize != old_agsize);
339
336 /* 340 /*
337 * the map now has extended to cover additional nblocks: 341 * the map now has extended to cover additional nblocks:
338 * dn_mapsize = oldMapsize + nblocks; 342 * dn_mapsize = oldMapsize + nblocks;
@@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
432 * will correctly identify the new ag); 436 * will correctly identify the new ag);
433 */ 437 */
434 /* if new AG size the same as old AG size, done! */ 438 /* if new AG size the same as old AG size, done! */
435 if (bmp->db_agsize != old_agsize) { 439 if (agsizechanged) {
436 if ((rc = diExtendFS(ipimap, ipbmap))) 440 if ((rc = diExtendFS(ipimap, ipbmap)))
437 goto error_out; 441 goto error_out;
438 442
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 157382fa6256..b66832ac33ac 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -446,10 +446,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
446 /* initialize the mount flag and determine the default error handler */ 446 /* initialize the mount flag and determine the default error handler */
447 flag = JFS_ERR_REMOUNT_RO; 447 flag = JFS_ERR_REMOUNT_RO;
448 448
449 if (!parse_options((char *) data, sb, &newLVSize, &flag)) { 449 if (!parse_options((char *) data, sb, &newLVSize, &flag))
450 kfree(sbi); 450 goto out_kfree;
451 return -EINVAL;
452 }
453 sbi->flag = flag; 451 sbi->flag = flag;
454 452
455#ifdef CONFIG_JFS_POSIX_ACL 453#ifdef CONFIG_JFS_POSIX_ACL
@@ -458,7 +456,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
458 456
459 if (newLVSize) { 457 if (newLVSize) {
460 printk(KERN_ERR "resize option for remount only\n"); 458 printk(KERN_ERR "resize option for remount only\n");
461 return -EINVAL; 459 goto out_kfree;
462 } 460 }
463 461
464 /* 462 /*
@@ -478,7 +476,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
478 inode = new_inode(sb); 476 inode = new_inode(sb);
479 if (inode == NULL) { 477 if (inode == NULL) {
480 ret = -ENOMEM; 478 ret = -ENOMEM;
481 goto out_kfree; 479 goto out_unload;
482 } 480 }
483 inode->i_ino = 0; 481 inode->i_ino = 0;
484 inode->i_nlink = 1; 482 inode->i_nlink = 1;
@@ -550,9 +548,10 @@ out_mount_failed:
550 make_bad_inode(sbi->direct_inode); 548 make_bad_inode(sbi->direct_inode);
551 iput(sbi->direct_inode); 549 iput(sbi->direct_inode);
552 sbi->direct_inode = NULL; 550 sbi->direct_inode = NULL;
553out_kfree: 551out_unload:
554 if (sbi->nls_tab) 552 if (sbi->nls_tab)
555 unload_nls(sbi->nls_tab); 553 unload_nls(sbi->nls_tab);
554out_kfree:
556 kfree(sbi); 555 kfree(sbi);
557 return ret; 556 return ret;
558} 557}
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 4af1a05aad0a..205b946d8e0d 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
29 return NULL; 29 return NULL;
30} 30}
31 31
32const struct inode_operations jfs_symlink_inode_operations = { 32const struct inode_operations jfs_fast_symlink_inode_operations = {
33 .readlink = generic_readlink, 33 .readlink = generic_readlink,
34 .follow_link = jfs_follow_link, 34 .follow_link = jfs_follow_link,
35 .setattr = jfs_setattr,
36 .setxattr = jfs_setxattr,
37 .getxattr = jfs_getxattr,
38 .listxattr = jfs_listxattr,
39 .removexattr = jfs_removexattr,
40};
41
42const struct inode_operations jfs_symlink_inode_operations = {
43 .readlink = generic_readlink,
44 .follow_link = page_follow_link_light,
45 .put_link = page_put_link,
46 .setattr = jfs_setattr,
35 .setxattr = jfs_setxattr, 47 .setxattr = jfs_setxattr,
36 .getxattr = jfs_getxattr, 48 .getxattr = jfs_getxattr,
37 .listxattr = jfs_listxattr, 49 .listxattr = jfs_listxattr,
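The jfs symlink split gives in-inode ("fast") symlinks and page-based symlinks their own operation tables, so long symlinks no longer fall back to the generic page_symlink_inode_operations and keep jfs's setattr and xattr handlers. A minimal sketch of dispatching between two operation tables on a size threshold; the names and threshold value are invented.

#include <stdio.h>
#include <string.h>

struct symlink_ops_sketch {
	const char *name;	/* the kernel tables hold follow_link/xattr handlers */
};

static const struct symlink_ops_sketch fast_ops = { "fast (inline) symlink ops" };
static const struct symlink_ops_sketch page_ops = { "page-based symlink ops" };

#define INLINE_CAPACITY 128	/* stand-in for IDATASIZE */

static const struct symlink_ops_sketch *pick_ops(size_t target_len)
{
	return target_len <= INLINE_CAPACITY ? &fast_ops : &page_ops;
}

int main(void)
{
	printf("%s\n", pick_ops(strlen("short-target"))->name);
	printf("%s\n", pick_ops(300)->name);
	return 0;
}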
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 84e36f52fe95..76c242fbe1b0 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -459,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target)
459 struct logfs_block *block; 459 struct logfs_block *block;
460 int round, progress, last_progress = 0; 460 int round, progress, last_progress = 0;
461 461
462 /*
463 * Doing too many changes to the segfile at once would result
464 * in a large number of aliases. Write the journal before
465 * things get out of hand.
466 */
467 if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
468 logfs_write_anchor(sb);
469
462 if (no_free_segments(sb) >= target && 470 if (no_free_segments(sb) >= target &&
463 super->s_no_object_aliases < MAX_OBJ_ALIASES) 471 super->s_no_object_aliases < MAX_OBJ_ALIASES)
464 return; 472 return;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 33bd260b8309..fb0a613f885b 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -389,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area)
389static int journal_erase_segment(struct logfs_area *area) 389static int journal_erase_segment(struct logfs_area *area)
390{ 390{
391 struct super_block *sb = area->a_sb; 391 struct super_block *sb = area->a_sb;
392 struct logfs_segment_header sh; 392 union {
393 struct logfs_segment_header sh;
394 unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
395 } u;
393 u64 ofs; 396 u64 ofs;
394 int err; 397 int err;
395 398
@@ -397,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area)
397 if (err) 400 if (err)
398 return err; 401 return err;
399 402
400 sh.pad = 0; 403 memset(&u, 0, sizeof(u));
401 sh.type = SEG_JOURNAL; 404 u.sh.pad = 0;
402 sh.level = 0; 405 u.sh.type = SEG_JOURNAL;
403 sh.segno = cpu_to_be32(area->a_segno); 406 u.sh.level = 0;
404 sh.ec = cpu_to_be32(area->a_erase_count); 407 u.sh.segno = cpu_to_be32(area->a_segno);
405 sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); 408 u.sh.ec = cpu_to_be32(area->a_erase_count);
406 sh.crc = logfs_crc32(&sh, sizeof(sh), 4); 409 u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
410 u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);
407 411
408 /* This causes a bug in segment.c. Not yet. */ 412 /* This causes a bug in segment.c. Not yet. */
409 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); 413 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
410 414
411 ofs = dev_ofs(sb, area->a_segno, 0); 415 ofs = dev_ofs(sb, area->a_segno, 0);
412 area->a_used_bytes = ALIGN(sizeof(sh), 16); 416 area->a_used_bytes = sizeof(u);
413 logfs_buf_write(area, ofs, &sh, sizeof(sh)); 417 logfs_buf_write(area, ofs, &u, sizeof(u));
414 return 0; 418 return 0;
415} 419}
416 420
@@ -494,6 +498,8 @@ static void account_shadows(struct super_block *sb)
494 498
495 btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); 499 btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
496 btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); 500 btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
501 btree_grim_visitor32(&tree->segment_map, 0, NULL);
502 tree->no_shadowed_segments = 0;
497 503
498 if (li->li_block) { 504 if (li->li_block) {
499 /* 505 /*
@@ -607,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
607 if (len == 0) 613 if (len == 0)
608 return logfs_write_header(super, header, 0, type); 614 return logfs_write_header(super, header, 0, type);
609 615
616 BUG_ON(len > sb->s_blocksize);
610 compr_len = logfs_compress(buf, data, len, sb->s_blocksize); 617 compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
611 if (compr_len < 0 || type == JE_ANCHOR) { 618 if (compr_len < 0 || type == JE_ANCHOR) {
612 BUG_ON(len > sb->s_blocksize);
613 memcpy(data, buf, len); 619 memcpy(data, buf, len);
614 compr_len = len; 620 compr_len = len;
615 compr = COMPR_NONE; 621 compr = COMPR_NONE;
@@ -661,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
661 if (ofs < 0) 667 if (ofs < 0)
662 return ofs; 668 return ofs;
663 logfs_buf_write(area, ofs, super->s_compressed_je, len); 669 logfs_buf_write(area, ofs, super->s_compressed_je, len);
670 BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
664 super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); 671 super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
665 return 0; 672 return 0;
666} 673}
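In journal_erase_segment() the segment header is now wrapped in a union with a byte array padded to a 16-byte-aligned size, so the length passed to logfs_buf_write() matches the a_used_bytes accounting and the padding bytes are zeroed by the memset. A standalone sketch of the same trick, using an invented header layout whose exact size is ABI-dependent:

#include <stdio.h>
#include <string.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

struct seg_header_sketch {	/* invented layout, illustration only */
	unsigned char pad, type, level;
	unsigned int segno, ec, gec;
	unsigned short crc;
};

union padded_header {
	struct seg_header_sketch sh;
	unsigned char c[ALIGN_UP(sizeof(struct seg_header_sketch), 16)];
};

int main(void)
{
	union padded_header u;

	memset(&u, 0, sizeof(u));	/* zeroes the padding as well */
	u.sh.type = 1;
	printf("header %zu bytes, written/accounted %zu bytes\n",
	       sizeof(u.sh), sizeof(u));
	return 0;
}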
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b84b0eec6024..0a3df1a0c936 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -257,10 +257,14 @@ struct logfs_shadow {
257 * struct shadow_tree 257 * struct shadow_tree
258 * @new: shadows where old_ofs==0, indexed by new_ofs 258 * @new: shadows where old_ofs==0, indexed by new_ofs
259 * @old: shadows where old_ofs!=0, indexed by old_ofs 259 * @old: shadows where old_ofs!=0, indexed by old_ofs
260 * @segment_map: bitfield of segments containing shadows
261 * @no_shadowed_segment: number of segments containing shadows
260 */ 262 */
261struct shadow_tree { 263struct shadow_tree {
262 struct btree_head64 new; 264 struct btree_head64 new;
263 struct btree_head64 old; 265 struct btree_head64 old;
266 struct btree_head32 segment_map;
267 int no_shadowed_segments;
264}; 268};
265 269
266struct object_alias_item { 270struct object_alias_item {
@@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
305 level_t level, int child_no, __be64 val); 309 level_t level, int child_no, __be64 val);
306struct logfs_block_ops { 310struct logfs_block_ops {
307 void (*write_block)(struct logfs_block *block); 311 void (*write_block)(struct logfs_block *block);
308 gc_level_t (*block_level)(struct logfs_block *block);
309 void (*free_block)(struct super_block *sb, struct logfs_block*block); 312 void (*free_block)(struct super_block *sb, struct logfs_block*block);
310 int (*write_alias)(struct super_block *sb, 313 int (*write_alias)(struct super_block *sb,
311 struct logfs_block *block, 314 struct logfs_block *block,
312 write_alias_t *write_one_alias); 315 write_alias_t *write_one_alias);
313}; 316};
314 317
318#define MAX_JOURNAL_ENTRIES 256
319
315struct logfs_super { 320struct logfs_super {
316 struct mtd_info *s_mtd; /* underlying device */ 321 struct mtd_info *s_mtd; /* underlying device */
317 struct block_device *s_bdev; /* underlying device */ 322 struct block_device *s_bdev; /* underlying device */
@@ -378,7 +383,7 @@ struct logfs_super {
378 u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ 383 u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
379 u64 s_last_version; 384 u64 s_last_version;
380 struct logfs_area *s_journal_area; /* open journal segment */ 385 struct logfs_area *s_journal_area; /* open journal segment */
381 __be64 s_je_array[64]; 386 __be64 s_je_array[MAX_JOURNAL_ENTRIES];
382 int s_no_je; 387 int s_no_je;
383 388
384 int s_sum_index; /* for the 12 summaries */ 389 int s_sum_index; /* for the 12 summaries */
@@ -722,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb,
722 return logfs_super(sb)->s_area[(__force u8)gc_level]; 727 return logfs_super(sb)->s_area[(__force u8)gc_level];
723} 728}
724 729
730static inline void logfs_mempool_destroy(mempool_t *pool)
731{
732 if (pool)
733 mempool_destroy(pool);
734}
735
725#endif 736#endif
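logfs_mempool_destroy() centralizes the "destroy only if the pool was ever created" check that the callers previously repeated by hand, assuming (as those existing checks suggest) that mempool_destroy() must not be passed a NULL pool. A minimal sketch of the null-guarded wrapper idiom with invented names:

#include <stdio.h>
#include <stdlib.h>

struct pool_sketch { int dummy; };

/* Stand-in for mempool_destroy(): assumed to require a non-NULL pool. */
static void pool_destroy(struct pool_sketch *p)
{
	printf("destroying pool %p\n", (void *)p);
	free(p);
}

/* The wrapper the patch adds: callers no longer repeat the NULL check. */
static void safe_pool_destroy(struct pool_sketch *p)
{
	if (p)
		pool_destroy(p);
}

int main(void)
{
	struct pool_sketch *a = malloc(sizeof(*a));
	struct pool_sketch *b = NULL;	/* e.g. the allocation never happened */

	safe_pool_destroy(a);
	safe_pool_destroy(b);	/* silently does nothing */
	return 0;
}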
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index bff40253dfb2..3159db6958e5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -430,25 +430,6 @@ static void inode_write_block(struct logfs_block *block)
430 } 430 }
431} 431}
432 432
433static gc_level_t inode_block_level(struct logfs_block *block)
434{
435 BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
436 return GC_LEVEL(LOGFS_MAX_LEVELS);
437}
438
439static gc_level_t indirect_block_level(struct logfs_block *block)
440{
441 struct page *page;
442 struct inode *inode;
443 u64 bix;
444 level_t level;
445
446 page = block->page;
447 inode = page->mapping->host;
448 logfs_unpack_index(page->index, &bix, &level);
449 return expand_level(inode->i_ino, level);
450}
451
452/* 433/*
453 * This silences a false, yet annoying gcc warning. I hate it when my editor 434 * This silences a false, yet annoying gcc warning. I hate it when my editor
454 * jumps into bitops.h each time I recompile this file. 435 * jumps into bitops.h each time I recompile this file.
@@ -587,14 +568,12 @@ static void indirect_free_block(struct super_block *sb,
587 568
588static struct logfs_block_ops inode_block_ops = { 569static struct logfs_block_ops inode_block_ops = {
589 .write_block = inode_write_block, 570 .write_block = inode_write_block,
590 .block_level = inode_block_level,
591 .free_block = inode_free_block, 571 .free_block = inode_free_block,
592 .write_alias = inode_write_alias, 572 .write_alias = inode_write_alias,
593}; 573};
594 574
595struct logfs_block_ops indirect_block_ops = { 575struct logfs_block_ops indirect_block_ops = {
596 .write_block = indirect_write_block, 576 .write_block = indirect_write_block,
597 .block_level = indirect_block_level,
598 .free_block = indirect_free_block, 577 .free_block = indirect_free_block,
599 .write_alias = indirect_write_alias, 578 .write_alias = indirect_write_alias,
600}; 579};
@@ -1241,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
1241 mempool_free(shadow, super->s_shadow_pool); 1220 mempool_free(shadow, super->s_shadow_pool);
1242} 1221}
1243 1222
1223static void mark_segment(struct shadow_tree *tree, u32 segno)
1224{
1225 int err;
1226
1227 if (!btree_lookup32(&tree->segment_map, segno)) {
1228 err = btree_insert32(&tree->segment_map, segno, (void *)1,
1229 GFP_NOFS);
1230 BUG_ON(err);
1231 tree->no_shadowed_segments++;
1232 }
1233}
1234
1244/** 1235/**
1245 * fill_shadow_tree - Propagate shadow tree changes due to a write 1236 * fill_shadow_tree - Propagate shadow tree changes due to a write
1246 * @inode: Inode owning the page 1237 * @inode: Inode owning the page
@@ -1288,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page,
1288 1279
1289 super->s_dirty_used_bytes += shadow->new_len; 1280 super->s_dirty_used_bytes += shadow->new_len;
1290 super->s_dirty_free_bytes += shadow->old_len; 1281 super->s_dirty_free_bytes += shadow->old_len;
1282 mark_segment(tree, shadow->old_ofs >> super->s_segshift);
1283 mark_segment(tree, shadow->new_ofs >> super->s_segshift);
1291 } 1284 }
1292} 1285}
1293 1286
@@ -1845,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size)
1845 return logfs_truncate_direct(inode, size); 1838 return logfs_truncate_direct(inode, size);
1846} 1839}
1847 1840
1848int logfs_truncate(struct inode *inode, u64 size) 1841/*
1842 * Truncate, by changing the segment file, can consume a fair amount
1843 * of resources. So back off from time to time and do some GC.
1844 * 8 or 2048 blocks should be well within safety limits even if
1845 * every single block resided in a different segment.
1846 */
1847#define TRUNCATE_STEP (8 * 1024 * 1024)
1848int logfs_truncate(struct inode *inode, u64 target)
1849{ 1849{
1850 struct super_block *sb = inode->i_sb; 1850 struct super_block *sb = inode->i_sb;
1851 int err; 1851 u64 size = i_size_read(inode);
1852 int err = 0;
1852 1853
1853 logfs_get_wblocks(sb, NULL, 1); 1854 size = ALIGN(size, TRUNCATE_STEP);
1854 err = __logfs_truncate(inode, size); 1855 while (size > target) {
1855 if (!err) 1856 if (size > TRUNCATE_STEP)
1856 err = __logfs_write_inode(inode, 0); 1857 size -= TRUNCATE_STEP;
1857 logfs_put_wblocks(sb, NULL, 1); 1858 else
1859 size = 0;
1860 if (size < target)
1861 size = target;
1862
1863 logfs_get_wblocks(sb, NULL, 1);
 1864 err = __logfs_truncate(inode, size);
1865 if (!err)
1866 err = __logfs_write_inode(inode, 0);
1867 logfs_put_wblocks(sb, NULL, 1);
1868 }
1858 1869
1859 if (!err) 1870 if (!err)
1860 err = vmtruncate(inode, size); 1871 err = vmtruncate(inode, target);
1861 1872
1862 /* I don't trust error recovery yet. */ 1873 /* I don't trust error recovery yet. */
1863 WARN_ON(err); 1874 WARN_ON(err);
@@ -2251,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb)
2251 struct logfs_super *super = logfs_super(sb); 2262 struct logfs_super *super = logfs_super(sb);
2252 2263
2253 destroy_meta_inode(super->s_segfile_inode); 2264 destroy_meta_inode(super->s_segfile_inode);
2254 if (super->s_block_pool) 2265 logfs_mempool_destroy(super->s_block_pool);
2255 mempool_destroy(super->s_block_pool); 2266 logfs_mempool_destroy(super->s_shadow_pool);
2256 if (super->s_shadow_pool)
2257 mempool_destroy(super->s_shadow_pool);
2258} 2267}
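logfs_truncate() now walks the size down in TRUNCATE_STEP (8 MiB) slices, committing the inode between slices, so a huge truncate cannot accumulate an unbounded number of segment-file changes before the journal is written. A standalone sketch of just the stepping arithmetic, with invented sizes and no I/O:

#include <stdio.h>

#define TRUNCATE_STEP (8ULL * 1024 * 1024)
#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned long long target = 3ULL * 1024 * 1024;		/* 3 MiB */
	unsigned long long size   = 20ULL * 1024 * 1024 + 123;	/* ~20 MiB file */

	size = ALIGN_UP(size, TRUNCATE_STEP);
	while (size > target) {
		if (size > TRUNCATE_STEP)
			size -= TRUNCATE_STEP;
		else
			size = 0;
		if (size < target)
			size = target;
		/* one __logfs_truncate()/__logfs_write_inode() round per step */
		printf("truncate to %llu\n", size);
	}
	return 0;
}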
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 801a3a141625..f77ce2b470ba 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -183,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
183 return 0; 183 return 0;
184} 184}
185 185
186static gc_level_t btree_block_level(struct logfs_block *block)
187{
188 return expand_level(block->ino, block->level);
189}
190
191static struct logfs_block_ops btree_block_ops = { 186static struct logfs_block_ops btree_block_ops = {
192 .write_block = btree_write_block, 187 .write_block = btree_write_block,
193 .block_level = btree_block_level,
194 .free_block = __free_block, 188 .free_block = __free_block,
195 .write_alias = btree_write_alias, 189 .write_alias = btree_write_alias,
196}; 190};
@@ -919,7 +913,7 @@ err:
919 for (i--; i >= 0; i--) 913 for (i--; i >= 0; i--)
920 free_area(super->s_area[i]); 914 free_area(super->s_area[i]);
921 free_area(super->s_journal_area); 915 free_area(super->s_journal_area);
922 mempool_destroy(super->s_alias_pool); 916 logfs_mempool_destroy(super->s_alias_pool);
923 return -ENOMEM; 917 return -ENOMEM;
924} 918}
925 919
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index b60bfac3263c..d7c23ed8349a 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -12,6 +12,7 @@
12#include "logfs.h" 12#include "logfs.h"
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/blkdev.h>
15#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
16#include <linux/statfs.h> 17#include <linux/statfs.h>
17#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
@@ -137,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
137 sb->s_fs_info = super; 138 sb->s_fs_info = super;
138 sb->s_mtd = super->s_mtd; 139 sb->s_mtd = super->s_mtd;
139 sb->s_bdev = super->s_bdev; 140 sb->s_bdev = super->s_bdev;
141 if (sb->s_bdev)
142 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
143 if (sb->s_mtd)
144 sb->s_bdi = sb->s_mtd->backing_dev_info;
140 return 0; 145 return 0;
141} 146}
142 147
@@ -328,27 +333,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
328 goto fail; 333 goto fail;
329 334
330 sb->s_root = d_alloc_root(rootdir); 335 sb->s_root = d_alloc_root(rootdir);
331 if (!sb->s_root) 336 if (!sb->s_root) {
332 goto fail2; 337 iput(rootdir);
338 goto fail;
339 }
333 340
334 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 341 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
335 if (!super->s_erase_page) 342 if (!super->s_erase_page)
336 goto fail2; 343 goto fail;
337 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); 344 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
338 345
339 /* FIXME: check for read-only mounts */ 346 /* FIXME: check for read-only mounts */
340 err = logfs_make_writeable(sb); 347 err = logfs_make_writeable(sb);
341 if (err) 348 if (err)
342 goto fail3; 349 goto fail1;
343 350
344 log_super("LogFS: Finished mounting\n"); 351 log_super("LogFS: Finished mounting\n");
345 simple_set_mnt(mnt, sb); 352 simple_set_mnt(mnt, sb);
346 return 0; 353 return 0;
347 354
348fail3: 355fail1:
349 __free_page(super->s_erase_page); 356 __free_page(super->s_erase_page);
350fail2:
351 iput(rootdir);
352fail: 357fail:
353 iput(logfs_super(sb)->s_master_inode); 358 iput(logfs_super(sb)->s_master_inode);
354 return -EIO; 359 return -EIO;
@@ -452,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
452 457
453 btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); 458 btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
454 btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); 459 btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
460 btree_init_mempool32(&super->s_shadow_tree.segment_map,
461 super->s_btree_pool);
455 462
456 ret = logfs_init_mapping(sb); 463 ret = logfs_init_mapping(sb);
457 if (ret) 464 if (ret)
@@ -516,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb)
516 if (super->s_erase_page) 523 if (super->s_erase_page)
517 __free_page(super->s_erase_page); 524 __free_page(super->s_erase_page);
518 super->s_devops->put_device(sb); 525 super->s_devops->put_device(sb);
519 mempool_destroy(super->s_btree_pool); 526 logfs_mempool_destroy(super->s_btree_pool);
520 mempool_destroy(super->s_alias_pool); 527 logfs_mempool_destroy(super->s_alias_pool);
521 kfree(super); 528 kfree(super);
522 log_super("LogFS: Finished unmounting\n"); 529 log_super("LogFS: Finished unmounting\n");
523} 530}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91a7e42..b86b96fe1dc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1641 if (nd->last.name[nd->last.len]) { 1641 if (nd->last.name[nd->last.len]) {
1642 if (open_flag & O_CREAT) 1642 if (open_flag & O_CREAT)
1643 goto exit; 1643 goto exit;
1644 nd->flags |= LOOKUP_DIRECTORY; 1644 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1645 } 1645 }
1646 1646
1647 /* just plain open? */ 1647 /* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
1830 } 1830 }
1831 if (open_flag & O_DIRECTORY) 1831 if (open_flag & O_DIRECTORY)
1832 nd.flags |= LOOKUP_DIRECTORY; 1832 nd.flags |= LOOKUP_DIRECTORY;
1833 if (!(open_flag & O_NOFOLLOW))
1834 nd.flags |= LOOKUP_FOLLOW;
1833 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 1835 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1834 while (unlikely(!filp)) { /* trailing symlink */ 1836 while (unlikely(!filp)) { /* trailing symlink */
1835 struct path holder; 1837 struct path holder;
@@ -1837,7 +1839,7 @@ reval:
1837 void *cookie; 1839 void *cookie;
1838 error = -ELOOP; 1840 error = -ELOOP;
1839 /* S_ISDIR part is a temporary automount kludge */ 1841 /* S_ISDIR part is a temporary automount kludge */
1840 if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) 1842 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
1841 goto exit_dput; 1843 goto exit_dput;
1842 if (count++ == 32) 1844 if (count++ == 32)
1843 goto exit_dput; 1845 goto exit_dput;
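These hunks move the "follow the trailing symlink?" decision into the lookup flags: LOOKUP_FOLLOW is set up front unless O_NOFOLLOW was requested, names with a trailing slash force LOOKUP_DIRECTORY | LOOKUP_FOLLOW, and the trailing-symlink loop now tests the flag rather than re-testing open_flag. One userspace-visible piece of that contract is that open(2) with O_NOFOLLOW fails with ELOOP when the final component is a symlink, while a plain open follows it; a small demo (file names invented for the example):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd;

        /* Set up a regular file and a symlink pointing at it. */
        fd = open("target_file", O_CREAT | O_WRONLY, 0644);
        if (fd >= 0)
                close(fd);
        unlink("link_to_target");
        if (symlink("target_file", "link_to_target") < 0) {
                perror("symlink");
                return 1;
        }

        /* Default open follows the trailing symlink. */
        fd = open("link_to_target", O_RDONLY);
        printf("plain open:      %s\n", fd >= 0 ? "ok" : strerror(errno));
        if (fd >= 0)
                close(fd);

        /* O_NOFOLLOW refuses a symlink in the last component: ELOOP. */
        fd = open("link_to_target", O_RDONLY | O_NOFOLLOW);
        printf("O_NOFOLLOW open: %s\n",
               fd >= 0 ? "ok (unexpected)" : strerror(errno));
        if (fd >= 0)
                close(fd);
        return 0;
}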
@@ -2174,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2174 error = security_inode_rmdir(dir, dentry); 2176 error = security_inode_rmdir(dir, dentry);
2175 if (!error) { 2177 if (!error) {
2176 error = dir->i_op->rmdir(dir, dentry); 2178 error = dir->i_op->rmdir(dir, dentry);
2177 if (!error) 2179 if (!error) {
2178 dentry->d_inode->i_flags |= S_DEAD; 2180 dentry->d_inode->i_flags |= S_DEAD;
2181 dont_mount(dentry);
2182 }
2179 } 2183 }
2180 } 2184 }
2181 mutex_unlock(&dentry->d_inode->i_mutex); 2185 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2259,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2259 if (!error) { 2263 if (!error) {
2260 error = dir->i_op->unlink(dir, dentry); 2264 error = dir->i_op->unlink(dir, dentry);
2261 if (!error) 2265 if (!error)
2262 dentry->d_inode->i_flags |= S_DEAD; 2266 dont_mount(dentry);
2263 } 2267 }
2264 } 2268 }
2265 mutex_unlock(&dentry->d_inode->i_mutex); 2269 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2570,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2570 return error; 2574 return error;
2571 2575
2572 target = new_dentry->d_inode; 2576 target = new_dentry->d_inode;
2573 if (target) { 2577 if (target)
2574 mutex_lock(&target->i_mutex); 2578 mutex_lock(&target->i_mutex);
2575 dentry_unhash(new_dentry);
2576 }
2577 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2579 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2578 error = -EBUSY; 2580 error = -EBUSY;
2579 else 2581 else {
2582 if (target)
2583 dentry_unhash(new_dentry);
2580 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2584 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2585 }
2581 if (target) { 2586 if (target) {
2582 if (!error) 2587 if (!error) {
2583 target->i_flags |= S_DEAD; 2588 target->i_flags |= S_DEAD;
2589 dont_mount(new_dentry);
2590 }
2584 mutex_unlock(&target->i_mutex); 2591 mutex_unlock(&target->i_mutex);
2585 if (d_unhashed(new_dentry)) 2592 if (d_unhashed(new_dentry))
2586 d_rehash(new_dentry); 2593 d_rehash(new_dentry);
@@ -2612,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2612 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2619 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2613 if (!error) { 2620 if (!error) {
2614 if (target) 2621 if (target)
2615 target->i_flags |= S_DEAD; 2622 dont_mount(new_dentry);
2616 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2623 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2617 d_move(old_dentry, new_dentry); 2624 d_move(old_dentry, new_dentry);
2618 } 2625 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c70..f20cb57d1067 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
1432 1432
1433 err = -ENOENT; 1433 err = -ENOENT;
1434 mutex_lock(&path->dentry->d_inode->i_mutex); 1434 mutex_lock(&path->dentry->d_inode->i_mutex);
1435 if (IS_DEADDIR(path->dentry->d_inode)) 1435 if (cant_mount(path->dentry))
1436 goto out_unlock; 1436 goto out_unlock;
1437 1437
1438 err = security_sb_check_sb(mnt, path); 1438 err = security_sb_check_sb(mnt, path);
@@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name)
1623 1623
1624 err = -ENOENT; 1624 err = -ENOENT;
1625 mutex_lock(&path->dentry->d_inode->i_mutex); 1625 mutex_lock(&path->dentry->d_inode->i_mutex);
1626 if (IS_DEADDIR(path->dentry->d_inode)) 1626 if (cant_mount(path->dentry))
1627 goto out1; 1627 goto out1;
1628 1628
1629 if (d_unlinked(path->dentry)) 1629 if (d_unlinked(path->dentry))
@@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2234 if (!check_mnt(root.mnt)) 2234 if (!check_mnt(root.mnt))
2235 goto out2; 2235 goto out2;
2236 error = -ENOENT; 2236 error = -ENOENT;
2237 if (IS_DEADDIR(new.dentry->d_inode)) 2237 if (cant_mount(old.dentry))
2238 goto out2; 2238 goto out2;
2239 if (d_unlinked(new.dentry)) 2239 if (d_unlinked(new.dentry))
2240 goto out2; 2240 goto out2;
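The vfs_rmdir/vfs_unlink/rename hunks replace the S_DEAD inode flag with dont_mount() on the victim dentry, and namespace.c now gates mounts with cant_mount() instead of IS_DEADDIR(), moving the "this name is gone, refuse new mounts here" state onto the dentry itself. The helpers are defined outside this diff; the following is only a userspace model of the idea (a flag set under the dentry lock when the name is removed and tested under the same lock before grafting a mount), with an invented flag value, not the kernel's dcache code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct dentry_model {
        pthread_mutex_t d_lock;
        unsigned int d_flags;
};

#define DCACHE_CANT_MOUNT_MODEL 0x1      /* illustrative flag value */

/* Called by unlink/rmdir/rename once the name is gone. */
static void dont_mount_model(struct dentry_model *d)
{
        pthread_mutex_lock(&d->d_lock);
        d->d_flags |= DCACHE_CANT_MOUNT_MODEL;
        pthread_mutex_unlock(&d->d_lock);
}

/* Called by the mount paths before grafting onto this dentry. */
static bool cant_mount_model(struct dentry_model *d)
{
        bool ret;

        pthread_mutex_lock(&d->d_lock);
        ret = d->d_flags & DCACHE_CANT_MOUNT_MODEL;
        pthread_mutex_unlock(&d->d_lock);
        return ret;
}

int main(void)
{
        struct dentry_model d = { PTHREAD_MUTEX_INITIALIZER, 0 };

        printf("before unlink: cant_mount=%d\n", cant_mount_model(&d));
        dont_mount_model(&d);            /* vfs_unlink() succeeded */
        printf("after unlink:  cant_mount=%d\n", cant_mount_model(&d));
        return 0;
}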
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cf98da1be23e..fa3385154023 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
526 sb->s_blocksize_bits = 10; 526 sb->s_blocksize_bits = 10;
527 sb->s_magic = NCP_SUPER_MAGIC; 527 sb->s_magic = NCP_SUPER_MAGIC;
528 sb->s_op = &ncp_sops; 528 sb->s_op = &ncp_sops;
529 sb->s_bdi = &server->bdi;
529 530
530 server = NCP_SBP(sb); 531 server = NCP_SBP(sb);
531 memset(server, 0, sizeof(*server)); 532 memset(server, 0, sizeof(*server));
532 533
534 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
535 if (error)
536 goto out_bdi;
537
533 server->ncp_filp = ncp_filp; 538 server->ncp_filp = ncp_filp;
534 server->ncp_sock = sock; 539 server->ncp_sock = sock;
535 540
@@ -719,6 +724,8 @@ out_fput2:
719 if (server->info_filp) 724 if (server->info_filp)
720 fput(server->info_filp); 725 fput(server->info_filp);
721out_fput: 726out_fput:
727 bdi_destroy(&server->bdi);
728out_bdi:
722 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 729 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
723 * 730 *
 724 * The previously used put_filp(ncp_filp); was bogus, since 731 * The previously used put_filp(ncp_filp); was bogus, since
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb)
756 kill_pid(server->m.wdog_pid, SIGTERM, 1); 763 kill_pid(server->m.wdog_pid, SIGTERM, 1);
757 put_pid(server->m.wdog_pid); 764 put_pid(server->m.wdog_pid);
758 765
766 bdi_destroy(&server->bdi);
759 kfree(server->priv.data); 767 kfree(server->priv.data);
760 kfree(server->auth.object_name); 768 kfree(server->auth.object_name);
761 vfree(server->rxbuf); 769 vfree(server->rxbuf);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2a3d352c0bff..acc9c4943b84 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -966,6 +966,8 @@ out_error:
966static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) 966static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
967{ 967{
968 target->flags = source->flags; 968 target->flags = source->flags;
969 target->rsize = source->rsize;
970 target->wsize = source->wsize;
969 target->acregmin = source->acregmin; 971 target->acregmin = source->acregmin;
970 target->acregmax = source->acregmax; 972 target->acregmax = source->acregmax;
971 target->acdirmin = source->acdirmin; 973 target->acdirmin = source->acdirmin;
@@ -1294,7 +1296,8 @@ static int nfs4_init_server(struct nfs_server *server,
1294 1296
1295 /* Initialise the client representation from the mount data */ 1297 /* Initialise the client representation from the mount data */
1296 server->flags = data->flags; 1298 server->flags = data->flags;
1297 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; 1299 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
1300 NFS_CAP_POSIX_LOCK;
1298 server->options = data->options; 1301 server->options = data->options;
1299 1302
1300 /* Get a client record */ 1303 /* Get a client record */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 15671245c6ee..ea61d26e7871 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
24 24
25static void nfs_do_free_delegation(struct nfs_delegation *delegation) 25static void nfs_do_free_delegation(struct nfs_delegation *delegation)
26{ 26{
27 if (delegation->cred)
28 put_rpccred(delegation->cred);
27 kfree(delegation); 29 kfree(delegation);
28} 30}
29 31
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
36 38
37static void nfs_free_delegation(struct nfs_delegation *delegation) 39static void nfs_free_delegation(struct nfs_delegation *delegation)
38{ 40{
39 struct rpc_cred *cred;
40
41 cred = rcu_dereference(delegation->cred);
42 rcu_assign_pointer(delegation->cred, NULL);
43 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 41 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
44 if (cred)
45 put_rpccred(cred);
46} 42}
47 43
48void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) 44void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -129,21 +125,35 @@ again:
129 */ 125 */
130void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 126void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
131{ 127{
132 struct nfs_delegation *delegation = NFS_I(inode)->delegation; 128 struct nfs_delegation *delegation;
133 struct rpc_cred *oldcred; 129 struct rpc_cred *oldcred = NULL;
134 130
135 if (delegation == NULL) 131 rcu_read_lock();
136 return; 132 delegation = rcu_dereference(NFS_I(inode)->delegation);
137 memcpy(delegation->stateid.data, res->delegation.data, 133 if (delegation != NULL) {
138 sizeof(delegation->stateid.data)); 134 spin_lock(&delegation->lock);
139 delegation->type = res->delegation_type; 135 if (delegation->inode != NULL) {
140 delegation->maxsize = res->maxsize; 136 memcpy(delegation->stateid.data, res->delegation.data,
141 oldcred = delegation->cred; 137 sizeof(delegation->stateid.data));
142 delegation->cred = get_rpccred(cred); 138 delegation->type = res->delegation_type;
143 clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); 139 delegation->maxsize = res->maxsize;
144 NFS_I(inode)->delegation_state = delegation->type; 140 oldcred = delegation->cred;
145 smp_wmb(); 141 delegation->cred = get_rpccred(cred);
146 put_rpccred(oldcred); 142 clear_bit(NFS_DELEGATION_NEED_RECLAIM,
143 &delegation->flags);
144 NFS_I(inode)->delegation_state = delegation->type;
145 spin_unlock(&delegation->lock);
146 put_rpccred(oldcred);
147 rcu_read_unlock();
148 } else {
149 /* We appear to have raced with a delegation return. */
150 spin_unlock(&delegation->lock);
151 rcu_read_unlock();
152 nfs_inode_set_delegation(inode, cred, res);
153 }
154 } else {
155 rcu_read_unlock();
156 }
147} 157}
148 158
149static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 159static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
166 return inode; 176 return inode;
167} 177}
168 178
169static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) 179static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
180 const nfs4_stateid *stateid,
181 struct nfs_client *clp)
170{ 182{
171 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); 183 struct nfs_delegation *delegation =
184 rcu_dereference_protected(nfsi->delegation,
185 lockdep_is_held(&clp->cl_lock));
172 186
173 if (delegation == NULL) 187 if (delegation == NULL)
174 goto nomatch; 188 goto nomatch;
@@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
195{ 209{
196 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 210 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
197 struct nfs_inode *nfsi = NFS_I(inode); 211 struct nfs_inode *nfsi = NFS_I(inode);
198 struct nfs_delegation *delegation; 212 struct nfs_delegation *delegation, *old_delegation;
199 struct nfs_delegation *freeme = NULL; 213 struct nfs_delegation *freeme = NULL;
200 int status = 0; 214 int status = 0;
201 215
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
213 spin_lock_init(&delegation->lock); 227 spin_lock_init(&delegation->lock);
214 228
215 spin_lock(&clp->cl_lock); 229 spin_lock(&clp->cl_lock);
216 if (rcu_dereference(nfsi->delegation) != NULL) { 230 old_delegation = rcu_dereference_protected(nfsi->delegation,
217 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, 231 lockdep_is_held(&clp->cl_lock));
218 sizeof(delegation->stateid)) == 0 && 232 if (old_delegation != NULL) {
219 delegation->type == nfsi->delegation->type) { 233 if (memcmp(&delegation->stateid, &old_delegation->stateid,
234 sizeof(old_delegation->stateid)) == 0 &&
235 delegation->type == old_delegation->type) {
220 goto out; 236 goto out;
221 } 237 }
222 /* 238 /*
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
226 dfprintk(FILE, "%s: server %s handed out " 242 dfprintk(FILE, "%s: server %s handed out "
227 "a duplicate delegation!\n", 243 "a duplicate delegation!\n",
228 __func__, clp->cl_hostname); 244 __func__, clp->cl_hostname);
229 if (delegation->type <= nfsi->delegation->type) { 245 if (delegation->type <= old_delegation->type) {
230 freeme = delegation; 246 freeme = delegation;
231 delegation = NULL; 247 delegation = NULL;
232 goto out; 248 goto out;
233 } 249 }
234 freeme = nfs_detach_delegation_locked(nfsi, NULL); 250 freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
235 } 251 }
236 list_add_rcu(&delegation->super_list, &clp->cl_delegations); 252 list_add_rcu(&delegation->super_list, &clp->cl_delegations);
237 nfsi->delegation_state = delegation->type; 253 nfsi->delegation_state = delegation->type;
@@ -301,7 +317,7 @@ restart:
301 if (inode == NULL) 317 if (inode == NULL)
302 continue; 318 continue;
303 spin_lock(&clp->cl_lock); 319 spin_lock(&clp->cl_lock);
304 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 320 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
305 spin_unlock(&clp->cl_lock); 321 spin_unlock(&clp->cl_lock);
306 rcu_read_unlock(); 322 rcu_read_unlock();
307 if (delegation != NULL) { 323 if (delegation != NULL) {
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
330 struct nfs_inode *nfsi = NFS_I(inode); 346 struct nfs_inode *nfsi = NFS_I(inode);
331 struct nfs_delegation *delegation; 347 struct nfs_delegation *delegation;
332 348
333 if (rcu_dereference(nfsi->delegation) != NULL) { 349 if (rcu_access_pointer(nfsi->delegation) != NULL) {
334 spin_lock(&clp->cl_lock); 350 spin_lock(&clp->cl_lock);
335 delegation = nfs_detach_delegation_locked(nfsi, NULL); 351 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
336 spin_unlock(&clp->cl_lock); 352 spin_unlock(&clp->cl_lock);
337 if (delegation != NULL) 353 if (delegation != NULL)
338 nfs_do_return_delegation(inode, delegation, 0); 354 nfs_do_return_delegation(inode, delegation, 0);
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
346 struct nfs_delegation *delegation; 362 struct nfs_delegation *delegation;
347 int err = 0; 363 int err = 0;
348 364
349 if (rcu_dereference(nfsi->delegation) != NULL) { 365 if (rcu_access_pointer(nfsi->delegation) != NULL) {
350 spin_lock(&clp->cl_lock); 366 spin_lock(&clp->cl_lock);
351 delegation = nfs_detach_delegation_locked(nfsi, NULL); 367 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
352 spin_unlock(&clp->cl_lock); 368 spin_unlock(&clp->cl_lock);
353 if (delegation != NULL) { 369 if (delegation != NULL) {
354 nfs_msync_inode(inode); 370 nfs_msync_inode(inode);
@@ -526,7 +542,7 @@ restart:
526 if (inode == NULL) 542 if (inode == NULL)
527 continue; 543 continue;
528 spin_lock(&clp->cl_lock); 544 spin_lock(&clp->cl_lock);
529 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 545 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
530 spin_unlock(&clp->cl_lock); 546 spin_unlock(&clp->cl_lock);
531 rcu_read_unlock(); 547 rcu_read_unlock();
532 if (delegation != NULL) 548 if (delegation != NULL)
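The delegation changes switch the updater paths to rcu_dereference_protected(..., lockdep_is_held(&clp->cl_lock)), use rcu_access_pointer() for plain NULL checks, and have nfs_inode_reclaim_delegation() revalidate delegation->inode under delegation->lock before touching the fields. A compact userspace model of the updater-side discipline, with C11 atomics standing in for the RCU primitives and an assert standing in for lockdep (all names and the model itself are mine, not NFS code):

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct delegation_model { int type; };

static pthread_mutex_t cl_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic(struct delegation_model *) delegation;
static int cl_lock_held;                 /* stand-in for lockdep */

/* Updater-side read: legal only while holding cl_lock. */
static struct delegation_model *deref_protected(void)
{
        assert(cl_lock_held);            /* models lockdep_is_held() */
        return atomic_load_explicit(&delegation, memory_order_relaxed);
}

/* Detach under the lock; the caller frees once readers are done. */
static struct delegation_model *detach_delegation(void)
{
        struct delegation_model *d;

        pthread_mutex_lock(&cl_lock);
        cl_lock_held = 1;
        d = deref_protected();
        if (d)
                atomic_store_explicit(&delegation, NULL,
                                      memory_order_release);
        cl_lock_held = 0;
        pthread_mutex_unlock(&cl_lock);
        return d;                        /* freed after a grace period */
}

int main(void)
{
        struct delegation_model *d = malloc(sizeof(*d));

        d->type = 1;
        atomic_store(&delegation, d);
        printf("first detach:  %p\n", (void *)detach_delegation());
        printf("second detach: %p\n", (void *)detach_delegation());
        free(d);
        return 0;
}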
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c6f2750648f4..a7bb5c694aa3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -837,6 +837,8 @@ out_zap_parent:
837 /* If we have submounts, don't unhash ! */ 837 /* If we have submounts, don't unhash ! */
838 if (have_submounts(dentry)) 838 if (have_submounts(dentry))
839 goto out_valid; 839 goto out_valid;
840 if (dentry->d_flags & DCACHE_DISCONNECTED)
841 goto out_valid;
840 shrink_dcache_parent(dentry); 842 shrink_dcache_parent(dentry);
841 } 843 }
842 d_drop(dentry); 844 d_drop(dentry);
@@ -1025,12 +1027,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1025 res = NULL; 1027 res = NULL;
1026 goto out; 1028 goto out;
1027 /* This turned out not to be a regular file */ 1029 /* This turned out not to be a regular file */
1030 case -EISDIR:
1028 case -ENOTDIR: 1031 case -ENOTDIR:
1029 goto no_open; 1032 goto no_open;
1030 case -ELOOP: 1033 case -ELOOP:
1031 if (!(nd->intent.open.flags & O_NOFOLLOW)) 1034 if (!(nd->intent.open.flags & O_NOFOLLOW))
1032 goto no_open; 1035 goto no_open;
1033 /* case -EISDIR: */
1034 /* case -EINVAL: */ 1036 /* case -EINVAL: */
1035 default: 1037 default:
1036 goto out; 1038 goto out;
@@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1050 struct inode *dir; 1052 struct inode *dir;
1051 int openflags, ret = 0; 1053 int openflags, ret = 0;
1052 1054
1053 if (!is_atomic_open(nd)) 1055 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1054 goto no_open; 1056 goto no_open;
1055 parent = dget_parent(dentry); 1057 parent = dget_parent(dentry);
1056 dir = parent->d_inode; 1058 dir = parent->d_inode;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 737128f777f3..50a56edca0b5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -623,10 +623,10 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
623 list_for_each_entry(pos, &nfsi->open_files, list) { 623 list_for_each_entry(pos, &nfsi->open_files, list) {
624 if (cred != NULL && pos->cred != cred) 624 if (cred != NULL && pos->cred != cred)
625 continue; 625 continue;
626 if ((pos->mode & mode) == mode) { 626 if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
627 ctx = get_nfs_open_context(pos); 627 continue;
628 break; 628 ctx = get_nfs_open_context(pos);
629 } 629 break;
630 } 630 }
631 spin_unlock(&inode->i_lock); 631 spin_unlock(&inode->i_lock);
632 return ctx; 632 return ctx;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fe0cd9eb1d4d..071fcedd517c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1523,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1523 nfs_post_op_update_inode(dir, o_res->dir_attr); 1523 nfs_post_op_update_inode(dir, o_res->dir_attr);
1524 } else 1524 } else
1525 nfs_refresh_inode(dir, o_res->dir_attr); 1525 nfs_refresh_inode(dir, o_res->dir_attr);
1526 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
1527 server->caps &= ~NFS_CAP_POSIX_LOCK;
1526 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1528 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1527 status = _nfs4_proc_open_confirm(data); 1529 status = _nfs4_proc_open_confirm(data);
1528 if (status != 0) 1530 if (status != 0)
@@ -1664,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1664 status = PTR_ERR(state); 1666 status = PTR_ERR(state);
1665 if (IS_ERR(state)) 1667 if (IS_ERR(state))
1666 goto err_opendata_put; 1668 goto err_opendata_put;
1667 if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) 1669 if (server->caps & NFS_CAP_POSIX_LOCK)
1668 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); 1670 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1669 nfs4_opendata_put(opendata); 1671 nfs4_opendata_put(opendata);
1670 nfs4_put_state_owner(sp); 1672 nfs4_put_state_owner(sp);
@@ -5216,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5216 msg.rpc_resp = &calldata->res; 5218 msg.rpc_resp = &calldata->res;
5217 task_setup_data.callback_data = calldata; 5219 task_setup_data.callback_data = calldata;
5218 task = rpc_run_task(&task_setup_data); 5220 task = rpc_run_task(&task_setup_data);
5219 if (IS_ERR(task)) 5221 if (IS_ERR(task)) {
5220 status = PTR_ERR(task); 5222 status = PTR_ERR(task);
5223 goto out;
5224 }
5221 rpc_put_task(task); 5225 rpc_put_task(task);
5226 return 0;
5222out: 5227out:
5223 dprintk("<-- %s status=%d\n", __func__, status); 5228 dprintk("<-- %s status=%d\n", __func__, status);
5224 return status; 5229 return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e01637240eeb..b4148fc00f9f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2187 if (data->version == 4) { 2187 if (data->version == 4) {
2188 error = nfs4_try_mount(flags, dev_name, data, mnt); 2188 error = nfs4_try_mount(flags, dev_name, data, mnt);
2189 kfree(data->client_address); 2189 kfree(data->client_address);
2190 kfree(data->nfs_server.export_path);
2190 goto out; 2191 goto out;
2191 } 2192 }
2192#endif /* CONFIG_NFS_V4 */ 2193#endif /* CONFIG_NFS_V4 */
@@ -2657,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2657 devname = nfs_path(path->mnt->mnt_devname, 2658 devname = nfs_path(path->mnt->mnt_devname,
2658 path->mnt->mnt_root, path->dentry, 2659 path->mnt->mnt_root, path->dentry,
2659 page, PAGE_SIZE); 2660 page, PAGE_SIZE);
2660 if (devname == NULL) 2661 if (IS_ERR(devname))
2661 goto out_freepage; 2662 goto out_freepage;
2662 tmp = kstrdup(devname, GFP_KERNEL); 2663 tmp = kstrdup(devname, GFP_KERNEL);
2663 if (tmp == NULL) 2664 if (tmp == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53ff70e23993..3aea3ca98ab7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -201,6 +201,7 @@ static int nfs_set_page_writeback(struct page *page)
201 struct inode *inode = page->mapping->host; 201 struct inode *inode = page->mapping->host;
202 struct nfs_server *nfss = NFS_SERVER(inode); 202 struct nfs_server *nfss = NFS_SERVER(inode);
203 203
204 page_cache_get(page);
204 if (atomic_long_inc_return(&nfss->writeback) > 205 if (atomic_long_inc_return(&nfss->writeback) >
205 NFS_CONGESTION_ON_THRESH) { 206 NFS_CONGESTION_ON_THRESH) {
206 set_bdi_congested(&nfss->backing_dev_info, 207 set_bdi_congested(&nfss->backing_dev_info,
@@ -216,6 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
216 struct nfs_server *nfss = NFS_SERVER(inode); 217 struct nfs_server *nfss = NFS_SERVER(inode);
217 218
218 end_page_writeback(page); 219 end_page_writeback(page);
220 page_cache_release(page);
219 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 221 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
220 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 222 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
221} 223}
@@ -421,6 +423,7 @@ static void
421nfs_mark_request_dirty(struct nfs_page *req) 423nfs_mark_request_dirty(struct nfs_page *req)
422{ 424{
423 __set_page_dirty_nobuffers(req->wb_page); 425 __set_page_dirty_nobuffers(req->wb_page);
426 __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
424} 427}
425 428
426#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 429#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -660,9 +663,11 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
660 req = nfs_setup_write_request(ctx, page, offset, count); 663 req = nfs_setup_write_request(ctx, page, offset, count);
661 if (IS_ERR(req)) 664 if (IS_ERR(req))
662 return PTR_ERR(req); 665 return PTR_ERR(req);
666 nfs_mark_request_dirty(req);
663 /* Update file length */ 667 /* Update file length */
664 nfs_grow_file(page, offset, count); 668 nfs_grow_file(page, offset, count);
665 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 669 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
670 nfs_mark_request_dirty(req);
666 nfs_clear_page_tag_locked(req); 671 nfs_clear_page_tag_locked(req);
667 return 0; 672 return 0;
668} 673}
@@ -739,8 +744,6 @@ int nfs_updatepage(struct file *file, struct page *page,
739 status = nfs_writepage_setup(ctx, page, offset, count); 744 status = nfs_writepage_setup(ctx, page, offset, count);
740 if (status < 0) 745 if (status < 0)
741 nfs_set_pageerror(page); 746 nfs_set_pageerror(page);
742 else
743 __set_page_dirty_nobuffers(page);
744 747
745 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", 748 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
746 status, (long long)i_size_read(inode)); 749 status, (long long)i_size_read(inode));
@@ -749,13 +752,12 @@ int nfs_updatepage(struct file *file, struct page *page,
749 752
750static void nfs_writepage_release(struct nfs_page *req) 753static void nfs_writepage_release(struct nfs_page *req)
751{ 754{
755 struct page *page = req->wb_page;
752 756
753 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { 757 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req))
754 nfs_end_page_writeback(req->wb_page);
755 nfs_inode_remove_request(req); 758 nfs_inode_remove_request(req);
756 } else
757 nfs_end_page_writeback(req->wb_page);
758 nfs_clear_page_tag_locked(req); 759 nfs_clear_page_tag_locked(req);
760 nfs_end_page_writeback(page);
759} 761}
760 762
761static int flush_task_priority(int how) 763static int flush_task_priority(int how)
@@ -779,7 +781,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
779 int how) 781 int how)
780{ 782{
781 struct inode *inode = req->wb_context->path.dentry->d_inode; 783 struct inode *inode = req->wb_context->path.dentry->d_inode;
782 int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
783 int priority = flush_task_priority(how); 784 int priority = flush_task_priority(how);
784 struct rpc_task *task; 785 struct rpc_task *task;
785 struct rpc_message msg = { 786 struct rpc_message msg = {
@@ -794,9 +795,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
794 .callback_ops = call_ops, 795 .callback_ops = call_ops,
795 .callback_data = data, 796 .callback_data = data,
796 .workqueue = nfsiod_workqueue, 797 .workqueue = nfsiod_workqueue,
797 .flags = flags, 798 .flags = RPC_TASK_ASYNC,
798 .priority = priority, 799 .priority = priority,
799 }; 800 };
801 int ret = 0;
800 802
801 /* Set up the RPC argument and reply structs 803 /* Set up the RPC argument and reply structs
802 * NB: take care not to mess about with data->commit et al. */ 804 * NB: take care not to mess about with data->commit et al. */
@@ -835,10 +837,18 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
835 (unsigned long long)data->args.offset); 837 (unsigned long long)data->args.offset);
836 838
837 task = rpc_run_task(&task_setup_data); 839 task = rpc_run_task(&task_setup_data);
838 if (IS_ERR(task)) 840 if (IS_ERR(task)) {
839 return PTR_ERR(task); 841 ret = PTR_ERR(task);
842 goto out;
843 }
844 if (how & FLUSH_SYNC) {
845 ret = rpc_wait_for_completion_task(task);
846 if (ret == 0)
847 ret = task->tk_status;
848 }
840 rpc_put_task(task); 849 rpc_put_task(task);
841 return 0; 850out:
851 return ret;
842} 852}
843 853
844/* If a nfs_flush_* function fails, it should remove reqs from @head and 854/* If a nfs_flush_* function fails, it should remove reqs from @head and
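nfs_write_rpcsetup() used to choose RPC_TASK_ASYNC only for asynchronous flushes; after this hunk the task always starts asynchronously and FLUSH_SYNC callers wait for completion and propagate the task status. The same start-async-then-optionally-wait shape in plain pthreads (the worker, its status plumbing, and the ownership rules are invented for the sketch):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define FLUSH_SYNC 0x1

struct write_task {
        int status;
        int async;      /* worker owns and frees the task when set */
};

static void *do_write(void *arg)
{
        struct write_task *t = arg;

        t->status = 0;                   /* pretend the write RPC succeeded */
        if (t->async)
                free(t);                 /* fire-and-forget: worker cleans up */
        return NULL;
}

static int write_rpcsetup_model(int how)
{
        struct write_task *task;
        pthread_t tid;
        int ret = 0;

        task = calloc(1, sizeof(*task));
        if (!task)
                return -1;
        task->async = !(how & FLUSH_SYNC);

        /* Always start the work asynchronously... */
        if (pthread_create(&tid, NULL, do_write, task)) {
                free(task);
                return -1;
        }

        if (how & FLUSH_SYNC) {
                /* ...and only synchronous callers wait and pick up status. */
                pthread_join(tid, NULL);
                ret = task->status;
                free(task);
        } else {
                pthread_detach(tid);
        }
        return ret;
}

int main(void)
{
        printf("sync flush  -> %d\n", write_rpcsetup_model(FLUSH_SYNC));
        printf("async flush -> %d\n", write_rpcsetup_model(0));
        pthread_exit(NULL);              /* let any detached worker finish */
}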
@@ -847,9 +857,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
847 */ 857 */
848static void nfs_redirty_request(struct nfs_page *req) 858static void nfs_redirty_request(struct nfs_page *req)
849{ 859{
860 struct page *page = req->wb_page;
861
850 nfs_mark_request_dirty(req); 862 nfs_mark_request_dirty(req);
851 nfs_end_page_writeback(req->wb_page);
852 nfs_clear_page_tag_locked(req); 863 nfs_clear_page_tag_locked(req);
864 nfs_end_page_writeback(page);
853} 865}
854 866
855/* 867/*
@@ -1084,16 +1096,15 @@ static void nfs_writeback_release_full(void *calldata)
1084 if (nfs_write_need_commit(data)) { 1096 if (nfs_write_need_commit(data)) {
1085 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1097 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1086 nfs_mark_request_commit(req); 1098 nfs_mark_request_commit(req);
1087 nfs_end_page_writeback(page);
1088 dprintk(" marked for commit\n"); 1099 dprintk(" marked for commit\n");
1089 goto next; 1100 goto next;
1090 } 1101 }
1091 dprintk(" OK\n"); 1102 dprintk(" OK\n");
1092remove_request: 1103remove_request:
1093 nfs_end_page_writeback(page);
1094 nfs_inode_remove_request(req); 1104 nfs_inode_remove_request(req);
1095 next: 1105 next:
1096 nfs_clear_page_tag_locked(req); 1106 nfs_clear_page_tag_locked(req);
1107 nfs_end_page_writeback(page);
1097 } 1108 }
1098 nfs_writedata_release(calldata); 1109 nfs_writedata_release(calldata);
1099} 1110}
@@ -1190,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1190 1201
1191 1202
1192#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1203#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1204static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1205{
1206 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1207 return 1;
1208 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
1209 NFS_INO_COMMIT, nfs_wait_bit_killable,
1210 TASK_KILLABLE))
1211 return 1;
1212 return 0;
1213}
1214
1215static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1216{
1217 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1218 smp_mb__after_clear_bit();
1219 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1220}
1221
1222
1193static void nfs_commitdata_release(void *data) 1223static void nfs_commitdata_release(void *data)
1194{ 1224{
1195 struct nfs_write_data *wdata = data; 1225 struct nfs_write_data *wdata = data;
@@ -1207,7 +1237,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1207{ 1237{
1208 struct nfs_page *first = nfs_list_entry(head->next); 1238 struct nfs_page *first = nfs_list_entry(head->next);
1209 struct inode *inode = first->wb_context->path.dentry->d_inode; 1239 struct inode *inode = first->wb_context->path.dentry->d_inode;
1210 int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
1211 int priority = flush_task_priority(how); 1240 int priority = flush_task_priority(how);
1212 struct rpc_task *task; 1241 struct rpc_task *task;
1213 struct rpc_message msg = { 1242 struct rpc_message msg = {
@@ -1222,7 +1251,7 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1222 .callback_ops = &nfs_commit_ops, 1251 .callback_ops = &nfs_commit_ops,
1223 .callback_data = data, 1252 .callback_data = data,
1224 .workqueue = nfsiod_workqueue, 1253 .workqueue = nfsiod_workqueue,
1225 .flags = flags, 1254 .flags = RPC_TASK_ASYNC,
1226 .priority = priority, 1255 .priority = priority,
1227 }; 1256 };
1228 1257
@@ -1282,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1282 BDI_RECLAIMABLE); 1311 BDI_RECLAIMABLE);
1283 nfs_clear_page_tag_locked(req); 1312 nfs_clear_page_tag_locked(req);
1284 } 1313 }
1314 nfs_commit_clear_lock(NFS_I(inode));
1285 return -ENOMEM; 1315 return -ENOMEM;
1286} 1316}
1287 1317
@@ -1337,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
1337 next: 1367 next:
1338 nfs_clear_page_tag_locked(req); 1368 nfs_clear_page_tag_locked(req);
1339 } 1369 }
1370 nfs_commit_clear_lock(NFS_I(data->inode));
1340 nfs_commitdata_release(calldata); 1371 nfs_commitdata_release(calldata);
1341} 1372}
1342 1373
@@ -1351,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
1351static int nfs_commit_inode(struct inode *inode, int how) 1382static int nfs_commit_inode(struct inode *inode, int how)
1352{ 1383{
1353 LIST_HEAD(head); 1384 LIST_HEAD(head);
1354 int res; 1385 int may_wait = how & FLUSH_SYNC;
1386 int res = 0;
1355 1387
1388 if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
1389 goto out;
1356 spin_lock(&inode->i_lock); 1390 spin_lock(&inode->i_lock);
1357 res = nfs_scan_commit(inode, &head, 0, 0); 1391 res = nfs_scan_commit(inode, &head, 0, 0);
1358 spin_unlock(&inode->i_lock); 1392 spin_unlock(&inode->i_lock);
@@ -1360,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
1360 int error = nfs_commit_list(inode, &head, how); 1394 int error = nfs_commit_list(inode, &head, how);
1361 if (error < 0) 1395 if (error < 0)
1362 return error; 1396 return error;
1363 } 1397 if (may_wait)
1398 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1399 nfs_wait_bit_killable,
1400 TASK_KILLABLE);
1401 } else
1402 nfs_commit_clear_lock(NFS_I(inode));
1403out:
1364 return res; 1404 return res;
1365} 1405}
1366 1406
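nfs_commit_inode() now serializes commits per inode on an NFS_INO_COMMIT bit: a caller that cannot take the bit returns immediately unless it asked for FLUSH_SYNC, in which case it sleeps until the current owner calls nfs_commit_clear_lock(), and the completion and error paths clear the bit and wake waiters. A userspace model of that bit-lock using a mutex and condition variable in place of the wait_on_bit machinery (names mirror the patch, but this is only a sketch):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct nfs_inode_model {
        pthread_mutex_t lock;
        pthread_cond_t  wq;
        bool            committing;      /* models the NFS_INO_COMMIT bit */
};

/* Returns true if we now own the commit, false for a non-waiting loser. */
static bool commit_set_lock(struct nfs_inode_model *nfsi, bool may_wait)
{
        bool got = false;

        pthread_mutex_lock(&nfsi->lock);
        while (nfsi->committing) {
                if (!may_wait)
                        goto out;
                pthread_cond_wait(&nfsi->wq, &nfsi->lock);
        }
        nfsi->committing = true;
        got = true;
out:
        pthread_mutex_unlock(&nfsi->lock);
        return got;
}

static void commit_clear_lock(struct nfs_inode_model *nfsi)
{
        pthread_mutex_lock(&nfsi->lock);
        nfsi->committing = false;
        pthread_cond_broadcast(&nfsi->wq);   /* wake_up_bit() equivalent */
        pthread_mutex_unlock(&nfsi->lock);
}

int main(void)
{
        struct nfs_inode_model nfsi = {
                PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
        };

        printf("first caller:  %d\n", commit_set_lock(&nfsi, false)); /* 1 */
        printf("second caller: %d\n", commit_set_lock(&nfsi, false)); /* 0 */
        commit_clear_lock(&nfsi);
        printf("after clear:   %d\n", commit_set_lock(&nfsi, true));  /* 1 */
        commit_clear_lock(&nfsi);
        return 0;
}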
@@ -1432,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1432 1472
1433 BUG_ON(!PageLocked(page)); 1473 BUG_ON(!PageLocked(page));
1434 for (;;) { 1474 for (;;) {
1475 wait_on_page_writeback(page);
1435 req = nfs_page_find_request(page); 1476 req = nfs_page_find_request(page);
1436 if (req == NULL) 1477 if (req == NULL)
1437 break; 1478 break;
@@ -1466,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page)
1466 .range_start = range_start, 1507 .range_start = range_start,
1467 .range_end = range_end, 1508 .range_end = range_end,
1468 }; 1509 };
1469 struct nfs_page *req;
1470 int need_commit;
1471 int ret; 1510 int ret;
1472 1511
1473 while(PagePrivate(page)) { 1512 while(PagePrivate(page)) {
1513 wait_on_page_writeback(page);
1474 if (clear_page_dirty_for_io(page)) { 1514 if (clear_page_dirty_for_io(page)) {
1475 ret = nfs_writepage_locked(page, &wbc); 1515 ret = nfs_writepage_locked(page, &wbc);
1476 if (ret < 0) 1516 if (ret < 0)
1477 goto out_error; 1517 goto out_error;
1478 } 1518 }
1479 req = nfs_find_and_lock_request(page); 1519 ret = sync_inode(inode, &wbc);
1480 if (!req) 1520 if (ret < 0)
1481 break;
1482 if (IS_ERR(req)) {
1483 ret = PTR_ERR(req);
1484 goto out_error; 1521 goto out_error;
1485 }
1486 need_commit = test_bit(PG_CLEAN, &req->wb_flags);
1487 nfs_clear_page_tag_locked(req);
1488 if (need_commit) {
1489 ret = nfs_commit_inode(inode, FLUSH_SYNC);
1490 if (ret < 0)
1491 goto out_error;
1492 }
1493 } 1522 }
1494 return 0; 1523 return 0;
1495out_error: 1524out_error:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e1703175ee28..34ccf815ea8a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
161 argp->p = page_address(argp->pagelist[0]); 161 argp->p = page_address(argp->pagelist[0]);
162 argp->pagelist++; 162 argp->pagelist++;
163 if (argp->pagelen < PAGE_SIZE) { 163 if (argp->pagelen < PAGE_SIZE) {
164 argp->end = p + (argp->pagelen>>2); 164 argp->end = argp->p + (argp->pagelen>>2);
165 argp->pagelen = 0; 165 argp->pagelen = 0;
166 } else { 166 } else {
167 argp->end = p + (PAGE_SIZE>>2); 167 argp->end = argp->p + (PAGE_SIZE>>2);
168 argp->pagelen -= PAGE_SIZE; 168 argp->pagelen -= PAGE_SIZE;
169 } 169 }
170 memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); 170 memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
@@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1426 argp->p = page_address(argp->pagelist[0]); 1426 argp->p = page_address(argp->pagelist[0]);
1427 argp->pagelist++; 1427 argp->pagelist++;
1428 if (argp->pagelen < PAGE_SIZE) { 1428 if (argp->pagelen < PAGE_SIZE) {
1429 argp->end = p + (argp->pagelen>>2); 1429 argp->end = argp->p + (argp->pagelen>>2);
1430 argp->pagelen = 0; 1430 argp->pagelen = 0;
1431 } else { 1431 } else {
1432 argp->end = p + (PAGE_SIZE>>2); 1432 argp->end = argp->p + (PAGE_SIZE>>2);
1433 argp->pagelen -= PAGE_SIZE; 1433 argp->pagelen -= PAGE_SIZE;
1434 } 1434 }
1435 } 1435 }
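Both read_buf() and nfsd4_decode_compound() advance argp->p to the next page but previously computed argp->end from the stale local p, so the end marker could still bound the old page; the fix derives it from argp->p. The same class of bug in miniature (buffer names and sizes are made up):

#include <stdint.h>
#include <stdio.h>

#define PAGE_WORDS 8

static uint32_t page0[PAGE_WORDS], page1[PAGE_WORDS];

static int end_bounds_page(const uint32_t *end, const uint32_t *page)
{
        return (uintptr_t)end > (uintptr_t)page &&
               (uintptr_t)end <= (uintptr_t)(page + PAGE_WORDS);
}

int main(void)
{
        uint32_t *p, *end, *old_p;

        /* Decoding starts in the first page. */
        p = page0;
        end = p + PAGE_WORDS;

        /* Buggy switch to the next page: end computed from the old p. */
        old_p = p;
        p = page1;
        end = old_p + PAGE_WORDS;        /* still bounds page0 */
        printf("buggy:   end bounds the new page? %s\n",
               end_bounds_page(end, page1) ? "yes" : "no");

        /* Fixed switch: end computed from the updated pointer. */
        end = p + PAGE_WORDS;
        printf("correct: end bounds the new page? %s\n",
               end_bounds_page(end, page1) ? "yes" : "no");
        return 0;
}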
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e7655a..48145f505a6a 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
749 sb->s_export_op = &nilfs_export_ops; 749 sb->s_export_op = &nilfs_export_ops;
750 sb->s_root = NULL; 750 sb->s_root = NULL;
751 sb->s_time_gran = 1; 751 sb->s_time_gran = 1;
752 sb->s_bdi = nilfs->ns_bdi;
752 753
753 err = load_nilfs(nilfs, sbi); 754 err = load_nilfs(nilfs, sbi);
754 if (err) 755 if (err)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbffe729..b3a159b21cfd 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
15 15
16config INOTIFY_USER 16config INOTIFY_USER
17 bool "Inotify support for userspace" 17 bool "Inotify support for userspace"
18 select ANON_INODES
18 select FSNOTIFY 19 select FSNOTIFY
19 default y 20 default y
20 ---help--- 21 ---help---
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1afb0a10229f..e27960cd76ab 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -28,6 +28,7 @@
28#include <linux/path.h> /* struct path */ 28#include <linux/path.h> /* struct path */
29#include <linux/slab.h> /* kmem_* */ 29#include <linux/slab.h> /* kmem_* */
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/sched.h>
31 32
32#include "inotify.h" 33#include "inotify.h"
33 34
@@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
146 idr_for_each(&group->inotify_data.idr, idr_callback, group); 147 idr_for_each(&group->inotify_data.idr, idr_callback, group);
147 idr_remove_all(&group->inotify_data.idr); 148 idr_remove_all(&group->inotify_data.idr);
148 idr_destroy(&group->inotify_data.idr); 149 idr_destroy(&group->inotify_data.idr);
150 free_uid(group->inotify_data.user);
149} 151}
150 152
151void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) 153void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 472cdf29ef82..e46ca685b9be 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -546,21 +546,24 @@ retry:
546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) 546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
547 goto out_err; 547 goto out_err;
548 548
549 /* we are putting the mark on the idr, take a reference */
550 fsnotify_get_mark(&tmp_ientry->fsn_entry);
551
549 spin_lock(&group->inotify_data.idr_lock); 552 spin_lock(&group->inotify_data.idr_lock);
550 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 553 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
551 group->inotify_data.last_wd+1, 554 group->inotify_data.last_wd+1,
552 &tmp_ientry->wd); 555 &tmp_ientry->wd);
553 spin_unlock(&group->inotify_data.idr_lock); 556 spin_unlock(&group->inotify_data.idr_lock);
554 if (ret) { 557 if (ret) {
558 /* we didn't get on the idr, drop the idr reference */
559 fsnotify_put_mark(&tmp_ientry->fsn_entry);
560
555 /* idr was out of memory allocate and try again */ 561 /* idr was out of memory allocate and try again */
556 if (ret == -EAGAIN) 562 if (ret == -EAGAIN)
557 goto retry; 563 goto retry;
558 goto out_err; 564 goto out_err;
559 } 565 }
560 566
561 /* we put the mark on the idr, take a reference */
562 fsnotify_get_mark(&tmp_ientry->fsn_entry);
563
564 /* we are on the idr, now get on the inode */ 567 /* we are on the idr, now get on the inode */
565 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); 568 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
566 if (ret) { 569 if (ret) {
@@ -578,16 +581,13 @@ retry:
578 /* return the watch descriptor for this new entry */ 581 /* return the watch descriptor for this new entry */
579 ret = tmp_ientry->wd; 582 ret = tmp_ientry->wd;
580 583
581 /* match the ref from fsnotify_init_markentry() */
582 fsnotify_put_mark(&tmp_ientry->fsn_entry);
583
584 /* if this mark added a new event update the group mask */ 584 /* if this mark added a new event update the group mask */
585 if (mask & ~group->mask) 585 if (mask & ~group->mask)
586 fsnotify_recalc_group_mask(group); 586 fsnotify_recalc_group_mask(group);
587 587
588out_err: 588out_err:
589 if (ret < 0) 589 /* match the ref from fsnotify_init_markentry() */
590 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); 590 fsnotify_put_mark(&tmp_ientry->fsn_entry);
591 591
592 return ret; 592 return ret;
593} 593}
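The inotify fix takes the extra mark reference before the idr insertion that publishes the mark, drops that reference again if the insertion fails, and unconditionally drops the fsnotify_init_markentry() reference on the way out instead of freeing the object by hand on error. A tiny userspace refcount model of that grab-before-publish ordering (the fail_publish switch exists only to exercise both paths):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct mark_model {
        int refcount;
};

static struct mark_model *registry;      /* models the idr slot */

static void get_mark(struct mark_model *m) { m->refcount++; }

static void put_mark(struct mark_model *m)
{
        if (--m->refcount == 0) {
                printf("  mark freed\n");
                free(m);
        }
}

static int add_watch(bool fail_publish)
{
        struct mark_model *m = calloc(1, sizeof(*m));

        if (!m)
                return -1;
        m->refcount = 1;                 /* init reference (creator's) */

        get_mark(m);                     /* 1) ref for the registry, first */
        if (fail_publish) {
                put_mark(m);             /* 2) publish failed: drop that ref */
                put_mark(m);             /*    ...and the init reference */
                return -1;
        }
        registry = m;                    /* 3) publish while already pinned */

        put_mark(m);                     /* creator's init reference is done */
        return 0;
}

int main(void)
{
        printf("successful add: %d\n", add_watch(false));
        printf("registry refs left: %d\n", registry->refcount);   /* 1 */
        put_mark(registry);              /* later removal drops the last ref */
        registry = NULL;
        printf("failed add: %d\n", add_watch(true));
        return 0;
}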
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index ecebb2276790..f9d5d3ffc75a 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
406 struct buffer_head *bh) 406 struct buffer_head *bh)
407{ 407{
408 int ret = 0; 408 int ret = 0;
409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
409 410
410 mlog_entry_void(); 411 mlog_entry_void();
411 412
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
425 426
426 get_bh(bh); /* for end_buffer_write_sync() */ 427 get_bh(bh); /* for end_buffer_write_sync() */
427 bh->b_end_io = end_buffer_write_sync; 428 bh->b_end_io = end_buffer_write_sync;
429 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
428 submit_bh(WRITE, bh); 430 submit_bh(WRITE, bh);
429 431
430 wait_on_buffer(bh); 432 wait_on_buffer(bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index a795eb91f4ea..12d5eb78a11a 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
184 BUG_ON(!lksb); 184 BUG_ON(!lksb);
185 185
186 /* only updates if this node masters the lockres */ 186 /* only updates if this node masters the lockres */
187 spin_lock(&res->spinlock);
187 if (res->owner == dlm->node_num) { 188 if (res->owner == dlm->node_num) {
188
189 spin_lock(&res->spinlock);
190 /* check the lksb flags for the direction */ 189 /* check the lksb flags for the direction */
191 if (lksb->flags & DLM_LKSB_GET_LVB) { 190 if (lksb->flags & DLM_LKSB_GET_LVB) {
192 mlog(0, "getting lvb from lockres for %s node\n", 191 mlog(0, "getting lvb from lockres for %s node\n",
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
201 * here. In the future we might want to clear it at the time 200 * here. In the future we might want to clear it at the time
202 * the put is actually done. 201 * the put is actually done.
203 */ 202 */
204 spin_unlock(&res->spinlock);
205 } 203 }
204 spin_unlock(&res->spinlock);
206 205
207 /* reset any lvb flags on the lksb */ 206 /* reset any lvb flags on the lksb */
208 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); 207 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de157a08c..b83d6107a1f5 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
112 * O_RDONLY -> PRMODE level 112 * O_RDONLY -> PRMODE level
113 * O_WRONLY -> EXMODE level 113 * O_WRONLY -> EXMODE level
114 * 114 *
115 * O_NONBLOCK -> LKM_NOQUEUE 115 * O_NONBLOCK -> NOQUEUE
116 */ 116 */
117static int dlmfs_decode_open_flags(int open_flags, 117static int dlmfs_decode_open_flags(int open_flags,
118 int *level, 118 int *level,
119 int *flags) 119 int *flags)
120{ 120{
121 if (open_flags & (O_WRONLY|O_RDWR)) 121 if (open_flags & (O_WRONLY|O_RDWR))
122 *level = LKM_EXMODE; 122 *level = DLM_LOCK_EX;
123 else 123 else
124 *level = LKM_PRMODE; 124 *level = DLM_LOCK_PR;
125 125
126 *flags = 0; 126 *flags = 0;
127 if (open_flags & O_NONBLOCK) 127 if (open_flags & O_NONBLOCK)
128 *flags |= LKM_NOQUEUE; 128 *flags |= DLM_LKF_NOQUEUE;
129 129
130 return 0; 130 return 0;
131} 131}
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
 166 * to allow userspace to distinguish a 166 * to allow userspace to distinguish a
167 * valid lock request from one that simply couldn't be 167 * valid lock request from one that simply couldn't be
168 * granted. */ 168 * granted. */
169 if (flags & LKM_NOQUEUE && status == -EAGAIN) 169 if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
170 status = -ETXTBSY; 170 status = -ETXTBSY;
171 kfree(fp); 171 kfree(fp);
172 goto bail; 172 goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
193 status = 0; 193 status = 0;
194 if (fp) { 194 if (fp) {
195 level = fp->fp_lock_level; 195 level = fp->fp_lock_level;
196 if (level != LKM_IVMODE) 196 if (level != DLM_LOCK_IV)
197 user_dlm_cluster_unlock(&ip->ip_lockres, level); 197 user_dlm_cluster_unlock(&ip->ip_lockres, level);
198 198
199 kfree(fp); 199 kfree(fp);
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
262 if ((count + *ppos) > i_size_read(inode)) 262 if ((count + *ppos) > i_size_read(inode))
263 readlen = i_size_read(inode) - *ppos; 263 readlen = i_size_read(inode) - *ppos;
264 else 264 else
265 readlen = count - *ppos; 265 readlen = count;
266 266
267 lvb_buf = kmalloc(readlen, GFP_NOFS); 267 lvb_buf = kmalloc(readlen, GFP_NOFS);
268 if (!lvb_buf) 268 if (!lvb_buf)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc8341e..a5fbd9cea968 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -684,6 +684,7 @@ restarted_transaction:
684 if (why == RESTART_META) { 684 if (why == RESTART_META) {
685 mlog(0, "restarting function.\n"); 685 mlog(0, "restarting function.\n");
686 restart_func = 1; 686 restart_func = 1;
687 status = 0;
687 } else { 688 } else {
688 BUG_ON(why != RESTART_TRANS); 689 BUG_ON(why != RESTART_TRANS);
689 690
@@ -1981,18 +1982,18 @@ relock:
1981 /* communicate with ocfs2_dio_end_io */ 1982 /* communicate with ocfs2_dio_end_io */
1982 ocfs2_iocb_set_rw_locked(iocb, rw_level); 1983 ocfs2_iocb_set_rw_locked(iocb, rw_level);
1983 1984
1984 if (direct_io) { 1985 ret = generic_segment_checks(iov, &nr_segs, &ocount,
1985 ret = generic_segment_checks(iov, &nr_segs, &ocount, 1986 VERIFY_READ);
1986 VERIFY_READ); 1987 if (ret)
1987 if (ret) 1988 goto out_dio;
1988 goto out_dio;
1989 1989
1990 count = ocount; 1990 count = ocount;
1991 ret = generic_write_checks(file, ppos, &count, 1991 ret = generic_write_checks(file, ppos, &count,
1992 S_ISBLK(inode->i_mode)); 1992 S_ISBLK(inode->i_mode));
1993 if (ret) 1993 if (ret)
1994 goto out_dio; 1994 goto out_dio;
1995 1995
1996 if (direct_io) {
1996 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 1997 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
1997 ppos, count, ocount); 1998 ppos, count, ocount);
1998 if (written < 0) { 1999 if (written < 0) {
@@ -2007,7 +2008,10 @@ relock:
2007 goto out_dio; 2008 goto out_dio;
2008 } 2009 }
2009 } else { 2010 } else {
2010 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); 2011 current->backing_dev_info = file->f_mapping->backing_dev_info;
2012 written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
2013 ppos, count, 0);
2014 current->backing_dev_info = NULL;
2011 } 2015 }
2012 2016
2013out_dio: 2017out_dio:
@@ -2021,9 +2025,9 @@ out_dio:
2021 if (ret < 0) 2025 if (ret < 0)
2022 written = ret; 2026 written = ret;
2023 2027
2024 if (!ret && (old_size != i_size_read(inode) || 2028 if (!ret && ((old_size != i_size_read(inode)) ||
2025 old_clusters != OCFS2_I(inode)->ip_clusters || 2029 (old_clusters != OCFS2_I(inode)->ip_clusters) ||
2026 has_refcount)) { 2030 has_refcount)) {
2027 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2031 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2028 if (ret < 0) 2032 if (ret < 0)
2029 written = ret; 2033 written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb68b6d..af189887201c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -558,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
558 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 558 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
559 if (IS_ERR(handle)) { 559 if (IS_ERR(handle)) {
560 status = PTR_ERR(handle); 560 status = PTR_ERR(handle);
561 handle = NULL;
561 mlog_errno(status); 562 mlog_errno(status);
562 goto out; 563 goto out;
563 } 564 }
@@ -639,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode,
639 goto bail_unlock; 640 goto bail_unlock;
640 } 641 }
641 642
642 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, 643 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
643 orphan_dir_bh); 644 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
644 if (status < 0) { 645 orphan_dir_bh);
645 mlog_errno(status); 646 if (status < 0) {
646 goto bail_commit; 647 mlog_errno(status);
648 goto bail_commit;
649 }
647 } 650 }
648 651
649 /* set the inodes dtime */ 652 /* set the inodes dtime */
@@ -722,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
 static int ocfs2_wipe_inode(struct inode *inode,
 			    struct buffer_head *di_bh)
 {
-	int status, orphaned_slot;
+	int status, orphaned_slot = -1;
 	struct inode *orphan_dir_inode = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_dinode *di;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 
-	di = (struct ocfs2_dinode *) di_bh->b_data;
-	orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
-
-	status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
-	if (status)
-		return status;
-
-	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
-						       ORPHAN_DIR_SYSTEM_INODE,
-						       orphaned_slot);
-	if (!orphan_dir_inode) {
-		status = -EEXIST;
-		mlog_errno(status);
-		goto bail;
-	}
-
-	/* Lock the orphan dir. The lock will be held for the entire
-	 * delete_inode operation. We do this now to avoid races with
-	 * recovery completion on other nodes. */
-	mutex_lock(&orphan_dir_inode->i_mutex);
-	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
-	if (status < 0) {
-		mutex_unlock(&orphan_dir_inode->i_mutex);
-
-		mlog_errno(status);
-		goto bail;
+	if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
+		orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
+
+		status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
+		if (status)
+			return status;
+
+		orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+							       ORPHAN_DIR_SYSTEM_INODE,
+							       orphaned_slot);
+		if (!orphan_dir_inode) {
+			status = -EEXIST;
+			mlog_errno(status);
+			goto bail;
+		}
+
+		/* Lock the orphan dir. The lock will be held for the entire
+		 * delete_inode operation. We do this now to avoid races with
+		 * recovery completion on other nodes. */
+		mutex_lock(&orphan_dir_inode->i_mutex);
+		status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+		if (status < 0) {
+			mutex_unlock(&orphan_dir_inode->i_mutex);
+
+			mlog_errno(status);
+			goto bail;
+		}
 	}
 
 	/* we do this while holding the orphan dir lock because we
@@ -794,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
 		mlog_errno(status);
 
 bail_unlock_dir:
+	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
+		return status;
+
 	ocfs2_inode_unlock(orphan_dir_inode, 1);
 	mutex_unlock(&orphan_dir_inode->i_mutex);
 	brelse(orphan_dir_bh);
@@ -889,7 +896,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 
 	/* Do some basic inode verification... */
 	di = (struct ocfs2_dinode *) di_bh->b_data;
-	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
+	    !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
 		/*
 		 * Inodes in the orphan dir must have ORPHANED_FL. The only
 		 * inodes that come back out of the orphan dir are reflink
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07b293c..0b28e1921a39 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -100,6 +100,8 @@ struct ocfs2_inode_info
 #define OCFS2_INODE_MAYBE_ORPHANED	0x00000020
 /* Does someone have the file open O_DIRECT */
 #define OCFS2_INODE_OPEN_DIRECT		0x00000040
+/* Tell the inode wipe code it's not in orphan dir */
+#define OCFS2_INODE_SKIP_ORPHAN_DIR	0x00000080
 
 static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50ae4097..4cbb18f26c5f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir,
 		}
 	}
 
-	status = ocfs2_add_entry(handle, dentry, inode,
-				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
-				 &lookup);
-	if (status < 0) {
+	/*
+	 * Do this before adding the entry to the directory. We add
+	 * also set d_op after success so that ->d_iput() will cleanup
+	 * the dentry lock even if ocfs2_add_entry() fails below.
+	 */
+	status = ocfs2_dentry_attach_lock(dentry, inode,
+					  OCFS2_I(dir)->ip_blkno);
+	if (status) {
 		mlog_errno(status);
 		goto leave;
 	}
+	dentry->d_op = &ocfs2_dentry_ops;
 
-	status = ocfs2_dentry_attach_lock(dentry, inode,
-					  OCFS2_I(dir)->ip_blkno);
-	if (status) {
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
+				 &lookup);
+	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
 	insert_inode_hash(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 	status = 0;
 leave:
@@ -445,11 +450,6 @@ leave:
 
 	ocfs2_free_dir_lookup_result(&lookup);
 
-	if ((status < 0) && inode) {
-		clear_nlink(inode);
-		iput(inode);
-	}
-
 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
 
@@ -459,6 +459,17 @@ leave:
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
 
+	/*
+	 * We should call iput after the i_mutex of the bitmap been
+	 * unlocked in ocfs2_free_alloc_context, or the
+	 * ocfs2_delete_inode will mutex_lock again.
+	 */
+	if ((status < 0) && inode) {
+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
+		clear_nlink(inode);
+		iput(inode);
+	}
+
 	mlog_exit(status);
 
 	return status;
@@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir,
 		}
 	}
 
-	status = ocfs2_add_entry(handle, dentry, inode,
-				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
-				 &lookup);
-	if (status < 0) {
+	/*
+	 * Do this before adding the entry to the directory. We add
+	 * also set d_op after success so that ->d_iput() will cleanup
+	 * the dentry lock even if ocfs2_add_entry() fails below.
+	 */
+	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+	if (status) {
 		mlog_errno(status);
 		goto bail;
 	}
+	dentry->d_op = &ocfs2_dentry_ops;
 
-	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
-	if (status) {
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
+				 &lookup);
+	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
 	insert_inode_hash(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 bail:
 	if (status < 0 && did_quota)
@@ -1811,6 +1827,7 @@ bail:
 	if (xattr_ac)
 		ocfs2_free_alloc_context(xattr_ac);
 	if ((status < 0) && inode) {
+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
 		clear_nlink(inode);
 		iput(inode);
 	}
@@ -1976,6 +1993,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	}
 
 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
+	OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
 
 	/* Record which orphan dir our inode now resides
 	 * in. delete_inode will use this to determine which orphan
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bd96f6c7877e..5cbcd0f008fc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
 	di->i_attr = s_di->i_attr;
 
 	if (preserve) {
+		t_inode->i_uid = s_inode->i_uid;
+		t_inode->i_gid = s_inode->i_gid;
+		t_inode->i_mode = s_inode->i_mode;
 		di->i_uid = s_di->i_uid;
 		di->i_gid = s_di->i_gid;
 		di->i_mode = s_di->i_mode;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e5..885ab5513ac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
-#include <linux/swapops.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,
-		(permitted && mm) ? task->stack_start : 0,
+		(permitted && mm) ? mm->start_stack : 0,
 		esp,
 		eip,
 		/* The signal information here is obsolete.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7621db800a74..8418fcc0a6ab 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2909,7 +2909,7 @@ out_no_task:
  */
 static const struct pid_entry tid_base_stuff[] = {
 	DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
-	DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations),
+	DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
 	REG("environ", S_IRUSR, proc_environ_operations),
 	INF("auxv", S_IRUSR, proc_pid_auxv),
 	ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd5..47f5b145f56e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 			} else if (vma->vm_start <= mm->start_stack &&
 				   vma->vm_end >= mm->start_stack) {
 				name = "[stack]";
-			} else {
-				unsigned long stack_start;
-				struct proc_maps_private *pmp;
-
-				pmp = m->private;
-				stack_start = pmp->task->stack_start;
-
-				if (vma->vm_start <= stack_start &&
-				    vma->vm_end >= stack_start) {
-					pad_len_spaces(m, len);
-					seq_printf(m,
-						   "[threadstack:%08lx]",
-#ifdef CONFIG_STACK_GROWSUP
-						   vma->vm_end - stack_start
-#else
-						   stack_start - vma->vm_start
-#endif
-						   );
-				}
 			}
 		} else {
 			name = "[vdso]";
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index dad7fb247ddc..3e21b1e2ad3a 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -33,6 +33,14 @@ config PRINT_QUOTA_WARNING
 	  Note that this behavior is currently deprecated and may go away in
 	  future. Please use notification via netlink socket instead.
 
+config QUOTA_DEBUG
+	bool "Additional quota sanity checks"
+	depends on QUOTA
+	default n
+	help
+	  If you say Y here, quota subsystem will perform some additional
+	  sanity checks of quota internal structures. If unsure, say N.
+
 # Generic support for tree structured quota files. Selected when needed.
 config QUOTA_TREE
 	tristate
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a0a9405b202a..788b5802a7ce 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -80,8 +80,6 @@
 
 #include <asm/uaccess.h>
 
-#define __DQUOT_PARANOIA
-
 /*
  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
  * and quota formats, dqstats structure containing statistics about the lists
@@ -695,7 +693,7 @@ void dqput(struct dquot *dquot)
 
 	if (!dquot)
 		return;
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	if (!atomic_read(&dquot->dq_count)) {
 		printk("VFS: dqput: trying to free free dquot\n");
 		printk("VFS: device %s, dquot of %s %d\n",
@@ -748,7 +746,7 @@ we_slept:
 		goto we_slept;
 	}
 	atomic_dec(&dquot->dq_count);
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	/* sanity check */
 	BUG_ON(!list_empty(&dquot->dq_free));
 #endif
@@ -845,7 +843,7 @@ we_slept:
 		dquot = NULL;
 		goto out;
 	}
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	BUG_ON(!dquot->dq_sb);	/* Has somebody invalidated entry under us? */
 #endif
 out:
@@ -874,7 +872,7 @@ static int dqinit_needed(struct inode *inode, int type)
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct inode *inode, *old_inode = NULL;
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	int reserved = 0;
 #endif
 
@@ -882,7 +880,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
 			continue;
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
 			reserved = 1;
 #endif
@@ -907,7 +905,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	spin_unlock(&inode_lock);
 	iput(old_inode);
 
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened before quota"
 			" was turned on thus quota information is probably "
@@ -940,7 +938,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
 	inode->i_dquot[type] = NULL;
 	if (dquot) {
 		if (dqput_blocks(dquot)) {
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 			if (atomic_read(&dquot->dq_count) != 1)
 				printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count));
 #endif
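
The fs/quota/dquot.c hunks above replace the private, always-defined __DQUOT_PARANOIA macro with the CONFIG_QUOTA_DEBUG Kconfig option introduced in the fs/quota/Kconfig hunk; the sanity checks themselves are unchanged and are simply compiled out when the option is disabled. A minimal, self-contained sketch of the same compile-time gating pattern (illustrative only, plain userspace C rather than the kernel code; QUOTA_DEBUG here is an ordinary preprocessor define standing in for the Kconfig symbol):

	/* Build with -DQUOTA_DEBUG to include the extra check; without it the
	 * check disappears entirely, mirroring CONFIG_QUOTA_DEBUG=y vs =n. */
	#include <assert.h>
	#include <stdio.h>

	struct counted { int refs; };

	static void put_ref(struct counted *c)
	{
	#ifdef QUOTA_DEBUG
		/* debug-only sanity check, analogous to the checks in dqput() */
		assert(c->refs > 0);
	#endif
		c->refs--;
	}

	int main(void)
	{
		struct counted c = { .refs = 1 };
		put_ref(&c);
		printf("refs now %d\n", c.refs);
		return 0;
	}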
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index f8a6075abf50..07930449a958 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir,
 				    struct reiserfs_de_head *deh)
 {
 	struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
-	if (reiserfs_expose_privroot(dir->d_sb))
-		return 0;
 	return (dir == dir->d_parent && privroot->d_inode &&
 		deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4f9586bb7631..e7cc00e636dc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 	if (!err && new_size < i_size_read(dentry->d_inode)) {
 		struct iattr newattrs = {
 			.ia_ctime = current_fs_time(inode->i_sb),
-			.ia_size = buffer_size,
+			.ia_size = new_size,
 			.ia_valid = ATTR_SIZE | ATTR_CTIME,
 		};
 
@@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask)
 	return generic_permission(inode, mask, NULL);
 }
 
-/* This will catch lookups from the fs root to .reiserfs_priv */
-static int
-xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
+static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
-	if (container_of(q1, struct dentry, d_name) == priv_root)
-		return -ENOENT;
-	if (q1->len == name->len &&
-	    !memcmp(q1->name, name->name, name->len))
-		return 0;
-	return 1;
+	return -EPERM;
 }
 
 static const struct dentry_operations xattr_lookup_poison_ops = {
-	.d_compare = xattr_lookup_poison,
+	.d_revalidate = xattr_hide_revalidate,
 };
 
 int reiserfs_lookup_privroot(struct super_block *s)
@@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
 			       strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		if (!reiserfs_expose_privroot(s))
-			s->s_root->d_op = &xattr_lookup_poison_ops;
+		dentry->d_op = &xattr_lookup_poison_ops;
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1c4c8f089970..dfa1d67f8fca 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb)
 	if (server->conn_pid)
 		kill_pid(server->conn_pid, SIGTERM, 1);
 
+	bdi_destroy(&server->bdi);
 	kfree(server->ops);
 	smb_unload_nls(server);
 	sb->s_fs_info = NULL;
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
 	if (!server)
 		goto out_no_server;
 	sb->s_fs_info = server;
+
+	if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
+		goto out_bdi;
+
+	sb->s_bdi = &server->bdi;
 
 	server->super_block = sb;
 	server->mnt = NULL;
@@ -624,6 +630,8 @@ out_no_smbiod:
 out_bad_option:
 	kfree(mem);
 out_no_mem:
+	bdi_destroy(&server->bdi);
+out_bdi:
 	if (!server->mnt)
 		printk(KERN_ERR "smb_fill_super: allocation failure\n");
 	sb->s_fs_info = NULL;
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 1cb0d81b164b..653c030eb840 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
 	u64 cur_index = index >> msblk->devblksize_log2;
 	int bytes, compressed, b = 0, k = 0, page = 0, avail;
 
-
-	bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1,
-		sizeof(*bh), GFP_KERNEL);
+	bh = kcalloc(((srclength + msblk->devblksize - 1)
+		>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
 	if (bh == NULL)
 		return -ENOMEM;
 
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3550aec2f655..48b6f4a385a6 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -275,7 +275,8 @@ allocate_root:
 
 	err = squashfs_read_inode(root, root_inode);
 	if (err) {
-		iget_failed(root);
+		make_bad_inode(root);
+		iput(root);
 		goto failed_mount;
 	}
 	insert_inode_hash(root);
@@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb)
 		kfree(sbi->id_table);
 		kfree(sbi->fragment_index);
 		kfree(sbi->meta_index);
+		kfree(sbi->inode_lookup_table);
 		kfree(sb->s_fs_info);
 		sb->s_fs_info = NULL;
 	}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 15a03d0fb9f3..7a603874e483 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
 		goto release_mutex;
 	}
 
+	length = stream->total_out;
 	mutex_unlock(&msblk->read_data_mutex);
-	return stream->total_out;
+	return length;
 
 release_mutex:
 	mutex_unlock(&msblk->read_data_mutex);
diff --git a/fs/super.c b/fs/super.c
index f35ac6022109..1527e6a0ee35 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
 #include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/file.h>
+#include <linux/backing-dev.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -693,6 +694,7 @@ int set_anon_super(struct super_block *s, void *data)
 		return -EMFILE;
 	}
 	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_bdi = &noop_backing_dev_info;
 	return 0;
 }
 
@@ -954,10 +956,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	if (error < 0)
 		goto out_free_secdata;
 	BUG_ON(!mnt->mnt_sb);
+	WARN_ON(!mnt->mnt_sb->s_bdi);
 
 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
 	if (error)
 		goto out_sb;
 
 	/*
 	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
diff --git a/fs/sync.c b/fs/sync.c
index fc5c3d75cf3c..92b228176f7c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
 #include "internal.h"
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	 * This should be safe, as we require bdi backing to actually
 	 * write out data in the first place
 	 */
-	if (!sb->s_bdi)
+	if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
 		return 0;
 
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 4e50286a4cc3..1dabed286b4c 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -164,8 +164,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
 					name, de->name))
 				goto found;
 		}
+		dir_put_page(page);
 	}
-	dir_put_page(page);
 
 	if (++n >= npages)
 		n = 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b487ced..29f1edca76de 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(
 
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
+	xfs_inode_shrinker_unregister(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
 	xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
 	if (error)
 		goto fail_vnrele;
 
+	xfs_inode_shrinker_register(mp);
+
 	kfree(mtpt);
 	return 0;
 
@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
 		goto out_cleanup_procfs;
 
 	vfs_initquota();
+	xfs_inode_shrinker_init();
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
 {
 	vfs_exitquota();
 	unregister_filesystem(&xfs_fs_type);
+	xfs_inode_shrinker_destroy();
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 05cd85317f6f..a427c638d909 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
 				   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	uint32_t	first_index;
 	int		last_error = 0;
@@ -134,7 +135,7 @@ restart:
 		if (error == EFSCORRUPTED)
 			break;
 
-	} while (1);
+	} while ((*nr_to_scan)--);
 
 	if (skipped) {
 		delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
 				   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
+	int			nr;
 
+	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
 		struct xfs_perag	*pag;
 
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
 			continue;
 		}
 		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
-						exclusive);
+						exclusive, &nr);
 		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
 				break;
 		}
+		if (nr <= 0)
+			break;
 	}
+	if (nr_to_scan)
+		*nr_to_scan = nr;
 	return XFS_ERROR(last_error);
 }
 
@@ -291,7 +299,7 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG, 0);
+				      XFS_ICI_NO_TAG, 0, NULL);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -310,7 +318,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG, 0);
+				     XFS_ICI_NO_TAG, 0, NULL);
 }
 
 STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
 	radix_tree_tag_set(&pag->pag_ici_root,
 			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
 			   XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable++;
 }
 
 /*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
 {
 	radix_tree_tag_clear(&pag->pag_ici_root,
 			     XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable--;
 }
 
 /*
@@ -820,10 +830,10 @@ xfs_reclaim_inode(
 	 * call into reclaim to find it in a clean state instead of waiting for
 	 * it now. We also don't return errors here - if the error is transient
 	 * then the next reclaim pass will flush the inode, and if the error
-	 * is permanent then the next sync reclaim will relcaim the inode and
+	 * is permanent then the next sync reclaim will reclaim the inode and
 	 * pass on the error.
 	 */
-	if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
 			"inode 0x%llx background reclaim flush failed with %d",
 			(long long)ip->i_ino, error);
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
 	int		mode)
 {
 	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-					XFS_ICI_RECLAIM_TAG, 1);
+					XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+	int		nr_to_scan,
+	gfp_t		gfp_mask)
+{
+	struct xfs_mount *mp;
+	struct xfs_perag *pag;
+	xfs_agnumber_t	ag;
+	int		reclaimable = 0;
+
+	if (nr_to_scan) {
+		if (!(gfp_mask & __GFP_FS))
+			return -1;
+
+		down_read(&xfs_mount_list_lock);
+		list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+			xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+			if (nr_to_scan <= 0)
+				break;
+		}
+		up_read(&xfs_mount_list_lock);
+	}
+
+	down_read(&xfs_mount_list_lock);
+	list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+			pag = xfs_perag_get(mp, ag);
+			if (!pag->pag_ici_init) {
+				xfs_perag_put(pag);
+				continue;
+			}
+			reclaimable += pag->pag_ici_reclaimable;
+			xfs_perag_put(pag);
+		}
+	}
+	up_read(&xfs_mount_list_lock);
+	return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+	.shrink = xfs_reclaim_inode_shrink,
+	.seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+	init_rwsem(&xfs_mount_list_lock);
+	register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+	ASSERT(list_empty(&xfs_mount_list));
+	unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_add_tail(&mp->m_mplist, &xfs_mount_list);
+	up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_del(&mp->m_mplist);
+	up_write(&xfs_mount_list_lock);
 }
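
The shrinker added above relies on the shrinker contract of kernels of this vintage: ->shrink(nr_to_scan, gfp_mask) is called with nr_to_scan == 0 purely to report how many objects could be freed, with a positive count to actually reclaim, and a return of -1 declines reclaim for an unsuitable allocation context (here, when __GFP_FS is not set). A stripped-down sketch of that contract (illustrative only; my_cache_count() and my_cache_reclaim() are hypothetical helpers, not XFS functions):

	/* kernel-context sketch: a minimal 2.6.34-style shrinker callback */
	static int my_cache_shrink(int nr_to_scan, gfp_t gfp_mask)
	{
		if (nr_to_scan) {
			if (!(gfp_mask & __GFP_FS))
				return -1;	/* cannot recurse into the fs */
			my_cache_reclaim(nr_to_scan);	/* free up to nr_to_scan objects */
		}
		return my_cache_count();	/* objects still reclaimable */
	}

	static struct shrinker my_cache_shrinker = {
		.shrink	= my_cache_shrink,
		.seeks	= DEFAULT_SEEKS,
	};

	/* register_shrinker(&my_cache_shrinker) at init time,
	 * unregister_shrinker(&my_cache_shrinker) at teardown. */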
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c346cabb..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-	int flags, int tag, int write_lock);
+	int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
 
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d492db..50bee07d6b0e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
 	uint		flags)
 {
 	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+				XFS_ICI_NO_TAG, 0, NULL);
 }
 
 /*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1ff88ea..abb8222b88c9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
 	int		pag_ici_init;	/* incore inode cache initialised */
 	rwlock_t	pag_ici_lock;	/* incore inode lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
+	int		pag_ici_reclaimable;	/* reclaimable inodes */
 #endif
 	int		pagb_count;	/* pagb slots in use */
 	xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index cd27c9d6c71f..5bba29a07812 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -177,16 +177,26 @@ xfs_swap_extents_check_format(
 	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
 		return EINVAL;
 
-	/* Check root block of temp in btree form to max in target */
+	/*
+	 * If we are in a btree format, check that the temp root block will fit
+	 * in the target and that it has enough extents to be in btree format
+	 * in the target.
+	 *
+	 * Note that we have to be careful to allow btree->extent conversions
+	 * (a common defrag case) which will occur when the temp inode is in
+	 * extent format...
+	 */
 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
-	    XFS_IFORK_BOFF(ip) &&
-	    tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+	    ((XFS_IFORK_BOFF(ip) &&
+	      tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
+	     XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
 		return EINVAL;
 
-	/* Check root block of target in btree form to max in temp */
+	/* Reciprocal target->temp btree format checks */
 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
-	    XFS_IFORK_BOFF(tip) &&
-	    ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+	    ((XFS_IFORK_BOFF(tip) &&
+	      ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
+	     XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
 		return EINVAL;
 
 	return 0;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e8fba92d7cd9..2be019136287 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -745,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp,
 
 /*
  * Determine if we have a transaction that has gone to disk
- * that needs to be covered. Log activity needs to be idle (no AIL and
- * nothing in the iclogs). And, we need to be in the right state indicating
- * something has gone out.
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
+ *
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurrs avoids replay
+ * of transactions where the metadata is already on disk.
  */
 int
 xfs_log_need_covered(xfs_mount_t *mp)
@@ -759,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
 		return 0;
 
 	spin_lock(&log->l_icloglock);
-	if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
-		(log->l_covered_state == XLOG_STATE_COVER_NEED2))
-			&& !xfs_trans_ail_tail(log->l_ailp)
-			&& xlog_iclogs_empty(log)) {
-		if (log->l_covered_state == XLOG_STATE_COVER_NEED)
-			log->l_covered_state = XLOG_STATE_COVER_DONE;
-		else {
-			ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
-			log->l_covered_state = XLOG_STATE_COVER_DONE2;
+	switch (log->l_covered_state) {
+	case XLOG_STATE_COVER_DONE:
+	case XLOG_STATE_COVER_DONE2:
+	case XLOG_STATE_COVER_IDLE:
+		break;
+	case XLOG_STATE_COVER_NEED:
+	case XLOG_STATE_COVER_NEED2:
+		if (!xfs_trans_ail_tail(log->l_ailp) &&
+		    xlog_iclogs_empty(log)) {
+			if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+				log->l_covered_state = XLOG_STATE_COVER_DONE;
+			else
+				log->l_covered_state = XLOG_STATE_COVER_DONE2;
 		}
+		/* FALLTHRU */
+	default:
 		needed = 1;
+		break;
 	}
 	spin_unlock(&log->l_icloglock);
 	return needed;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7b983e..9ff48a16a7ee 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
 	wait_queue_head_t	m_wait_single_sync_task;
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
+	struct list_head	m_mplist;	/* inode shrinker mount list */
 } xfs_mount_t;
 
 /*