aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Steve French <sfrench@us.ibm.com> 2010-05-13 18:19:32 -0400
committer Steve French <sfrench@us.ibm.com> 2010-05-13 18:19:32 -0400
commit baa456331738b4e76a92318b62b354377a30ad80 (patch)
tree 75c828a7c8a9f1b5f7f41b2e53271eafb7e561ef /fs
parent aa3e5572c538d753dce11bf93532a75f95d22b40 (diff)
parent 4462dc02842698f173f518c1f5ce79c0fb89395a (diff)
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: fs/cifs/inode.c
Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/v9fs.c 10
-rw-r--r-- fs/9p/v9fs.h 2
-rw-r--r-- fs/9p/vfs_super.c 1
-rw-r--r-- fs/afs/internal.h 2
-rw-r--r-- fs/afs/mntpt.c 24
-rw-r--r-- fs/afs/super.c 1
-rw-r--r-- fs/afs/volume.c 7
-rw-r--r-- fs/autofs4/root.c 5
-rw-r--r-- fs/binfmt_elf_fdpic.c 7
-rw-r--r-- fs/binfmt_flat.c 2
-rw-r--r-- fs/block_dev.c 17
-rw-r--r-- fs/btrfs/disk-io.c 12
-rw-r--r-- fs/cachefiles/internal.h 1
-rw-r--r-- fs/cachefiles/namei.c 98
-rw-r--r-- fs/cachefiles/security.c 4
-rw-r--r-- fs/ceph/addr.c 8
-rw-r--r-- fs/ceph/auth.c 1
-rw-r--r-- fs/ceph/auth_none.h 2
-rw-r--r-- fs/ceph/auth_x.c 32
-rw-r--r-- fs/ceph/caps.c 21
-rw-r--r-- fs/ceph/dir.c 9
-rw-r--r-- fs/ceph/file.c 3
-rw-r--r-- fs/ceph/inode.c 8
-rw-r--r-- fs/ceph/mds_client.c 34
-rw-r--r-- fs/ceph/messenger.c 39
-rw-r--r-- fs/ceph/messenger.h 1
-rw-r--r-- fs/ceph/osd_client.c 26
-rw-r--r-- fs/ceph/osd_client.h 3
-rw-r--r-- fs/ceph/osdmap.c 29
-rw-r--r-- fs/ceph/osdmap.h 2
-rw-r--r-- fs/ceph/rados.h 1
-rw-r--r-- fs/ceph/snap.c 24
-rw-r--r-- fs/ceph/super.c 30
-rw-r--r-- fs/ceph/super.h 1
-rw-r--r-- fs/cifs/cifs_fs_sb.h 3
-rw-r--r-- fs/cifs/cifsfs.c 10
-rw-r--r-- fs/cifs/cifsglob.h 1
-rw-r--r-- fs/cifs/inode.c 21
-rw-r--r-- fs/coda/inode.c 8
-rw-r--r-- fs/compat.c 2
-rw-r--r-- fs/compat_ioctl.c 3
-rw-r--r-- fs/ecryptfs/ecryptfs_kernel.h 2
-rw-r--r-- fs/ecryptfs/main.c 10
-rw-r--r-- fs/ecryptfs/super.c 1
-rw-r--r-- fs/exec.c 2
-rw-r--r-- fs/exofs/exofs.h 2
-rw-r--r-- fs/exofs/super.c 8
-rw-r--r-- fs/ext4/extents.c 1
-rw-r--r-- fs/ext4/inode.c 3
-rw-r--r-- fs/ext4/mballoc.c 21
-rw-r--r-- fs/ioctl.c 92
-rw-r--r-- fs/jfs/inode.c 2
-rw-r--r-- fs/jfs/jfs_dmap.c 16
-rw-r--r-- fs/jfs/jfs_dmap.h 6
-rw-r--r-- fs/jfs/jfs_inode.h 1
-rw-r--r-- fs/jfs/namei.c 4
-rw-r--r-- fs/jfs/resize.c 6
-rw-r--r-- fs/jfs/symlink.c 14
-rw-r--r-- fs/logfs/gc.c 8
-rw-r--r-- fs/logfs/journal.c 29
-rw-r--r-- fs/logfs/logfs.h 15
-rw-r--r-- fs/logfs/readwrite.c 75
-rw-r--r-- fs/logfs/segment.c 8
-rw-r--r-- fs/logfs/super.c 11
-rw-r--r-- fs/namei.c 6
-rw-r--r-- fs/ncpfs/inode.c 8
-rw-r--r-- fs/nfs/client.c 2
-rw-r--r-- fs/nfs/delegation.c 86
-rw-r--r-- fs/nfs/dir.c 4
-rw-r--r-- fs/nfs/nfs4proc.c 5
-rw-r--r-- fs/nfs/super.c 3
-rw-r--r-- fs/nfs/write.c 55
-rw-r--r-- fs/nfsd/nfs4xdr.c 8
-rw-r--r-- fs/nilfs2/super.c 1
-rw-r--r-- fs/notify/inotify/Kconfig 1
-rw-r--r-- fs/ocfs2/buffer_head_io.c 2
-rw-r--r-- fs/ocfs2/dlm/dlmast.c 5
-rw-r--r-- fs/ocfs2/dlmfs/dlmfs.c 14
-rw-r--r-- fs/ocfs2/file.c 32
-rw-r--r-- fs/ocfs2/inode.c 68
-rw-r--r-- fs/ocfs2/inode.h 2
-rw-r--r-- fs/ocfs2/namei.c 58
-rw-r--r-- fs/ocfs2/refcounttree.c 3
-rw-r--r-- fs/proc/array.c 3
-rw-r--r-- fs/proc/base.c 2
-rw-r--r-- fs/proc/task_mmu.c 19
-rw-r--r-- fs/reiserfs/dir.c 2
-rw-r--r-- fs/reiserfs/xattr.c 19
-rw-r--r-- fs/smbfs/inode.c 8
-rw-r--r-- fs/squashfs/block.c 5
-rw-r--r-- fs/squashfs/super.c 4
-rw-r--r-- fs/squashfs/zlib_wrapper.c 3
-rw-r--r-- fs/super.c 9
-rw-r--r-- fs/sync.c 3
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 5
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.c 112
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.h 7
-rw-r--r-- fs/xfs/quota/xfs_qm_syscalls.c 3
-rw-r--r-- fs/xfs/xfs_ag.h 1
-rw-r--r-- fs/xfs/xfs_dfrag.c 22
-rw-r--r-- fs/xfs/xfs_mount.h 1
101 files changed, 975 insertions, 468 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5c5bc8480070..f8b86e92cd66 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
238 return ERR_PTR(-ENOMEM); 238 return ERR_PTR(-ENOMEM);
239 } 239 }
240 240
241 rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
242 if (rc) {
243 __putname(v9ses->aname);
244 __putname(v9ses->uname);
245 return ERR_PTR(rc);
246 }
247
241 spin_lock(&v9fs_sessionlist_lock); 248 spin_lock(&v9fs_sessionlist_lock);
242 list_add(&v9ses->slist, &v9fs_sessionlist); 249 list_add(&v9ses->slist, &v9fs_sessionlist);
243 spin_unlock(&v9fs_sessionlist_lock); 250 spin_unlock(&v9fs_sessionlist_lock);
@@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
301 return fid; 308 return fid;
302 309
303error: 310error:
311 bdi_destroy(&v9ses->bdi);
304 return ERR_PTR(retval); 312 return ERR_PTR(retval);
305} 313}
306 314
@@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
326 __putname(v9ses->uname); 334 __putname(v9ses->uname);
327 __putname(v9ses->aname); 335 __putname(v9ses->aname);
328 336
337 bdi_destroy(&v9ses->bdi);
338
329 spin_lock(&v9fs_sessionlist_lock); 339 spin_lock(&v9fs_sessionlist_lock);
330 list_del(&v9ses->slist); 340 list_del(&v9ses->slist);
331 spin_unlock(&v9fs_sessionlist_lock); 341 spin_unlock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a0a8d3dd1361..bec4d0bcb458 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,7 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#include <linux/backing-dev.h>
23 24
24/** 25/**
25 * enum p9_session_flags - option flags for each 9P session 26 * enum p9_session_flags - option flags for each 9P session
@@ -102,6 +103,7 @@ struct v9fs_session_info {
102 u32 uid; /* if ACCESS_SINGLE, the uid that has access */ 103 u32 uid; /* if ACCESS_SINGLE, the uid that has access */
103 struct p9_client *clnt; /* 9p client */ 104 struct p9_client *clnt; /* 9p client */
104 struct list_head slist; /* list of sessions registered with v9fs */ 105 struct list_head slist; /* list of sessions registered with v9fs */
106 struct backing_dev_info bdi;
105}; 107};
106 108
107struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 109struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 491108bd6e0d..806da5d3b3a0 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
77 sb->s_blocksize = 1 << sb->s_blocksize_bits; 77 sb->s_blocksize = 1 << sb->s_blocksize_bits;
78 sb->s_magic = V9FS_MAGIC; 78 sb->s_magic = V9FS_MAGIC;
79 sb->s_op = &v9fs_super_ops; 79 sb->s_op = &v9fs_super_ops;
80 sb->s_bdi = &v9ses->bdi;
80 81
81 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 82 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
82 MS_NOATIME; 83 MS_NOATIME;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c54dad4e6063..a10f2582844f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -19,6 +19,7 @@
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/fscache.h> 21#include <linux/fscache.h>
22#include <linux/backing-dev.h>
22 23
23#include "afs.h" 24#include "afs.h"
24#include "afs_vl.h" 25#include "afs_vl.h"
@@ -313,6 +314,7 @@ struct afs_volume {
313 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ 314 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
314 struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ 315 struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
315 struct rw_semaphore server_sem; /* lock for accessing current server */ 316 struct rw_semaphore server_sem; /* lock for accessing current server */
317 struct backing_dev_info bdi;
316}; 318};
317 319
318/* 320/*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5e813a816ce4..b3feddc4f7d6 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
138{ 138{
139 struct afs_super_info *super; 139 struct afs_super_info *super;
140 struct vfsmount *mnt; 140 struct vfsmount *mnt;
141 struct page *page = NULL; 141 struct page *page;
142 size_t size; 142 size_t size;
143 char *buf, *devname = NULL, *options = NULL; 143 char *buf, *devname, *options;
144 int ret; 144 int ret;
145 145
146 _enter("{%s}", mntpt->d_name.name); 146 _enter("{%s}", mntpt->d_name.name);
@@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
150 ret = -EINVAL; 150 ret = -EINVAL;
151 size = mntpt->d_inode->i_size; 151 size = mntpt->d_inode->i_size;
152 if (size > PAGE_SIZE - 1) 152 if (size > PAGE_SIZE - 1)
153 goto error; 153 goto error_no_devname;
154 154
155 ret = -ENOMEM; 155 ret = -ENOMEM;
156 devname = (char *) get_zeroed_page(GFP_KERNEL); 156 devname = (char *) get_zeroed_page(GFP_KERNEL);
157 if (!devname) 157 if (!devname)
158 goto error; 158 goto error_no_devname;
159 159
160 options = (char *) get_zeroed_page(GFP_KERNEL); 160 options = (char *) get_zeroed_page(GFP_KERNEL);
161 if (!options) 161 if (!options)
162 goto error; 162 goto error_no_options;
163 163
164 /* read the contents of the AFS special symlink */ 164 /* read the contents of the AFS special symlink */
165 page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); 165 page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
166 if (IS_ERR(page)) { 166 if (IS_ERR(page)) {
167 ret = PTR_ERR(page); 167 ret = PTR_ERR(page);
168 goto error; 168 goto error_no_page;
169 } 169 }
170 170
171 ret = -EIO; 171 ret = -EIO;
@@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
196 return mnt; 196 return mnt;
197 197
198error: 198error:
199 if (page) 199 page_cache_release(page);
200 page_cache_release(page); 200error_no_page:
201 if (devname) 201 free_page((unsigned long) options);
202 free_page((unsigned long) devname); 202error_no_options:
203 if (options) 203 free_page((unsigned long) devname);
204 free_page((unsigned long) options); 204error_no_devname:
205 _leave(" = %d", ret); 205 _leave(" = %d", ret);
206 return ERR_PTR(ret); 206 return ERR_PTR(ret);
207} 207}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 14f6431598ad..e932e5a3a0c1 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
311 sb->s_magic = AFS_FS_MAGIC; 311 sb->s_magic = AFS_FS_MAGIC;
312 sb->s_op = &afs_super_ops; 312 sb->s_op = &afs_super_ops;
313 sb->s_fs_info = as; 313 sb->s_fs_info = as;
314 sb->s_bdi = &as->volume->bdi;
314 315
315 /* allocate the root inode and dentry */ 316 /* allocate the root inode and dentry */
316 fid.vid = as->volume->vid; 317 fid.vid = as->volume->vid;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index a353e69e2391..401eeb21869f 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
106 volume->cell = params->cell; 106 volume->cell = params->cell;
107 volume->vid = vlocation->vldb.vid[params->type]; 107 volume->vid = vlocation->vldb.vid[params->type];
108 108
109 ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
110 if (ret)
111 goto error_bdi;
112
109 init_rwsem(&volume->server_sem); 113 init_rwsem(&volume->server_sem);
110 114
111 /* look up all the applicable server records */ 115 /* look up all the applicable server records */
@@ -151,6 +155,8 @@ error:
151 return ERR_PTR(ret); 155 return ERR_PTR(ret);
152 156
153error_discard: 157error_discard:
158 bdi_destroy(&volume->bdi);
159error_bdi:
154 up_write(&params->cell->vl_sem); 160 up_write(&params->cell->vl_sem);
155 161
156 for (loop = volume->nservers - 1; loop >= 0; loop--) 162 for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume)
200 for (loop = volume->nservers - 1; loop >= 0; loop--) 206 for (loop = volume->nservers - 1; loop >= 0; loop--)
201 afs_put_server(volume->servers[loop]); 207 afs_put_server(volume->servers[loop]);
202 208
209 bdi_destroy(&volume->bdi);
203 kfree(volume); 210 kfree(volume);
204 211
205 _leave(" [destroyed]"); 212 _leave(" [destroyed]");
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c606d92..e8e5e63ac950 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
177 } 177 }
178 /* Trigger mount for path component or follow link */ 178 /* Trigger mount for path component or follow link */
179 } else if (ino->flags & AUTOFS_INF_PENDING || 179 } else if (ino->flags & AUTOFS_INF_PENDING ||
180 autofs4_need_mount(flags) || 180 autofs4_need_mount(flags)) {
181 current->link_count) {
182 DPRINTK("waiting for mount name=%.*s", 181 DPRINTK("waiting for mount name=%.*s",
183 dentry->d_name.len, dentry->d_name.name); 182 dentry->d_name.len, dentry->d_name.name);
184 183
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
262 spin_unlock(&dcache_lock); 261 spin_unlock(&dcache_lock);
263 spin_unlock(&sbi->fs_lock); 262 spin_unlock(&sbi->fs_lock);
264 263
265 status = try_to_fill_dentry(dentry, 0); 264 status = try_to_fill_dentry(dentry, nd->flags);
266 if (status) 265 if (status)
267 goto out_error; 266 goto out_error;
268 267
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7ab23e006e4c..2c5f9a0e5d72 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
1005 } 1005 }
1006 } else if (!mm->start_data) { 1006 } else if (!mm->start_data) {
1007 mm->start_data = seg->addr; 1007 mm->start_data = seg->addr;
1008#ifndef CONFIG_MMU
1009 mm->end_data = seg->addr + phdr->p_memsz; 1008 mm->end_data = seg->addr + phdr->p_memsz;
1010#endif
1011 } 1009 }
1012
1013#ifdef CONFIG_MMU
1014 if (seg->addr + phdr->p_memsz > mm->end_data)
1015 mm->end_data = seg->addr + phdr->p_memsz;
1016#endif
1017 } 1010 }
1018 1011
1019 seg++; 1012 seg++;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e0e769bdca59..49566c1687d8 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
355 355
356 if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { 356 if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
357 printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)", 357 printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)",
358 (int) r,(int)(start_brk-start_code),(int)text_len); 358 (int) r,(int)(start_brk-start_data+text_len),(int)text_len);
359 goto failed; 359 goto failed;
360 } 360 }
361 361
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2a6d0193f139..6dcee88c2e5d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -406,16 +406,23 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
406 406
407int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) 407int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
408{ 408{
409 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 409 struct inode *bd_inode = filp->f_mapping->host;
410 struct block_device *bdev = I_BDEV(bd_inode);
410 int error; 411 int error;
411 412
412 error = sync_blockdev(bdev); 413 /*
413 if (error) 414 * There is no need to serialise calls to blkdev_issue_flush with
414 return error; 415 * i_mutex and doing so causes performance issues with concurrent
415 416 * O_SYNC writers to a block device.
417 */
418 mutex_unlock(&bd_inode->i_mutex);
419
416 error = blkdev_issue_flush(bdev, NULL); 420 error = blkdev_issue_flush(bdev, NULL);
417 if (error == -EOPNOTSUPP) 421 if (error == -EOPNOTSUPP)
418 error = 0; 422 error = 0;
423
424 mutex_lock(&bd_inode->i_mutex);
425
419 return error; 426 return error;
420} 427}
421EXPORT_SYMBOL(blkdev_fsync); 428EXPORT_SYMBOL(blkdev_fsync);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b8f2c89ccb..feca04197d02 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops;
44static void end_workqueue_fn(struct btrfs_work *work); 44static void end_workqueue_fn(struct btrfs_work *work);
45static void free_fs_root(struct btrfs_root *root); 45static void free_fs_root(struct btrfs_root *root);
46 46
47static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
48
49/* 47/*
50 * end_io_wq structs are used to do processing in task context when an IO is 48 * end_io_wq structs are used to do processing in task context when an IO is
51 * complete. This is used during reads to verify checksums, and it is used 49 * complete. This is used during reads to verify checksums, and it is used
@@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1375{ 1373{
1376 int err; 1374 int err;
1377 1375
1378 bdi->name = "btrfs";
1379 bdi->capabilities = BDI_CAP_MAP_COPY; 1376 bdi->capabilities = BDI_CAP_MAP_COPY;
1380 err = bdi_init(bdi); 1377 err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
1381 if (err) 1378 if (err)
1382 return err; 1379 return err;
1383 1380
1384 err = bdi_register(bdi, NULL, "btrfs-%d",
1385 atomic_inc_return(&btrfs_bdi_num));
1386 if (err) {
1387 bdi_destroy(bdi);
1388 return err;
1389 }
1390
1391 bdi->ra_pages = default_backing_dev_info.ra_pages; 1381 bdi->ra_pages = default_backing_dev_info.ra_pages;
1392 bdi->unplug_io_fn = btrfs_unplug_io_fn; 1382 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1393 bdi->unplug_io_data = info; 1383 bdi->unplug_io_data = info;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f9c624..a8cd821226da 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
34 loff_t i_size; /* object size */ 34 loff_t i_size; /* object size */
35 unsigned long flags; 35 unsigned long flags;
36#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ 36#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
37#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */
37 atomic_t usage; /* object usage count */ 38 atomic_t usage; /* object usage count */
38 uint8_t type; /* object type */ 39 uint8_t type; /* object type */
39 uint8_t new; /* T if object new */ 40 uint8_t new; /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1ee0d..f4a7840bf42c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
93} 93}
94 94
95/* 95/*
96 * mark the owner of a dentry, if there is one, to indicate that that dentry
97 * has been preemptively deleted
98 * - the caller must hold the i_mutex on the dentry's parent as required to
99 * call vfs_unlink(), vfs_rmdir() or vfs_rename()
100 */
101static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
102 struct dentry *dentry)
103{
104 struct cachefiles_object *object;
105 struct rb_node *p;
106
107 _enter(",'%*.*s'",
108 dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
109
110 write_lock(&cache->active_lock);
111
112 p = cache->active_nodes.rb_node;
113 while (p) {
114 object = rb_entry(p, struct cachefiles_object, active_node);
115 if (object->dentry > dentry)
116 p = p->rb_left;
117 else if (object->dentry < dentry)
118 p = p->rb_right;
119 else
120 goto found_dentry;
121 }
122
123 write_unlock(&cache->active_lock);
124 _leave(" [no owner]");
125 return;
126
127 /* found the dentry for */
128found_dentry:
129 kdebug("preemptive burial: OBJ%x [%s] %p",
130 object->fscache.debug_id,
131 fscache_object_states[object->fscache.state],
132 dentry);
133
134 if (object->fscache.state < FSCACHE_OBJECT_DYING) {
135 printk(KERN_ERR "\n");
136 printk(KERN_ERR "CacheFiles: Error:"
137 " Can't preemptively bury live object\n");
138 cachefiles_printk_object(object, NULL);
139 } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
140 printk(KERN_ERR "CacheFiles: Error:"
141 " Object already preemptively buried\n");
142 }
143
144 write_unlock(&cache->active_lock);
145 _leave(" [owner marked]");
146}
147
148/*
96 * record the fact that an object is now active 149 * record the fact that an object is now active
97 */ 150 */
98static int cachefiles_mark_object_active(struct cachefiles_cache *cache, 151static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
219 */ 272 */
220static int cachefiles_bury_object(struct cachefiles_cache *cache, 273static int cachefiles_bury_object(struct cachefiles_cache *cache,
221 struct dentry *dir, 274 struct dentry *dir,
222 struct dentry *rep) 275 struct dentry *rep,
276 bool preemptive)
223{ 277{
224 struct dentry *grave, *trap; 278 struct dentry *grave, *trap;
225 char nbuffer[8 + 8 + 1]; 279 char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
229 dir->d_name.len, dir->d_name.len, dir->d_name.name, 283 dir->d_name.len, dir->d_name.len, dir->d_name.name,
230 rep->d_name.len, rep->d_name.len, rep->d_name.name); 284 rep->d_name.len, rep->d_name.len, rep->d_name.name);
231 285
286 _debug("remove %p from %p", rep, dir);
287
232 /* non-directories can just be unlinked */ 288 /* non-directories can just be unlinked */
233 if (!S_ISDIR(rep->d_inode->i_mode)) { 289 if (!S_ISDIR(rep->d_inode->i_mode)) {
234 _debug("unlink stale object"); 290 _debug("unlink stale object");
235 ret = vfs_unlink(dir->d_inode, rep); 291 ret = vfs_unlink(dir->d_inode, rep);
236 292
293 if (preemptive)
294 cachefiles_mark_object_buried(cache, rep);
295
237 mutex_unlock(&dir->d_inode->i_mutex); 296 mutex_unlock(&dir->d_inode->i_mutex);
238 297
239 if (ret == -EIO) 298 if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
325 if (ret != 0 && ret != -ENOMEM) 384 if (ret != 0 && ret != -ENOMEM)
326 cachefiles_io_error(cache, "Rename failed with error %d", ret); 385 cachefiles_io_error(cache, "Rename failed with error %d", ret);
327 386
387 if (preemptive)
388 cachefiles_mark_object_buried(cache, rep);
389
328 unlock_rename(cache->graveyard, dir); 390 unlock_rename(cache->graveyard, dir);
329 dput(grave); 391 dput(grave);
330 _leave(" = 0"); 392 _leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
340 struct dentry *dir; 402 struct dentry *dir;
341 int ret; 403 int ret;
342 404
343 _enter(",{%p}", object->dentry); 405 _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
344 406
345 ASSERT(object->dentry); 407 ASSERT(object->dentry);
346 ASSERT(object->dentry->d_inode); 408 ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
350 412
351 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); 413 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
352 414
353 /* we need to check that our parent is _still_ our parent - it may have 415 if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
354 * been renamed */ 416 /* object allocation for the same key preemptively deleted this
355 if (dir == object->dentry->d_parent) { 417 * object's file so that it could create its own file */
356 ret = cachefiles_bury_object(cache, dir, object->dentry); 418 _debug("object preemptively buried");
357 } else {
358 /* it got moved, presumably by cachefilesd culling it, so it's
359 * no longer in the key path and we can ignore it */
360 mutex_unlock(&dir->d_inode->i_mutex); 419 mutex_unlock(&dir->d_inode->i_mutex);
361 ret = 0; 420 ret = 0;
421 } else {
422 /* we need to check that our parent is _still_ our parent - it
423 * may have been renamed */
424 if (dir == object->dentry->d_parent) {
425 ret = cachefiles_bury_object(cache, dir,
426 object->dentry, false);
427 } else {
428 /* it got moved, presumably by cachefilesd culling it,
429 * so it's no longer in the key path and we can ignore
430 * it */
431 mutex_unlock(&dir->d_inode->i_mutex);
432 ret = 0;
433 }
362 } 434 }
363 435
364 dput(dir); 436 dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
381 const char *name; 453 const char *name;
382 int ret, nlen; 454 int ret, nlen;
383 455
384 _enter("{%p},,%s,", parent->dentry, key); 456 _enter("OBJ%x{%p},OBJ%x,%s,",
457 parent->fscache.debug_id, parent->dentry,
458 object->fscache.debug_id, key);
385 459
386 cache = container_of(parent->fscache.cache, 460 cache = container_of(parent->fscache.cache,
387 struct cachefiles_cache, cache); 461 struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
509 * mutex) */ 583 * mutex) */
510 object->dentry = NULL; 584 object->dentry = NULL;
511 585
512 ret = cachefiles_bury_object(cache, dir, next); 586 ret = cachefiles_bury_object(cache, dir, next, true);
513 dput(next); 587 dput(next);
514 next = NULL; 588 next = NULL;
515 589
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
828 /* actually remove the victim (drops the dir mutex) */ 902 /* actually remove the victim (drops the dir mutex) */
829 _debug("bury"); 903 _debug("bury");
830 904
831 ret = cachefiles_bury_object(cache, dir, victim); 905 ret = cachefiles_bury_object(cache, dir, victim, false);
832 if (ret < 0) 906 if (ret < 0)
833 goto error; 907 goto error;
834 908
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb2232..039b5011d83b 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
77/* 77/*
78 * check the security details of the on-disk cache 78 * check the security details of the on-disk cache
79 * - must be called with security override in force 79 * - must be called with security override in force
80 * - must return with a security override in force - even in the case of an
81 * error
80 */ 82 */
81int cachefiles_determine_cache_security(struct cachefiles_cache *cache, 83int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
82 struct dentry *root, 84 struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
99 * which create files */ 101 * which create files */
100 ret = set_create_files_as(new, root->d_inode); 102 ret = set_create_files_as(new, root->d_inode);
101 if (ret < 0) { 103 if (ret < 0) {
104 abort_creds(new);
105 cachefiles_begin_secure(cache, _saved_cred);
102 _leave(" = %d [cfa]", ret); 106 _leave(" = %d [cfa]", ret);
103 return ret; 107 return ret;
104 } 108 }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 412593703d1e..a9005d862ed4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req,
504 int i; 504 int i;
505 struct ceph_snap_context *snapc = req->r_snapc; 505 struct ceph_snap_context *snapc = req->r_snapc;
506 struct address_space *mapping = inode->i_mapping; 506 struct address_space *mapping = inode->i_mapping;
507 struct writeback_control *wbc = req->r_wbc;
508 __s32 rc = -EIO; 507 __s32 rc = -EIO;
509 u64 bytes = 0; 508 u64 bytes = 0;
510 struct ceph_client *client = ceph_inode_to_client(inode); 509 struct ceph_client *client = ceph_inode_to_client(inode);
511 long writeback_stat; 510 long writeback_stat;
512 unsigned issued = __ceph_caps_issued(ci, NULL); 511 unsigned issued = ceph_caps_issued(ci);
513 512
514 /* parse reply */ 513 /* parse reply */
515 replyhead = msg->front.iov_base; 514 replyhead = msg->front.iov_base;
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
546 clear_bdi_congested(&client->backing_dev_info, 545 clear_bdi_congested(&client->backing_dev_info,
547 BLK_RW_ASYNC); 546 BLK_RW_ASYNC);
548 547
549 if (i >= wrote) {
550 dout("inode %p skipping page %p\n", inode, page);
551 wbc->pages_skipped++;
552 }
553 ceph_put_snap_context((void *)page->private); 548 ceph_put_snap_context((void *)page->private);
554 page->private = 0; 549 page->private = 0;
555 ClearPagePrivate(page); 550 ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
799 alloc_page_vec(client, req); 794 alloc_page_vec(client, req);
800 req->r_callback = writepages_finish; 795 req->r_callback = writepages_finish;
801 req->r_inode = inode; 796 req->r_inode = inode;
802 req->r_wbc = wbc;
803 } 797 }
804 798
805 /* note position of first page in pvec */ 799 /* note position of first page in pvec */
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b94b866..818afe72e6c7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/slab.h>
6 7
7#include "types.h" 8#include "types.h"
8#include "auth_none.h" 9#include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c05533a31c..8164df1a08be 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
1#ifndef _FS_CEPH_AUTH_NONE_H 1#ifndef _FS_CEPH_AUTH_NONE_H
2#define _FS_CEPH_AUTH_NONE_H 2#define _FS_CEPH_AUTH_NONE_H
3 3
4#include <linux/slab.h>
5
4#include "auth.h" 6#include "auth.h"
5 7
6/* 8/*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4dc8cc..fee5a08da881 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
12#include "auth.h" 12#include "auth.h"
13#include "decode.h" 13#include "decode.h"
14 14
15struct kmem_cache *ceph_x_ticketbuf_cachep;
16
17#define TEMP_TICKET_BUF_LEN 256 15#define TEMP_TICKET_BUF_LEN 256
18 16
19static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); 17static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
131 char *ticket_buf; 129 char *ticket_buf;
132 u8 struct_v; 130 u8 struct_v;
133 131
134 dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC); 132 dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
135 if (!dbuf) 133 if (!dbuf)
136 return -ENOMEM; 134 return -ENOMEM;
137 135
138 ret = -ENOMEM; 136 ret = -ENOMEM;
139 ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, 137 ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
140 GFP_NOFS | GFP_ATOMIC);
141 if (!ticket_buf) 138 if (!ticket_buf)
142 goto out_dbuf; 139 goto out_dbuf;
143 140
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
251 248
252 ret = 0; 249 ret = 0;
253out: 250out:
254 kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf); 251 kfree(ticket_buf);
255out_dbuf: 252out_dbuf:
256 kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf); 253 kfree(dbuf);
257 return ret; 254 return ret;
258 255
259bad: 256bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
605 remove_ticket_handler(ac, th); 602 remove_ticket_handler(ac, th);
606 } 603 }
607 604
608 kmem_cache_destroy(ceph_x_ticketbuf_cachep);
609
610 kfree(ac->private); 605 kfree(ac->private);
611 ac->private = NULL; 606 ac->private = NULL;
612} 607}
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
641 int ret; 636 int ret;
642 637
643 dout("ceph_x_init %p\n", ac); 638 dout("ceph_x_init %p\n", ac);
639 ret = -ENOMEM;
644 xi = kzalloc(sizeof(*xi), GFP_NOFS); 640 xi = kzalloc(sizeof(*xi), GFP_NOFS);
645 if (!xi) 641 if (!xi)
646 return -ENOMEM; 642 goto out;
647 643
648 ret = -ENOMEM;
649 ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
650 TEMP_TICKET_BUF_LEN, 8,
651 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
652 NULL);
653 if (!ceph_x_ticketbuf_cachep)
654 goto done_nomem;
655 ret = -EINVAL; 644 ret = -EINVAL;
656 if (!ac->secret) { 645 if (!ac->secret) {
657 pr_err("no secret set (for auth_x protocol)\n"); 646 pr_err("no secret set (for auth_x protocol)\n");
658 goto done_nomem; 647 goto out_nomem;
659 } 648 }
660 649
661 ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); 650 ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
662 if (ret) 651 if (ret)
663 goto done_nomem; 652 goto out_nomem;
664 653
665 xi->starting = true; 654 xi->starting = true;
666 xi->ticket_handlers = RB_ROOT; 655 xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
670 ac->ops = &ceph_x_ops; 659 ac->ops = &ceph_x_ops;
671 return 0; 660 return 0;
672 661
673done_nomem: 662out_nomem:
674 kfree(xi); 663 kfree(xi);
675 if (ceph_x_ticketbuf_cachep) 664out:
676 kmem_cache_destroy(ceph_x_ticketbuf_cachep);
677 return ret; 665 return ret;
678} 666}
679 667
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa2239fa9a3b..d9400534b279 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
858} 858}
859 859
860/* 860/*
861 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
862 *
861 * caller should hold i_lock. 863 * caller should hold i_lock.
862 * caller will not hold session s_mutex if called from destroy_inode. 864 * caller will not hold session s_mutex if called from destroy_inode.
863 */ 865 */
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
866 struct ceph_mds_session *session = cap->session; 868 struct ceph_mds_session *session = cap->session;
867 struct ceph_inode_info *ci = cap->ci; 869 struct ceph_inode_info *ci = cap->ci;
868 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; 870 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
871 int removed = 0;
869 872
870 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); 873 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
871 874
872 /* remove from inode list */
873 rb_erase(&cap->ci_node, &ci->i_caps);
874 cap->ci = NULL;
875 if (ci->i_auth_cap == cap)
876 ci->i_auth_cap = NULL;
877
878 /* remove from session list */ 875 /* remove from session list */
879 spin_lock(&session->s_cap_lock); 876 spin_lock(&session->s_cap_lock);
880 if (session->s_cap_iterator == cap) { 877 if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
885 list_del_init(&cap->session_caps); 882 list_del_init(&cap->session_caps);
886 session->s_nr_caps--; 883 session->s_nr_caps--;
887 cap->session = NULL; 884 cap->session = NULL;
885 removed = 1;
888 } 886 }
887 /* protect backpointer with s_cap_lock: see iterate_session_caps */
888 cap->ci = NULL;
889 spin_unlock(&session->s_cap_lock); 889 spin_unlock(&session->s_cap_lock);
890 890
891 if (cap->session == NULL) 891 /* remove from inode list */
892 rb_erase(&cap->ci_node, &ci->i_caps);
893 if (ci->i_auth_cap == cap)
894 ci->i_auth_cap = NULL;
895
896 if (removed)
892 ceph_put_cap(cap); 897 ceph_put_cap(cap);
893 898
894 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { 899 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
@@ -1861,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1861 } else { 1866 } else {
1862 pr_err("%p auth cap %p not mds%d ???\n", inode, 1867 pr_err("%p auth cap %p not mds%d ???\n", inode,
1863 cap, session->s_mds); 1868 cap, session->s_mds);
1864 spin_unlock(&inode->i_lock);
1865 } 1869 }
1870 spin_unlock(&inode->i_lock);
1866 } 1871 }
1867} 1872}
1868 1873
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ea8ee2e526aa..650d2db5ed26 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -880,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
880 * do_request, above). If there is no trace, we need 880 * do_request, above). If there is no trace, we need
881 * to do it here. 881 * to do it here.
882 */ 882 */
883
884 /* d_move screws up d_subdirs order */
885 ceph_i_clear(new_dir, CEPH_I_COMPLETE);
886
883 d_move(old_dentry, new_dentry); 887 d_move(old_dentry, new_dentry);
888
889 /* ensure target dentry is invalidated, despite
890 rehashing bug in vfs_rename_dir */
891 new_dentry->d_time = jiffies;
892 ceph_dentry(new_dentry)->lease_shared_gen = 0;
884 } 893 }
885 ceph_mdsc_put_request(req); 894 ceph_mdsc_put_request(req);
886 return err; 895 return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5da2c1..ed6f19721d6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
665 * throw out any page cache pages in this range. this 665 * throw out any page cache pages in this range. this
666 * may block. 666 * may block.
667 */ 667 */
668 truncate_inode_pages_range(inode->i_mapping, pos, pos+len); 668 truncate_inode_pages_range(inode->i_mapping, pos,
669 (pos+len) | (PAGE_CACHE_SIZE-1));
669 } else { 670 } else {
670 pages = alloc_page_vector(num_pages); 671 pages = alloc_page_vector(num_pages);
671 if (IS_ERR(pages)) { 672 if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 26f883c275e8..85b4d2ffdeba 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
733 __ceph_get_fmode(ci, cap_fmode); 733 __ceph_get_fmode(ci, cap_fmode);
734 spin_unlock(&inode->i_lock); 734 spin_unlock(&inode->i_lock);
735 } 735 }
736 } else if (cap_fmode >= 0) {
737 pr_warning("mds issued no caps on %llx.%llx\n",
738 ceph_vinop(inode));
739 __ceph_get_fmode(ci, cap_fmode);
736 } 740 }
737 741
738 /* update delegation info? */ 742 /* update delegation info? */
@@ -997,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
997 dn, dn->d_name.len, dn->d_name.name); 1001 dn, dn->d_name.len, dn->d_name.name);
998 dout("fill_trace doing d_move %p -> %p\n", 1002 dout("fill_trace doing d_move %p -> %p\n",
999 req->r_old_dentry, dn); 1003 req->r_old_dentry, dn);
1004
1005 /* d_move screws up d_subdirs order */
1006 ceph_i_clear(dir, CEPH_I_COMPLETE);
1007
1000 d_move(req->r_old_dentry, dn); 1008 d_move(req->r_old_dentry, dn);
1001 dout(" src %p '%.*s' dst %p '%.*s'\n", 1009 dout(" src %p '%.*s' dst %p '%.*s'\n",
1002 req->r_old_dentry, 1010 req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47be..24561a557e01 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
736} 736}
737 737
738/* 738/*
739 * Helper to safely iterate over all caps associated with a session. 739 * Helper to safely iterate over all caps associated with a session, with
740 * special care taken to handle a racing __ceph_remove_cap().
740 * 741 *
741 * caller must hold session s_mutex 742 * Caller must hold session s_mutex.
742 */ 743 */
743static int iterate_session_caps(struct ceph_mds_session *session, 744static int iterate_session_caps(struct ceph_mds_session *session,
744 int (*cb)(struct inode *, struct ceph_cap *, 745 int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
2136 struct ceph_mds_session *session = NULL; 2137 struct ceph_mds_session *session = NULL;
2137 struct ceph_msg *reply; 2138 struct ceph_msg *reply;
2138 struct rb_node *p; 2139 struct rb_node *p;
2139 int err; 2140 int err = -ENOMEM;
2140 struct ceph_pagelist *pagelist; 2141 struct ceph_pagelist *pagelist;
2141 2142
2142 pr_info("reconnect to recovering mds%d\n", mds); 2143 pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
2185 goto fail; 2186 goto fail;
2186 err = iterate_session_caps(session, encode_caps_cb, pagelist); 2187 err = iterate_session_caps(session, encode_caps_cb, pagelist);
2187 if (err < 0) 2188 if (err < 0)
2188 goto out; 2189 goto fail;
2189 2190
2190 /* 2191 /*
2191 * snaprealms. we provide mds with the ino, seq (version), and 2192 * snaprealms. we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
2213 reply->nr_pages = calc_pages_for(0, pagelist->length); 2214 reply->nr_pages = calc_pages_for(0, pagelist->length);
2214 ceph_con_send(&session->s_con, reply); 2215 ceph_con_send(&session->s_con, reply);
2215 2216
2216 if (session) { 2217 session->s_state = CEPH_MDS_SESSION_OPEN;
2217 session->s_state = CEPH_MDS_SESSION_OPEN; 2218 mutex_unlock(&session->s_mutex);
2218 __wake_requests(mdsc, &session->s_waiting); 2219
2219 } 2220 mutex_lock(&mdsc->mutex);
2221 __wake_requests(mdsc, &session->s_waiting);
2222 mutex_unlock(&mdsc->mutex);
2223
2224 ceph_put_mds_session(session);
2220 2225
2221out:
2222 up_read(&mdsc->snap_rwsem); 2226 up_read(&mdsc->snap_rwsem);
2223 if (session) {
2224 mutex_unlock(&session->s_mutex);
2225 ceph_put_mds_session(session);
2226 }
2227 mutex_lock(&mdsc->mutex); 2227 mutex_lock(&mdsc->mutex);
2228 return; 2228 return;
2229 2229
2230fail: 2230fail:
2231 ceph_msg_put(reply); 2231 ceph_msg_put(reply);
2232 up_read(&mdsc->snap_rwsem);
2233 mutex_unlock(&session->s_mutex);
2234 ceph_put_mds_session(session);
2232fail_nomsg: 2235fail_nomsg:
2233 ceph_pagelist_release(pagelist); 2236 ceph_pagelist_release(pagelist);
2234 kfree(pagelist); 2237 kfree(pagelist);
2235fail_nopagelist: 2238fail_nopagelist:
2236 pr_err("ENOMEM preparing reconnect for mds%d\n", mds); 2239 pr_err("error %d preparing reconnect for mds%d\n", err, mds);
2237 goto out; 2240 mutex_lock(&mdsc->mutex);
2241 return;
2238} 2242}
2239 2243
2240 2244
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cdaaa131add3..cd4fadb6491a 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
492 list_move_tail(&m->list_head, &con->out_sent); 492 list_move_tail(&m->list_head, &con->out_sent);
493 } 493 }
494 494
495 m->hdr.seq = cpu_to_le64(++con->out_seq); 495 /*
496 * only assign outgoing seq # if we haven't sent this message
497 * yet. if it is requeued, resend with it's original seq.
498 */
499 if (m->needs_out_seq) {
500 m->hdr.seq = cpu_to_le64(++con->out_seq);
501 m->needs_out_seq = false;
502 }
496 503
497 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", 504 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
498 m, con->out_seq, le16_to_cpu(m->hdr.type), 505 m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1334,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con)
1334 unsigned front_len, middle_len, data_len, data_off; 1341 unsigned front_len, middle_len, data_len, data_off;
1335 int datacrc = con->msgr->nocrc; 1342 int datacrc = con->msgr->nocrc;
1336 int skip; 1343 int skip;
1344 u64 seq;
1337 1345
1338 dout("read_partial_message con %p msg %p\n", con, m); 1346 dout("read_partial_message con %p msg %p\n", con, m);
1339 1347
@@ -1368,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con)
1368 return -EIO; 1376 return -EIO;
1369 data_off = le16_to_cpu(con->in_hdr.data_off); 1377 data_off = le16_to_cpu(con->in_hdr.data_off);
1370 1378
1379 /* verify seq# */
1380 seq = le64_to_cpu(con->in_hdr.seq);
1381 if ((s64)seq - (s64)con->in_seq < 1) {
1382 pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
1383 ENTITY_NAME(con->peer_name),
1384 pr_addr(&con->peer_addr.in_addr),
1385 seq, con->in_seq + 1);
1386 con->in_base_pos = -front_len - middle_len - data_len -
1387 sizeof(m->footer);
1388 con->in_tag = CEPH_MSGR_TAG_READY;
1389 con->in_seq++;
1390 return 0;
1391 } else if ((s64)seq - (s64)con->in_seq > 1) {
1392 pr_err("read_partial_message bad seq %lld expected %lld\n",
1393 seq, con->in_seq + 1);
1394 con->error_msg = "bad message sequence # for incoming message";
1395 return -EBADMSG;
1396 }
1397
1371 /* allocate message? */ 1398 /* allocate message? */
1372 if (!con->in_msg) { 1399 if (!con->in_msg) {
1373 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 1400 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1379,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con)
1379 con->in_base_pos = -front_len - middle_len - data_len - 1406 con->in_base_pos = -front_len - middle_len - data_len -
1380 sizeof(m->footer); 1407 sizeof(m->footer);
1381 con->in_tag = CEPH_MSGR_TAG_READY; 1408 con->in_tag = CEPH_MSGR_TAG_READY;
1409 con->in_seq++;
1382 return 0; 1410 return 0;
1383 } 1411 }
1384 if (IS_ERR(con->in_msg)) { 1412 if (IS_ERR(con->in_msg)) {
@@ -1965,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
1965 1993
1966 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); 1994 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
1967 1995
1996 msg->needs_out_seq = true;
1997
1968 /* queue */ 1998 /* queue */
1969 mutex_lock(&con->mutex); 1999 mutex_lock(&con->mutex);
1970 BUG_ON(!list_empty(&msg->list_head)); 2000 BUG_ON(!list_empty(&msg->list_head));
@@ -2030,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2030 ceph_msg_put(con->in_msg); 2060 ceph_msg_put(con->in_msg);
2031 con->in_msg = NULL; 2061 con->in_msg = NULL;
2032 con->in_tag = CEPH_MSGR_TAG_READY; 2062 con->in_tag = CEPH_MSGR_TAG_READY;
2063 con->in_seq++;
2033 } else { 2064 } else {
2034 dout("con_revoke_pages %p msg %p pages %p no-op\n", 2065 dout("con_revoke_pages %p msg %p pages %p no-op\n",
2035 con, con->in_msg, msg); 2066 con, con->in_msg, msg);
@@ -2063,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
2063 kref_init(&m->kref); 2094 kref_init(&m->kref);
2064 INIT_LIST_HEAD(&m->list_head); 2095 INIT_LIST_HEAD(&m->list_head);
2065 2096
2097 m->hdr.tid = 0;
2066 m->hdr.type = cpu_to_le16(type); 2098 m->hdr.type = cpu_to_le16(type);
2099 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
2100 m->hdr.version = 0;
2067 m->hdr.front_len = cpu_to_le32(front_len); 2101 m->hdr.front_len = cpu_to_le32(front_len);
2068 m->hdr.middle_len = 0; 2102 m->hdr.middle_len = 0;
2069 m->hdr.data_len = cpu_to_le32(page_len); 2103 m->hdr.data_len = cpu_to_le32(page_len);
2070 m->hdr.data_off = cpu_to_le16(page_off); 2104 m->hdr.data_off = cpu_to_le16(page_off);
2071 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 2105 m->hdr.reserved = 0;
2072 m->footer.front_crc = 0; 2106 m->footer.front_crc = 0;
2073 m->footer.middle_crc = 0; 2107 m->footer.middle_crc = 0;
2074 m->footer.data_crc = 0; 2108 m->footer.data_crc = 0;
2109 m->footer.flags = 0;
2075 m->front_max = front_len; 2110 m->front_max = front_len;
2076 m->front_is_vmalloc = false; 2111 m->front_is_vmalloc = false;
2077 m->more_to_follow = false; 2112 m->more_to_follow = false;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae73cdc..a5caf91cc971 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
86 struct kref kref; 86 struct kref kref;
87 bool front_is_vmalloc; 87 bool front_is_vmalloc;
88 bool more_to_follow; 88 bool more_to_follow;
89 bool needs_out_seq;
89 int front_max; 90 int front_max;
90 91
91 struct ceph_msgpool *pool; 92 struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4dedaace6..3514f71ff85f 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
565{ 565{
566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; 566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
567 struct ceph_pg pgid; 567 struct ceph_pg pgid;
568 int o = -1; 568 int acting[CEPH_PG_MAX_SIZE];
569 int o = -1, num = 0;
569 int err; 570 int err;
570 571
571 dout("map_osds %p tid %lld\n", req, req->r_tid); 572 dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
576 pgid = reqhead->layout.ol_pgid; 577 pgid = reqhead->layout.ol_pgid;
577 req->r_pgid = pgid; 578 req->r_pgid = pgid;
578 579
579 o = ceph_calc_pg_primary(osdc->osdmap, pgid); 580 err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
581 if (err > 0) {
582 o = acting[0];
583 num = err;
584 }
580 585
581 if ((req->r_osd && req->r_osd->o_osd == o && 586 if ((req->r_osd && req->r_osd->o_osd == o &&
582 req->r_sent >= req->r_osd->o_incarnation) || 587 req->r_sent >= req->r_osd->o_incarnation &&
588 req->r_num_pg_osds == num &&
589 memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
583 (req->r_osd == NULL && o == -1)) 590 (req->r_osd == NULL && o == -1))
584 return 0; /* no change */ 591 return 0; /* no change */
585 592
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
587 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, 594 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
588 req->r_osd ? req->r_osd->o_osd : -1); 595 req->r_osd ? req->r_osd->o_osd : -1);
589 596
597 /* record full pg acting set */
598 memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
599 req->r_num_pg_osds = num;
600
590 if (req->r_osd) { 601 if (req->r_osd) {
591 __cancel_request(req); 602 __cancel_request(req);
592 list_del_init(&req->r_osd_item); 603 list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
612 __remove_osd_from_lru(req->r_osd); 623 __remove_osd_from_lru(req->r_osd);
613 list_add(&req->r_osd_item, &req->r_osd->o_requests); 624 list_add(&req->r_osd_item, &req->r_osd->o_requests);
614 } 625 }
615 err = 1; /* osd changed */ 626 err = 1; /* osd or pg changed */
616 627
617out: 628out:
618 return err; 629 return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
779 struct ceph_osd_request *req; 790 struct ceph_osd_request *req;
780 u64 tid; 791 u64 tid;
781 int numops, object_len, flags; 792 int numops, object_len, flags;
793 s32 result;
782 794
783 tid = le64_to_cpu(msg->hdr.tid); 795 tid = le64_to_cpu(msg->hdr.tid);
784 if (msg->front.iov_len < sizeof(*rhead)) 796 if (msg->front.iov_len < sizeof(*rhead))
785 goto bad; 797 goto bad;
786 numops = le32_to_cpu(rhead->num_ops); 798 numops = le32_to_cpu(rhead->num_ops);
787 object_len = le32_to_cpu(rhead->object_len); 799 object_len = le32_to_cpu(rhead->object_len);
800 result = le32_to_cpu(rhead->result);
788 if (msg->front.iov_len != sizeof(*rhead) + object_len + 801 if (msg->front.iov_len != sizeof(*rhead) + object_len +
789 numops * sizeof(struct ceph_osd_op)) 802 numops * sizeof(struct ceph_osd_op))
790 goto bad; 803 goto bad;
791 dout("handle_reply %p tid %llu\n", msg, tid); 804 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
792 805
793 /* lookup */ 806 /* lookup */
794 mutex_lock(&osdc->request_mutex); 807 mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
834 dout("handle_reply tid %llu flags %d\n", tid, flags); 847 dout("handle_reply tid %llu flags %d\n", tid, flags);
835 848
836 /* either this is a read, or we got the safe response */ 849 /* either this is a read, or we got the safe response */
837 if ((flags & CEPH_OSD_FLAG_ONDISK) || 850 if (result < 0 ||
851 (flags & CEPH_OSD_FLAG_ONDISK) ||
838 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 852 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
839 __unregister_request(osdc, req); 853 __unregister_request(osdc, req);
840 854
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b0759911e7c3..ce776989ef6a 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
48 struct list_head r_osd_item; 48 struct list_head r_osd_item;
49 struct ceph_osd *r_osd; 49 struct ceph_osd *r_osd;
50 struct ceph_pg r_pgid; 50 struct ceph_pg r_pgid;
51 int r_pg_osds[CEPH_PG_MAX_SIZE];
52 int r_num_pg_osds;
51 53
52 struct ceph_connection *r_con_filling_msg; 54 struct ceph_connection *r_con_filling_msg;
53 55
@@ -66,7 +68,6 @@ struct ceph_osd_request {
66 struct list_head r_unsafe_item; 68 struct list_head r_unsafe_item;
67 69
68 struct inode *r_inode; /* for use by callbacks */ 70 struct inode *r_inode; /* for use by callbacks */
69 struct writeback_control *r_wbc; /* ditto */
70 71
71 char r_oid[40]; /* object name */ 72 char r_oid[40]; /* object name */
72 int r_oid_len; 73 int r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 2e2c15eed82a..cfdd8f4388b7 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1041} 1041}
1042 1042
1043/* 1043/*
1044 * Return acting set for given pgid.
1045 */
1046int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1047 int *acting)
1048{
1049 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1050 int i, o, num = CEPH_PG_MAX_SIZE;
1051
1052 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1053 if (!osds)
1054 return -1;
1055
1056 /* primary is first up osd */
1057 o = 0;
1058 for (i = 0; i < num; i++)
1059 if (ceph_osd_is_up(osdmap, osds[i]))
1060 acting[o++] = osds[i];
1061 return o;
1062}
1063
1064/*
1044 * Return primary osd for given pgid, or -1 if none. 1065 * Return primary osd for given pgid, or -1 if none.
1045 */ 1066 */
1046int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) 1067int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1047{ 1068{
1048 int rawosds[10], *osds; 1069 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1049 int i, num = ARRAY_SIZE(rawosds); 1070 int i, num = CEPH_PG_MAX_SIZE;
1050 1071
1051 osds = calc_pg_raw(osdmap, pgid, rawosds, &num); 1072 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1052 if (!osds) 1073 if (!osds)
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1054 1075
1055 /* primary is first up osd */ 1076 /* primary is first up osd */
1056 for (i = 0; i < num; i++) 1077 for (i = 0; i < num; i++)
1057 if (ceph_osd_is_up(osdmap, osds[i])) { 1078 if (ceph_osd_is_up(osdmap, osds[i]))
1058 return osds[i]; 1079 return osds[i];
1059 break;
1060 }
1061 return -1; 1080 return -1;
1062} 1081}
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 8bc9f1e4f562..970b547e510d 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
120 const char *oid, 120 const char *oid,
121 struct ceph_file_layout *fl, 121 struct ceph_file_layout *fl,
122 struct ceph_osdmap *osdmap); 122 struct ceph_osdmap *osdmap);
123extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
124 int *acting);
123extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, 125extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
124 struct ceph_pg pgid); 126 struct ceph_pg pgid);
125 127
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a1fc1d017b58..fd56451a871f 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -58,6 +58,7 @@ struct ceph_timespec {
58#define CEPH_PG_LAYOUT_LINEAR 2 58#define CEPH_PG_LAYOUT_LINEAR 2
59#define CEPH_PG_LAYOUT_HYBRID 3 59#define CEPH_PG_LAYOUT_HYBRID 3
60 60
61#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
61 62
62/* 63/*
63 * placement group. 64 * placement group.
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2b881262ef67..d5114db70453 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -869,16 +869,20 @@ skip_inode:
869 continue; 869 continue;
870 ci = ceph_inode(inode); 870 ci = ceph_inode(inode);
871 spin_lock(&inode->i_lock); 871 spin_lock(&inode->i_lock);
872 if (!ci->i_snap_realm) 872 if (list_empty(&ci->i_snap_realm_item)) {
873 goto split_skip_inode; 873 struct ceph_snap_realm *oldrealm =
874 ceph_put_snap_realm(mdsc, ci->i_snap_realm); 874 ci->i_snap_realm;
875 spin_lock(&realm->inodes_with_caps_lock); 875
876 list_add(&ci->i_snap_realm_item, 876 dout(" moving %p to split realm %llx %p\n",
877 &realm->inodes_with_caps); 877 inode, realm->ino, realm);
878 ci->i_snap_realm = realm; 878 spin_lock(&realm->inodes_with_caps_lock);
879 spin_unlock(&realm->inodes_with_caps_lock); 879 list_add(&ci->i_snap_realm_item,
880 ceph_get_snap_realm(mdsc, realm); 880 &realm->inodes_with_caps);
881split_skip_inode: 881 ci->i_snap_realm = realm;
882 spin_unlock(&realm->inodes_with_caps_lock);
883 ceph_get_snap_realm(mdsc, realm);
884 ceph_put_snap_realm(mdsc, oldrealm);
885 }
882 spin_unlock(&inode->i_lock); 886 spin_unlock(&inode->i_lock);
883 iput(inode); 887 iput(inode);
884 } 888 }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02eaa1279..110857ba9269 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
47 */ 47 */
48static void ceph_put_super(struct super_block *s) 48static void ceph_put_super(struct super_block *s)
49{ 49{
50 struct ceph_client *cl = ceph_client(s); 50 struct ceph_client *client = ceph_sb_to_client(s);
51 51
52 dout("put_super\n"); 52 dout("put_super\n");
53 ceph_mdsc_close_sessions(&cl->mdsc); 53 ceph_mdsc_close_sessions(&client->mdsc);
54
55 /*
56 * ensure we release the bdi before put_anon_super releases
57 * the device name.
58 */
59 if (s->s_bdi == &client->backing_dev_info) {
60 bdi_unregister(&client->backing_dev_info);
61 s->s_bdi = NULL;
62 }
63
54 return; 64 return;
55} 65}
56 66
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
636 destroy_workqueue(client->pg_inv_wq); 646 destroy_workqueue(client->pg_inv_wq);
637 destroy_workqueue(client->trunc_wq); 647 destroy_workqueue(client->trunc_wq);
638 648
649 bdi_destroy(&client->backing_dev_info);
650
639 if (client->msgr) 651 if (client->msgr)
640 ceph_messenger_destroy(client->msgr); 652 ceph_messenger_destroy(client->msgr);
641 mempool_destroy(client->wb_pagevec_pool); 653 mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
876{ 888{
877 int err; 889 int err;
878 890
879 sb->s_bdi = &client->backing_dev_info;
880
881 /* set ra_pages based on rsize mount option? */ 891 /* set ra_pages based on rsize mount option? */
882 if (client->mount_args->rsize >= PAGE_CACHE_SIZE) 892 if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
883 client->backing_dev_info.ra_pages = 893 client->backing_dev_info.ra_pages =
884 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) 894 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
885 >> PAGE_SHIFT; 895 >> PAGE_SHIFT;
886 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); 896 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
897 if (!err)
898 sb->s_bdi = &client->backing_dev_info;
887 return err; 899 return err;
888} 900}
889 901
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
957 dout("kill_sb %p\n", s); 969 dout("kill_sb %p\n", s);
958 ceph_mdsc_pre_umount(&client->mdsc); 970 ceph_mdsc_pre_umount(&client->mdsc);
959 kill_anon_super(s); /* will call put_super after sb is r/o */ 971 kill_anon_super(s); /* will call put_super after sb is r/o */
960 if (s->s_bdi == &client->backing_dev_info)
961 bdi_unregister(&client->backing_dev_info);
962 bdi_destroy(&client->backing_dev_info);
963 ceph_destroy_client(client); 972 ceph_destroy_client(client);
964} 973}
965 974
@@ -996,9 +1005,10 @@ static int __init init_ceph(void)
996 if (ret) 1005 if (ret)
997 goto out_icache; 1006 goto out_icache;
998 1007
999 pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n", 1008 pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
1000 CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH, 1009 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
1001 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL); 1010 CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
1011 CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
1002 return 0; 1012 return 0;
1003 1013
1004out_icache: 1014out_icache:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e30dfbb056c3..13513b80d87f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mempool.h> 11#include <linux/mempool.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/slab.h>
13#include <linux/wait.h> 14#include <linux/wait.h>
14#include <linux/writeback.h> 15#include <linux/writeback.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 4797787c6a44..246a167cb913 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -18,6 +18,8 @@
18#ifndef _CIFS_FS_SB_H 18#ifndef _CIFS_FS_SB_H
19#define _CIFS_FS_SB_H 19#define _CIFS_FS_SB_H
20 20
21#include <linux/backing-dev.h>
22
21#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ 23#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */
22#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */ 24#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */
23#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ 25#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */
@@ -50,5 +52,6 @@ struct cifs_sb_info {
50#ifdef CONFIG_CIFS_DFS_UPCALL 52#ifdef CONFIG_CIFS_DFS_UPCALL
51 char *mountdata; /* mount options received at mount time */ 53 char *mountdata; /* mount options received at mount time */
52#endif 54#endif
55 struct backing_dev_info bdi;
53}; 56};
54#endif /* _CIFS_FS_SB_H */ 57#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 833166372a08..78c02eb4cb1f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -97,6 +97,12 @@ cifs_read_super(struct super_block *sb, void *data,
97 if (cifs_sb == NULL) 97 if (cifs_sb == NULL)
98 return -ENOMEM; 98 return -ENOMEM;
99 99
100 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
101 if (rc) {
102 kfree(cifs_sb);
103 return rc;
104 }
105
100#ifdef CONFIG_CIFS_DFS_UPCALL 106#ifdef CONFIG_CIFS_DFS_UPCALL
101 /* copy mount params to sb for use in submounts */ 107 /* copy mount params to sb for use in submounts */
102 /* BB: should we move this after the mount so we 108 /* BB: should we move this after the mount so we
@@ -109,6 +115,7 @@ cifs_read_super(struct super_block *sb, void *data,
109 int len = strlen(data); 115 int len = strlen(data);
110 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); 116 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
111 if (cifs_sb->mountdata == NULL) { 117 if (cifs_sb->mountdata == NULL) {
118 bdi_destroy(&cifs_sb->bdi);
112 kfree(sb->s_fs_info); 119 kfree(sb->s_fs_info);
113 sb->s_fs_info = NULL; 120 sb->s_fs_info = NULL;
114 return -ENOMEM; 121 return -ENOMEM;
@@ -128,6 +135,7 @@ cifs_read_super(struct super_block *sb, void *data,
128 135
129 sb->s_magic = CIFS_MAGIC_NUMBER; 136 sb->s_magic = CIFS_MAGIC_NUMBER;
130 sb->s_op = &cifs_super_ops; 137 sb->s_op = &cifs_super_ops;
138 sb->s_bdi = &cifs_sb->bdi;
131/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) 139/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
132 sb->s_blocksize = 140 sb->s_blocksize =
133 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ 141 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
@@ -173,6 +181,7 @@ out_mount_failed:
173 } 181 }
174#endif 182#endif
175 unload_nls(cifs_sb->local_nls); 183 unload_nls(cifs_sb->local_nls);
184 bdi_destroy(&cifs_sb->bdi);
176 kfree(cifs_sb); 185 kfree(cifs_sb);
177 } 186 }
178 return rc; 187 return rc;
@@ -204,6 +213,7 @@ cifs_put_super(struct super_block *sb)
204#endif 213#endif
205 214
206 unload_nls(cifs_sb->local_nls); 215 unload_nls(cifs_sb->local_nls);
216 bdi_destroy(&cifs_sb->bdi);
207 kfree(cifs_sb); 217 kfree(cifs_sb);
208 218
209 unlock_kernel(); 219 unlock_kernel();
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4a99487400f3..a88479ceaad5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -507,6 +507,7 @@ struct dfs_info3_param {
507#define CIFS_FATTR_DFS_REFERRAL 0x1 507#define CIFS_FATTR_DFS_REFERRAL 0x1
508#define CIFS_FATTR_DELETE_PENDING 0x2 508#define CIFS_FATTR_DELETE_PENDING 0x2
509#define CIFS_FATTR_NEED_REVAL 0x4 509#define CIFS_FATTR_NEED_REVAL 0x4
510#define CIFS_FATTR_INO_COLLISION 0x8
510 511
511struct cifs_fattr { 512struct cifs_fattr {
512 u32 cf_flags; 513 u32 cf_flags;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b35cb031c20c..5b042fc46450 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque)
715 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) 715 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
716 return 0; 716 return 0;
717 717
718 /*
719 * uh oh -- it's a directory. We can't use it since hardlinked dirs are
720 * verboten. Disable serverino and return it as if it were found, the
721 * caller can discard it, generate a uniqueid and retry the find
722 */
723 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
724 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
725 cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
726 }
727
718 return 1; 728 return 1;
719} 729}
720 730
@@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
734 unsigned long hash; 744 unsigned long hash;
735 struct inode *inode; 745 struct inode *inode;
736 746
747retry_iget5_locked:
737 cFYI(1, "looking for uniqueid=%llu", fattr->cf_uniqueid); 748 cFYI(1, "looking for uniqueid=%llu", fattr->cf_uniqueid);
738 749
739 /* hash down to 32-bits on 32-bit arch */ 750 /* hash down to 32-bits on 32-bit arch */
740 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); 751 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
741 752
742 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); 753 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
743
744 /* we have fattrs in hand, update the inode */
745 if (inode) { 754 if (inode) {
755 /* was there a problematic inode number collision? */
756 if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
757 iput(inode);
758 fattr->cf_uniqueid = iunique(sb, ROOT_I);
759 fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
760 goto retry_iget5_locked;
761 }
762
746 cifs_fattr_to_inode(inode, fattr); 763 cifs_fattr_to_inode(inode, fattr);
747 if (sb->s_flags & MS_NOATIME) 764 if (sb->s_flags & MS_NOATIME)
748 inode->i_flags |= S_NOATIME | S_NOCMTIME; 765 inode->i_flags |= S_NOATIME | S_NOCMTIME;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index a1695dcadd99..d97f9935a028 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
167 return -EBUSY; 167 return -EBUSY;
168 } 168 }
169 169
170 error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
171 if (error)
172 goto bdi_err;
173
170 vc->vc_sb = sb; 174 vc->vc_sb = sb;
171 175
172 sb->s_fs_info = vc; 176 sb->s_fs_info = vc;
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
175 sb->s_blocksize_bits = 12; 179 sb->s_blocksize_bits = 12;
176 sb->s_magic = CODA_SUPER_MAGIC; 180 sb->s_magic = CODA_SUPER_MAGIC;
177 sb->s_op = &coda_super_operations; 181 sb->s_op = &coda_super_operations;
182 sb->s_bdi = &vc->bdi;
178 183
179 /* get root fid from Venus: this needs the root inode */ 184 /* get root fid from Venus: this needs the root inode */
180 error = venus_rootfid(sb, &fid); 185 error = venus_rootfid(sb, &fid);
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
200 return 0; 205 return 0;
201 206
202 error: 207 error:
208 bdi_destroy(&vc->bdi);
209 bdi_err:
203 if (root) 210 if (root)
204 iput(root); 211 iput(root);
205 if (vc) 212 if (vc)
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
210 217
211static void coda_put_super(struct super_block *sb) 218static void coda_put_super(struct super_block *sb)
212{ 219{
220 bdi_destroy(&coda_vcp(sb)->bdi);
213 coda_vcp(sb)->vc_sb = NULL; 221 coda_vcp(sb)->vc_sb = NULL;
214 sb->s_fs_info = NULL; 222 sb->s_fs_info = NULL;
215 223
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc478..05448730f840 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
1531 if (retval < 0) 1531 if (retval < 0)
1532 goto out; 1532 goto out;
1533 1533
1534 current->stack_start = current->mm->start_stack;
1535
1536 /* execve succeeded */ 1534 /* execve succeeded */
1537 current->fs->in_exec = 0; 1535 current->fs->in_exec = 0;
1538 current->in_execve = 0; 1536 current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c32a1b6a856b..641640dc7ae5 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -102,7 +102,6 @@
102#include <linux/nbd.h> 102#include <linux/nbd.h>
103#include <linux/random.h> 103#include <linux/random.h>
104#include <linux/filter.h> 104#include <linux/filter.h>
105#include <linux/pktcdvd.h>
106 105
107#include <linux/hiddev.h> 106#include <linux/hiddev.h>
108 107
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
1126COMPATIBLE_IOCTL(PPGETPHASE) 1125COMPATIBLE_IOCTL(PPGETPHASE)
1127COMPATIBLE_IOCTL(PPGETFLAGS) 1126COMPATIBLE_IOCTL(PPGETFLAGS)
1128COMPATIBLE_IOCTL(PPSETFLAGS) 1127COMPATIBLE_IOCTL(PPSETFLAGS)
1129/* pktcdvd */
1130COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
1131/* Big A */ 1128/* Big A */
1132/* sparc only */ 1129/* sparc only */
1133/* Big Q for sound/OSS */ 1130/* Big Q for sound/OSS */
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index bc7115403f38..bfc2e0f78f00 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -35,6 +35,7 @@
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h> 37#include <linux/nsproxy.h>
38#include <linux/backing-dev.h>
38 39
39/* Version verification for shared data structures w/ userspace */ 40/* Version verification for shared data structures w/ userspace */
40#define ECRYPTFS_VERSION_MAJOR 0x00 41#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat {
393struct ecryptfs_sb_info { 394struct ecryptfs_sb_info {
394 struct super_block *wsi_sb; 395 struct super_block *wsi_sb;
395 struct ecryptfs_mount_crypt_stat mount_crypt_stat; 396 struct ecryptfs_mount_crypt_stat mount_crypt_stat;
397 struct backing_dev_info bdi;
396}; 398};
397 399
398/* file private data. */ 400/* file private data. */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index af1a8f01ebac..760983d0f25e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache;
497static int 497static int
498ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) 498ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
499{ 499{
500 struct ecryptfs_sb_info *esi;
500 int rc = 0; 501 int rc = 0;
501 502
502 /* Released in ecryptfs_put_super() */ 503 /* Released in ecryptfs_put_super() */
503 ecryptfs_set_superblock_private(sb, 504 ecryptfs_set_superblock_private(sb,
504 kmem_cache_zalloc(ecryptfs_sb_info_cache, 505 kmem_cache_zalloc(ecryptfs_sb_info_cache,
505 GFP_KERNEL)); 506 GFP_KERNEL));
506 if (!ecryptfs_superblock_to_private(sb)) { 507 esi = ecryptfs_superblock_to_private(sb);
508 if (!esi) {
507 ecryptfs_printk(KERN_WARNING, "Out of memory\n"); 509 ecryptfs_printk(KERN_WARNING, "Out of memory\n");
508 rc = -ENOMEM; 510 rc = -ENOMEM;
509 goto out; 511 goto out;
510 } 512 }
513
514 rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
515 if (rc)
516 goto out;
517
518 sb->s_bdi = &esi->bdi;
511 sb->s_op = &ecryptfs_sops; 519 sb->s_op = &ecryptfs_sops;
512 /* Released through deactivate_super(sb) from get_sb_nodev */ 520 /* Released through deactivate_super(sb) from get_sb_nodev */
513 sb->s_root = d_alloc(NULL, &(const struct qstr) { 521 sb->s_root = d_alloc(NULL, &(const struct qstr) {
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 278743c7716a..0c0ae491d231 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -122,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb)
122 lock_kernel(); 122 lock_kernel();
123 123
124 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); 124 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
125 bdi_destroy(&sb_info->bdi);
125 kmem_cache_free(ecryptfs_sb_info_cache, sb_info); 126 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
126 ecryptfs_set_superblock_private(sb, NULL); 127 ecryptfs_set_superblock_private(sb, NULL);
127 128
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b9..e6e94c626c2c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
1387 if (retval < 0) 1387 if (retval < 0)
1388 goto out; 1388 goto out;
1389 1389
1390 current->stack_start = current->mm->start_stack;
1391
1392 /* execve succeeded */ 1390 /* execve succeeded */
1393 current->fs->in_exec = 0; 1391 current->fs->in_exec = 0;
1394 current->in_execve = 0; 1392 current->in_execve = 0;
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 8442e353309f..22721b2fd890 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -35,6 +35,7 @@
35 35
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/backing-dev.h>
38#include "common.h" 39#include "common.h"
39 40
40/* FIXME: Remove once pnfs hits mainline 41/* FIXME: Remove once pnfs hits mainline
@@ -84,6 +85,7 @@ struct exofs_sb_info {
84 u32 s_next_generation; /* next gen # to use */ 85 u32 s_next_generation; /* next gen # to use */
85 atomic_t s_curr_pending; /* number of pending commands */ 86 atomic_t s_curr_pending; /* number of pending commands */
86 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ 87 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
88 struct backing_dev_info bdi; /* register our bdi with VFS */
87 89
88 struct pnfs_osd_data_map data_map; /* Default raid to use 90 struct pnfs_osd_data_map data_map; /* Default raid to use
89 * FIXME: Needed ? 91 * FIXME: Needed ?
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 18e57ea1e5b4..03149b9a5178 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb)
302 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], 302 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
303 sbi->layout.s_pid); 303 sbi->layout.s_pid);
304 304
305 bdi_destroy(&sbi->bdi);
305 exofs_free_sbi(sbi); 306 exofs_free_sbi(sbi);
306 sb->s_fs_info = NULL; 307 sb->s_fs_info = NULL;
307} 308}
@@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
546 if (!sbi) 547 if (!sbi)
547 return -ENOMEM; 548 return -ENOMEM;
548 549
550 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
551 if (ret)
552 goto free_bdi;
553
549 /* use mount options to fill superblock */ 554 /* use mount options to fill superblock */
550 od = osduld_path_lookup(opts->dev_name); 555 od = osduld_path_lookup(opts->dev_name);
551 if (IS_ERR(od)) { 556 if (IS_ERR(od)) {
@@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
612 } 617 }
613 618
614 /* set up operation vectors */ 619 /* set up operation vectors */
620 sb->s_bdi = &sbi->bdi;
615 sb->s_fs_info = sbi; 621 sb->s_fs_info = sbi;
616 sb->s_op = &exofs_sops; 622 sb->s_op = &exofs_sops;
617 sb->s_export_op = &exofs_export_ops; 623 sb->s_export_op = &exofs_export_ops;
@@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
643 return 0; 649 return 0;
644 650
645free_sbi: 651free_sbi:
652 bdi_destroy(&sbi->bdi);
653free_bdi:
646 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 654 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
647 opts->dev_name, sbi->layout.s_pid, ret); 655 opts->dev_name, sbi->layout.s_pid, ret);
648 exofs_free_sbi(sbi); 656 exofs_free_sbi(sbi);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 94c8ee81f5e1..236b834b4ca8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
3879 physical += offset; 3879 physical += offset;
3880 length = EXT4_SB(inode->i_sb)->s_inode_size - offset; 3880 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
3881 flags |= FIEMAP_EXTENT_DATA_INLINE; 3881 flags |= FIEMAP_EXTENT_DATA_INLINE;
3882 brelse(iloc.bh);
3882 } else { /* external block */ 3883 } else { /* external block */
3883 physical = EXT4_I(inode)->i_file_acl << blockbits; 3884 physical = EXT4_I(inode)->i_file_acl << blockbits;
3884 length = inode->i_sb->s_blocksize; 3885 length = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5381802d6052..81d605412844 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5375,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5375 } else { 5375 } else {
5376 struct ext4_iloc iloc; 5376 struct ext4_iloc iloc;
5377 5377
5378 err = ext4_get_inode_loc(inode, &iloc); 5378 err = __ext4_get_inode_loc(inode, &iloc, 0);
5379 if (err) 5379 if (err)
5380 return err; 5380 return err;
5381 if (wbc->sync_mode == WB_SYNC_ALL) 5381 if (wbc->sync_mode == WB_SYNC_ALL)
@@ -5386,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5386 (unsigned long long)iloc.bh->b_blocknr); 5386 (unsigned long long)iloc.bh->b_blocknr);
5387 err = -EIO; 5387 err = -EIO;
5388 } 5388 }
5389 brelse(iloc.bh);
5389 } 5390 }
5390 return err; 5391 return err;
5391} 5392}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bde9d0b170c2..b423a364dca3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2535,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2535 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2535 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2536 entry->count, entry->group, entry); 2536 entry->count, entry->group, entry);
2537 2537
2538 if (test_opt(sb, DISCARD)) {
2539 ext4_fsblk_t discard_block;
2540
2541 discard_block = entry->start_blk +
2542 ext4_group_first_block_no(sb, entry->group);
2543 trace_ext4_discard_blocks(sb,
2544 (unsigned long long)discard_block,
2545 entry->count);
2546 sb_issue_discard(sb, discard_block, entry->count);
2547 }
2548
2538 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2549 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2539 /* we expect to find existing buddy because it's pinned */ 2550 /* we expect to find existing buddy because it's pinned */
2540 BUG_ON(err != 0); 2551 BUG_ON(err != 0);
@@ -2556,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2556 page_cache_release(e4b.bd_bitmap_page); 2567 page_cache_release(e4b.bd_bitmap_page);
2557 } 2568 }
2558 ext4_unlock_group(sb, entry->group); 2569 ext4_unlock_group(sb, entry->group);
2559 if (test_opt(sb, DISCARD)) {
2560 ext4_fsblk_t discard_block;
2561
2562 discard_block = entry->start_blk +
2563 ext4_group_first_block_no(sb, entry->group);
2564 trace_ext4_discard_blocks(sb,
2565 (unsigned long long)discard_block,
2566 entry->count);
2567 sb_issue_discard(sb, discard_block, entry->count);
2568 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2570 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2571 ext4_mb_release_desc(&e4b);
2571 } 2572 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c751106c2e5..7faefb4da939 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
228 228
229#ifdef CONFIG_BLOCK 229#ifdef CONFIG_BLOCK
230 230
231#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) 231static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
232#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); 232{
233 return (offset >> inode->i_blkbits);
234}
235
236static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
237{
238 return (blk << inode->i_blkbits);
239}
233 240
234/** 241/**
235 * __generic_block_fiemap - FIEMAP for block based inodes (no locking) 242 * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
236 * @inode - the inode to map 243 * @inode: the inode to map
237 * @arg - the pointer to userspace where we copy everything to 244 * @fieinfo: the fiemap info struct that will be passed back to userspace
238 * @get_block - the fs's get_block function 245 * @start: where to start mapping in the inode
246 * @len: how much space to map
247 * @get_block: the fs's get_block function
239 * 248 *
240 * This does FIEMAP for block based inodes. Basically it will just loop 249 * This does FIEMAP for block based inodes. Basically it will just loop
241 * through get_block until we hit the number of extents we want to map, or we 250 * through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
250 */ 259 */
251 260
252int __generic_block_fiemap(struct inode *inode, 261int __generic_block_fiemap(struct inode *inode,
253 struct fiemap_extent_info *fieinfo, u64 start, 262 struct fiemap_extent_info *fieinfo, loff_t start,
254 u64 len, get_block_t *get_block) 263 loff_t len, get_block_t *get_block)
255{ 264{
256 struct buffer_head tmp; 265 struct buffer_head map_bh;
257 unsigned long long start_blk; 266 sector_t start_blk, last_blk;
258 long long length = 0, map_len = 0; 267 loff_t isize = i_size_read(inode);
259 u64 logical = 0, phys = 0, size = 0; 268 u64 logical = 0, phys = 0, size = 0;
260 u32 flags = FIEMAP_EXTENT_MERGED; 269 u32 flags = FIEMAP_EXTENT_MERGED;
261 int ret = 0, past_eof = 0, whole_file = 0; 270 bool past_eof = false, whole_file = false;
271 int ret = 0;
262 272
263 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) 273 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
274 if (ret)
264 return ret; 275 return ret;
265 276
266 start_blk = logical_to_blk(inode, start); 277 /*
267 278 * Either the i_mutex or other appropriate locking needs to be held
268 length = (long long)min_t(u64, len, i_size_read(inode)); 279 * since we expect isize to not change at all through the duration of
269 if (length < len) 280 * this call.
270 whole_file = 1; 281 */
282 if (len >= isize) {
283 whole_file = true;
284 len = isize;
285 }
271 286
272 map_len = length; 287 start_blk = logical_to_blk(inode, start);
288 last_blk = logical_to_blk(inode, start + len - 1);
273 289
274 do { 290 do {
275 /* 291 /*
276 * we set b_size to the total size we want so it will map as 292 * we set b_size to the total size we want so it will map as
277 * many contiguous blocks as possible at once 293 * many contiguous blocks as possible at once
278 */ 294 */
279 memset(&tmp, 0, sizeof(struct buffer_head)); 295 memset(&map_bh, 0, sizeof(struct buffer_head));
280 tmp.b_size = map_len; 296 map_bh.b_size = len;
281 297
282 ret = get_block(inode, start_blk, &tmp, 0); 298 ret = get_block(inode, start_blk, &map_bh, 0);
283 if (ret) 299 if (ret)
284 break; 300 break;
285 301
286 /* HOLE */ 302 /* HOLE */
287 if (!buffer_mapped(&tmp)) { 303 if (!buffer_mapped(&map_bh)) {
288 length -= blk_to_logical(inode, 1);
289 start_blk++; 304 start_blk++;
290 305
291 /* 306 /*
292 * we want to handle the case where there is an 307 * We want to handle the case where there is an
293 * allocated block at the front of the file, and then 308 * allocated block at the front of the file, and then
294 * nothing but holes up to the end of the file properly, 309 * nothing but holes up to the end of the file properly,
295 * to make sure that extent at the front gets properly 310 * to make sure that extent at the front gets properly
296 * marked with FIEMAP_EXTENT_LAST 311 * marked with FIEMAP_EXTENT_LAST
297 */ 312 */
298 if (!past_eof && 313 if (!past_eof &&
299 blk_to_logical(inode, start_blk) >= 314 blk_to_logical(inode, start_blk) >= isize)
300 blk_to_logical(inode, 0)+i_size_read(inode))
301 past_eof = 1; 315 past_eof = 1;
302 316
303 /* 317 /*
304 * first hole after going past the EOF, this is our 318 * First hole after going past the EOF, this is our
305 * last extent 319 * last extent
306 */ 320 */
307 if (past_eof && size) { 321 if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
309 ret = fiemap_fill_next_extent(fieinfo, logical, 323 ret = fiemap_fill_next_extent(fieinfo, logical,
310 phys, size, 324 phys, size,
311 flags); 325 flags);
312 break; 326 } else if (size) {
327 ret = fiemap_fill_next_extent(fieinfo, logical,
328 phys, size, flags);
329 size = 0;
313 } 330 }
314 331
315 /* if we have holes up to/past EOF then we're done */ 332 /* if we have holes up to/past EOF then we're done */
316 if (length <= 0 || past_eof) 333 if (start_blk > last_blk || past_eof || ret)
317 break; 334 break;
318 } else { 335 } else {
319 /* 336 /*
320 * we have gone over the length of what we wanted to 337 * We have gone over the length of what we wanted to
321 * map, and it wasn't the entire file, so add the extent 338 * map, and it wasn't the entire file, so add the extent
322 * we got last time and exit. 339 * we got last time and exit.
323 * 340 *
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
331 * are good to go, just add the extent to the fieinfo 348 * are good to go, just add the extent to the fieinfo
332 * and break 349 * and break
333 */ 350 */
334 if (length <= 0 && !whole_file) { 351 if (start_blk > last_blk && !whole_file) {
335 ret = fiemap_fill_next_extent(fieinfo, logical, 352 ret = fiemap_fill_next_extent(fieinfo, logical,
336 phys, size, 353 phys, size,
337 flags); 354 flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
351 } 368 }
352 369
353 logical = blk_to_logical(inode, start_blk); 370 logical = blk_to_logical(inode, start_blk);
354 phys = blk_to_logical(inode, tmp.b_blocknr); 371 phys = blk_to_logical(inode, map_bh.b_blocknr);
355 size = tmp.b_size; 372 size = map_bh.b_size;
356 flags = FIEMAP_EXTENT_MERGED; 373 flags = FIEMAP_EXTENT_MERGED;
357 374
358 length -= tmp.b_size;
359 start_blk += logical_to_blk(inode, size); 375 start_blk += logical_to_blk(inode, size);
360 376
361 /* 377 /*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
363 * soon as we find a hole that the last extent we found 379 * soon as we find a hole that the last extent we found
364 * is marked with FIEMAP_EXTENT_LAST 380 * is marked with FIEMAP_EXTENT_LAST
365 */ 381 */
366 if (!past_eof && 382 if (!past_eof && logical + size >= isize)
367 logical+size >= 383 past_eof = true;
368 blk_to_logical(inode, 0)+i_size_read(inode))
369 past_eof = 1;
370 } 384 }
371 cond_resched(); 385 cond_resched();
372 } while (1); 386 } while (1);
373 387
374 /* if ret is 1 then we just hit the end of the extent array */ 388 /* If ret is 1 then we just hit the end of the extent array */
375 if (ret == 1) 389 if (ret == 1)
376 ret = 0; 390 ret = 0;
377 391
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9dd126276c9f..ed9ba6fe04f5 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
61 inode->i_op = &page_symlink_inode_operations; 61 inode->i_op = &page_symlink_inode_operations;
62 inode->i_mapping->a_ops = &jfs_aops; 62 inode->i_mapping->a_ops = &jfs_aops;
63 } else { 63 } else {
64 inode->i_op = &jfs_symlink_inode_operations; 64 inode->i_op = &jfs_fast_symlink_inode_operations;
65 /* 65 /*
66 * The inline data should be null-terminated, but 66 * The inline data should be null-terminated, but
67 * don't let on-disk corruption crash the kernel 67 * don't let on-disk corruption crash the kernel
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 6c4dfcbf3f55..9e2f6a721668 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -196,7 +196,7 @@ int dbMount(struct inode *ipbmap)
196 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); 196 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
197 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); 197 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
198 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); 198 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
199 bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); 199 bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
200 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); 200 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
201 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); 201 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
202 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); 202 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
@@ -288,7 +288,7 @@ int dbSync(struct inode *ipbmap)
288 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); 288 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
289 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); 289 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
290 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); 290 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
291 dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); 291 dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
292 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); 292 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
293 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); 293 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
294 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); 294 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
@@ -1441,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1441 * tree index of this allocation group within the control page. 1441 * tree index of this allocation group within the control page.
1442 */ 1442 */
1443 agperlev = 1443 agperlev =
1444 (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; 1444 (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
1445 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); 1445 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
1446 1446
1447 /* dmap control page trees fan-out by 4 and a single allocation 1447 /* dmap control page trees fan-out by 4 and a single allocation
@@ -1460,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1460 * the subtree to find the leftmost leaf that describes this 1460 * the subtree to find the leftmost leaf that describes this
1461 * free space. 1461 * free space.
1462 */ 1462 */
1463 for (k = bmp->db_agheigth; k > 0; k--) { 1463 for (k = bmp->db_agheight; k > 0; k--) {
1464 for (n = 0, m = (ti << 2) + 1; n < 4; n++) { 1464 for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
1465 if (l2nb <= dcp->stree[m + n]) { 1465 if (l2nb <= dcp->stree[m + n]) {
1466 ti = m + n; 1466 ti = m + n;
@@ -3607,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
3607 } 3607 }
3608 3608
3609 /* 3609 /*
3610 * compute db_aglevel, db_agheigth, db_width, db_agstart: 3610 * compute db_aglevel, db_agheight, db_width, db_agstart:
3611 * an ag is covered in aglevel dmapctl summary tree, 3611 * an ag is covered in aglevel dmapctl summary tree,
3612 * at agheight level height (from leaf) with agwidth number of nodes 3612 * at agheight level height (from leaf) with agwidth number of nodes
3613 * each, which starts at agstart index node of the smmary tree node 3613 * each, which starts at agstart index node of the smmary tree node
@@ -3616,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap)
3616 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); 3616 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
3617 l2nl = 3617 l2nl =
3618 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); 3618 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
3619 bmp->db_agheigth = l2nl >> 1; 3619 bmp->db_agheight = l2nl >> 1;
3620 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); 3620 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
3621 for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; 3621 for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
3622 i--) { 3622 i--) {
3623 bmp->db_agstart += n; 3623 bmp->db_agstart += n;
3624 n <<= 2; 3624 n <<= 2;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 1a6eb41569bc..6dcb906c55d8 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -210,7 +210,7 @@ struct dbmap_disk {
210 __le32 dn_maxag; /* 4: max active alloc group number */ 210 __le32 dn_maxag; /* 4: max active alloc group number */
211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */ 211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */
212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ 212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */
213 __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ 213 __le32 dn_agheight; /* 4: height in dmapctl of the AG */
214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ 214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */
215 __le32 dn_agstart; /* 4: start tree index at AG height */ 215 __le32 dn_agstart; /* 4: start tree index at AG height */
216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ 216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
@@ -229,7 +229,7 @@ struct dbmap {
229 int dn_maxag; /* max active alloc group number */ 229 int dn_maxag; /* max active alloc group number */
230 int dn_agpref; /* preferred alloc group (hint) */ 230 int dn_agpref; /* preferred alloc group (hint) */
231 int dn_aglevel; /* dmapctl level holding the AG */ 231 int dn_aglevel; /* dmapctl level holding the AG */
232 int dn_agheigth; /* height in dmapctl of the AG */ 232 int dn_agheight; /* height in dmapctl of the AG */
233 int dn_agwidth; /* width in dmapctl of the AG */ 233 int dn_agwidth; /* width in dmapctl of the AG */
234 int dn_agstart; /* start tree index at AG height */ 234 int dn_agstart; /* start tree index at AG height */
235 int dn_agl2size; /* l2 num of blks per alloc group */ 235 int dn_agl2size; /* l2 num of blks per alloc group */
@@ -255,7 +255,7 @@ struct bmap {
255#define db_agsize db_bmap.dn_agsize 255#define db_agsize db_bmap.dn_agsize
256#define db_agl2size db_bmap.dn_agl2size 256#define db_agl2size db_bmap.dn_agl2size
257#define db_agwidth db_bmap.dn_agwidth 257#define db_agwidth db_bmap.dn_agwidth
258#define db_agheigth db_bmap.dn_agheigth 258#define db_agheight db_bmap.dn_agheight
259#define db_agstart db_bmap.dn_agstart 259#define db_agstart db_bmap.dn_agstart
260#define db_numag db_bmap.dn_numag 260#define db_numag db_bmap.dn_numag
261#define db_maxlevel db_bmap.dn_maxlevel 261#define db_maxlevel db_bmap.dn_maxlevel
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 79e2c79661df..9e6bda30a6e8 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations;
48extern const struct inode_operations jfs_file_inode_operations; 48extern const struct inode_operations jfs_file_inode_operations;
49extern const struct file_operations jfs_file_operations; 49extern const struct file_operations jfs_file_operations;
50extern const struct inode_operations jfs_symlink_inode_operations; 50extern const struct inode_operations jfs_symlink_inode_operations;
51extern const struct inode_operations jfs_fast_symlink_inode_operations;
51extern const struct dentry_operations jfs_ci_dentry_operations; 52extern const struct dentry_operations jfs_ci_dentry_operations;
52#endif /* _H_JFS_INODE */ 53#endif /* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 4a3e9f39c21d..a9cf8e8675be 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
956 */ 956 */
957 957
958 if (ssize <= IDATASIZE) { 958 if (ssize <= IDATASIZE) {
959 ip->i_op = &jfs_symlink_inode_operations; 959 ip->i_op = &jfs_fast_symlink_inode_operations;
960 960
961 i_fastsymlink = JFS_IP(ip)->i_inline; 961 i_fastsymlink = JFS_IP(ip)->i_inline;
962 memcpy(i_fastsymlink, name, ssize); 962 memcpy(i_fastsymlink, name, ssize);
@@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
978 else { 978 else {
979 jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); 979 jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
980 980
981 ip->i_op = &page_symlink_inode_operations; 981 ip->i_op = &jfs_symlink_inode_operations;
982 ip->i_mapping->a_ops = &jfs_aops; 982 ip->i_mapping->a_ops = &jfs_aops;
983 983
984 /* 984 /*
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 7f24a0bb08ca..1aba0039f1c9 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
81 struct inode *iplist[1]; 81 struct inode *iplist[1];
82 struct jfs_superblock *j_sb, *j_sb2; 82 struct jfs_superblock *j_sb, *j_sb2;
83 uint old_agsize; 83 uint old_agsize;
84 int agsizechanged = 0;
84 struct buffer_head *bh, *bh2; 85 struct buffer_head *bh, *bh2;
85 86
86 /* If the volume hasn't grown, get out now */ 87 /* If the volume hasn't grown, get out now */
@@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
333 */ 334 */
334 if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) 335 if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
335 goto error_out; 336 goto error_out;
337
338 agsizechanged |= (bmp->db_agsize != old_agsize);
339
336 /* 340 /*
337 * the map now has extended to cover additional nblocks: 341 * the map now has extended to cover additional nblocks:
338 * dn_mapsize = oldMapsize + nblocks; 342 * dn_mapsize = oldMapsize + nblocks;
@@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
432 * will correctly identify the new ag); 436 * will correctly identify the new ag);
433 */ 437 */
434 /* if new AG size the same as old AG size, done! */ 438 /* if new AG size the same as old AG size, done! */
435 if (bmp->db_agsize != old_agsize) { 439 if (agsizechanged) {
436 if ((rc = diExtendFS(ipimap, ipbmap))) 440 if ((rc = diExtendFS(ipimap, ipbmap)))
437 goto error_out; 441 goto error_out;
438 442
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 4af1a05aad0a..205b946d8e0d 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
29 return NULL; 29 return NULL;
30} 30}
31 31
32const struct inode_operations jfs_symlink_inode_operations = { 32const struct inode_operations jfs_fast_symlink_inode_operations = {
33 .readlink = generic_readlink, 33 .readlink = generic_readlink,
34 .follow_link = jfs_follow_link, 34 .follow_link = jfs_follow_link,
35 .setattr = jfs_setattr,
36 .setxattr = jfs_setxattr,
37 .getxattr = jfs_getxattr,
38 .listxattr = jfs_listxattr,
39 .removexattr = jfs_removexattr,
40};
41
42const struct inode_operations jfs_symlink_inode_operations = {
43 .readlink = generic_readlink,
44 .follow_link = page_follow_link_light,
45 .put_link = page_put_link,
46 .setattr = jfs_setattr,
35 .setxattr = jfs_setxattr, 47 .setxattr = jfs_setxattr,
36 .getxattr = jfs_getxattr, 48 .getxattr = jfs_getxattr,
37 .listxattr = jfs_listxattr, 49 .listxattr = jfs_listxattr,
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 84e36f52fe95..76c242fbe1b0 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -459,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target)
459 struct logfs_block *block; 459 struct logfs_block *block;
460 int round, progress, last_progress = 0; 460 int round, progress, last_progress = 0;
461 461
462 /*
463 * Doing too many changes to the segfile at once would result
464 * in a large number of aliases. Write the journal before
465 * things get out of hand.
466 */
467 if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
468 logfs_write_anchor(sb);
469
462 if (no_free_segments(sb) >= target && 470 if (no_free_segments(sb) >= target &&
463 super->s_no_object_aliases < MAX_OBJ_ALIASES) 471 super->s_no_object_aliases < MAX_OBJ_ALIASES)
464 return; 472 return;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 33bd260b8309..fb0a613f885b 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -389,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area)
389static int journal_erase_segment(struct logfs_area *area) 389static int journal_erase_segment(struct logfs_area *area)
390{ 390{
391 struct super_block *sb = area->a_sb; 391 struct super_block *sb = area->a_sb;
392 struct logfs_segment_header sh; 392 union {
393 struct logfs_segment_header sh;
394 unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
395 } u;
393 u64 ofs; 396 u64 ofs;
394 int err; 397 int err;
395 398
@@ -397,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area)
397 if (err) 400 if (err)
398 return err; 401 return err;
399 402
400 sh.pad = 0; 403 memset(&u, 0, sizeof(u));
401 sh.type = SEG_JOURNAL; 404 u.sh.pad = 0;
402 sh.level = 0; 405 u.sh.type = SEG_JOURNAL;
403 sh.segno = cpu_to_be32(area->a_segno); 406 u.sh.level = 0;
404 sh.ec = cpu_to_be32(area->a_erase_count); 407 u.sh.segno = cpu_to_be32(area->a_segno);
405 sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); 408 u.sh.ec = cpu_to_be32(area->a_erase_count);
406 sh.crc = logfs_crc32(&sh, sizeof(sh), 4); 409 u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
410 u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);
407 411
408 /* This causes a bug in segment.c. Not yet. */ 412 /* This causes a bug in segment.c. Not yet. */
409 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); 413 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
410 414
411 ofs = dev_ofs(sb, area->a_segno, 0); 415 ofs = dev_ofs(sb, area->a_segno, 0);
412 area->a_used_bytes = ALIGN(sizeof(sh), 16); 416 area->a_used_bytes = sizeof(u);
413 logfs_buf_write(area, ofs, &sh, sizeof(sh)); 417 logfs_buf_write(area, ofs, &u, sizeof(u));
414 return 0; 418 return 0;
415} 419}
416 420
@@ -494,6 +498,8 @@ static void account_shadows(struct super_block *sb)
494 498
495 btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); 499 btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
496 btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); 500 btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
501 btree_grim_visitor32(&tree->segment_map, 0, NULL);
502 tree->no_shadowed_segments = 0;
497 503
498 if (li->li_block) { 504 if (li->li_block) {
499 /* 505 /*
@@ -607,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
607 if (len == 0) 613 if (len == 0)
608 return logfs_write_header(super, header, 0, type); 614 return logfs_write_header(super, header, 0, type);
609 615
616 BUG_ON(len > sb->s_blocksize);
610 compr_len = logfs_compress(buf, data, len, sb->s_blocksize); 617 compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
611 if (compr_len < 0 || type == JE_ANCHOR) { 618 if (compr_len < 0 || type == JE_ANCHOR) {
612 BUG_ON(len > sb->s_blocksize);
613 memcpy(data, buf, len); 619 memcpy(data, buf, len);
614 compr_len = len; 620 compr_len = len;
615 compr = COMPR_NONE; 621 compr = COMPR_NONE;
@@ -661,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
661 if (ofs < 0) 667 if (ofs < 0)
662 return ofs; 668 return ofs;
663 logfs_buf_write(area, ofs, super->s_compressed_je, len); 669 logfs_buf_write(area, ofs, super->s_compressed_je, len);
670 BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
664 super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); 671 super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
665 return 0; 672 return 0;
666} 673}
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b84b0eec6024..0a3df1a0c936 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -257,10 +257,14 @@ struct logfs_shadow {
257 * struct shadow_tree 257 * struct shadow_tree
258 * @new: shadows where old_ofs==0, indexed by new_ofs 258 * @new: shadows where old_ofs==0, indexed by new_ofs
259 * @old: shadows where old_ofs!=0, indexed by old_ofs 259 * @old: shadows where old_ofs!=0, indexed by old_ofs
260 * @segment_map: bitfield of segments containing shadows
261 * @no_shadowed_segment: number of segments containing shadows
260 */ 262 */
261struct shadow_tree { 263struct shadow_tree {
262 struct btree_head64 new; 264 struct btree_head64 new;
263 struct btree_head64 old; 265 struct btree_head64 old;
266 struct btree_head32 segment_map;
267 int no_shadowed_segments;
264}; 268};
265 269
266struct object_alias_item { 270struct object_alias_item {
@@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
305 level_t level, int child_no, __be64 val); 309 level_t level, int child_no, __be64 val);
306struct logfs_block_ops { 310struct logfs_block_ops {
307 void (*write_block)(struct logfs_block *block); 311 void (*write_block)(struct logfs_block *block);
308 gc_level_t (*block_level)(struct logfs_block *block);
309 void (*free_block)(struct super_block *sb, struct logfs_block*block); 312 void (*free_block)(struct super_block *sb, struct logfs_block*block);
310 int (*write_alias)(struct super_block *sb, 313 int (*write_alias)(struct super_block *sb,
311 struct logfs_block *block, 314 struct logfs_block *block,
312 write_alias_t *write_one_alias); 315 write_alias_t *write_one_alias);
313}; 316};
314 317
318#define MAX_JOURNAL_ENTRIES 256
319
315struct logfs_super { 320struct logfs_super {
316 struct mtd_info *s_mtd; /* underlying device */ 321 struct mtd_info *s_mtd; /* underlying device */
317 struct block_device *s_bdev; /* underlying device */ 322 struct block_device *s_bdev; /* underlying device */
@@ -378,7 +383,7 @@ struct logfs_super {
378 u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ 383 u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
379 u64 s_last_version; 384 u64 s_last_version;
380 struct logfs_area *s_journal_area; /* open journal segment */ 385 struct logfs_area *s_journal_area; /* open journal segment */
381 __be64 s_je_array[64]; 386 __be64 s_je_array[MAX_JOURNAL_ENTRIES];
382 int s_no_je; 387 int s_no_je;
383 388
384 int s_sum_index; /* for the 12 summaries */ 389 int s_sum_index; /* for the 12 summaries */
@@ -722,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb,
722 return logfs_super(sb)->s_area[(__force u8)gc_level]; 727 return logfs_super(sb)->s_area[(__force u8)gc_level];
723} 728}
724 729
730static inline void logfs_mempool_destroy(mempool_t *pool)
731{
732 if (pool)
733 mempool_destroy(pool);
734}
735
725#endif 736#endif
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index bff40253dfb2..3159db6958e5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -430,25 +430,6 @@ static void inode_write_block(struct logfs_block *block)
430 } 430 }
431} 431}
432 432
433static gc_level_t inode_block_level(struct logfs_block *block)
434{
435 BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
436 return GC_LEVEL(LOGFS_MAX_LEVELS);
437}
438
439static gc_level_t indirect_block_level(struct logfs_block *block)
440{
441 struct page *page;
442 struct inode *inode;
443 u64 bix;
444 level_t level;
445
446 page = block->page;
447 inode = page->mapping->host;
448 logfs_unpack_index(page->index, &bix, &level);
449 return expand_level(inode->i_ino, level);
450}
451
452/* 433/*
453 * This silences a false, yet annoying gcc warning. I hate it when my editor 434 * This silences a false, yet annoying gcc warning. I hate it when my editor
454 * jumps into bitops.h each time I recompile this file. 435 * jumps into bitops.h each time I recompile this file.
@@ -587,14 +568,12 @@ static void indirect_free_block(struct super_block *sb,
587 568
588static struct logfs_block_ops inode_block_ops = { 569static struct logfs_block_ops inode_block_ops = {
589 .write_block = inode_write_block, 570 .write_block = inode_write_block,
590 .block_level = inode_block_level,
591 .free_block = inode_free_block, 571 .free_block = inode_free_block,
592 .write_alias = inode_write_alias, 572 .write_alias = inode_write_alias,
593}; 573};
594 574
595struct logfs_block_ops indirect_block_ops = { 575struct logfs_block_ops indirect_block_ops = {
596 .write_block = indirect_write_block, 576 .write_block = indirect_write_block,
597 .block_level = indirect_block_level,
598 .free_block = indirect_free_block, 577 .free_block = indirect_free_block,
599 .write_alias = indirect_write_alias, 578 .write_alias = indirect_write_alias,
600}; 579};
@@ -1241,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
1241 mempool_free(shadow, super->s_shadow_pool); 1220 mempool_free(shadow, super->s_shadow_pool);
1242} 1221}
1243 1222
1223static void mark_segment(struct shadow_tree *tree, u32 segno)
1224{
1225 int err;
1226
1227 if (!btree_lookup32(&tree->segment_map, segno)) {
1228 err = btree_insert32(&tree->segment_map, segno, (void *)1,
1229 GFP_NOFS);
1230 BUG_ON(err);
1231 tree->no_shadowed_segments++;
1232 }
1233}
1234
1244/** 1235/**
1245 * fill_shadow_tree - Propagate shadow tree changes due to a write 1236 * fill_shadow_tree - Propagate shadow tree changes due to a write
1246 * @inode: Inode owning the page 1237 * @inode: Inode owning the page
@@ -1288,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page,
1288 1279
1289 super->s_dirty_used_bytes += shadow->new_len; 1280 super->s_dirty_used_bytes += shadow->new_len;
1290 super->s_dirty_free_bytes += shadow->old_len; 1281 super->s_dirty_free_bytes += shadow->old_len;
1282 mark_segment(tree, shadow->old_ofs >> super->s_segshift);
1283 mark_segment(tree, shadow->new_ofs >> super->s_segshift);
1291 } 1284 }
1292} 1285}
1293 1286
@@ -1845,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size)
1845 return logfs_truncate_direct(inode, size); 1838 return logfs_truncate_direct(inode, size);
1846} 1839}
1847 1840
1848int logfs_truncate(struct inode *inode, u64 size) 1841/*
1842 * Truncate, by changing the segment file, can consume a fair amount
1843 * of resources. So back off from time to time and do some GC.
1844 * 8 or 2048 blocks should be well within safety limits even if
1845 * every single block resided in a different segment.
1846 */
1847#define TRUNCATE_STEP (8 * 1024 * 1024)
1848int logfs_truncate(struct inode *inode, u64 target)
1849{ 1849{
1850 struct super_block *sb = inode->i_sb; 1850 struct super_block *sb = inode->i_sb;
1851 int err; 1851 u64 size = i_size_read(inode);
1852 int err = 0;
1852 1853
1853 logfs_get_wblocks(sb, NULL, 1); 1854 size = ALIGN(size, TRUNCATE_STEP);
1854 err = __logfs_truncate(inode, size); 1855 while (size > target) {
1855 if (!err) 1856 if (size > TRUNCATE_STEP)
1856 err = __logfs_write_inode(inode, 0); 1857 size -= TRUNCATE_STEP;
1857 logfs_put_wblocks(sb, NULL, 1); 1858 else
1859 size = 0;
1860 if (size < target)
1861 size = target;
1862
1863 logfs_get_wblocks(sb, NULL, 1);
1864 err = __logfs_truncate(inode, target);
1865 if (!err)
1866 err = __logfs_write_inode(inode, 0);
1867 logfs_put_wblocks(sb, NULL, 1);
1868 }
1858 1869
1859 if (!err) 1870 if (!err)
1860 err = vmtruncate(inode, size); 1871 err = vmtruncate(inode, target);
1861 1872
1862 /* I don't trust error recovery yet. */ 1873 /* I don't trust error recovery yet. */
1863 WARN_ON(err); 1874 WARN_ON(err);
@@ -2251,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb)
2251 struct logfs_super *super = logfs_super(sb); 2262 struct logfs_super *super = logfs_super(sb);
2252 2263
2253 destroy_meta_inode(super->s_segfile_inode); 2264 destroy_meta_inode(super->s_segfile_inode);
2254 if (super->s_block_pool) 2265 logfs_mempool_destroy(super->s_block_pool);
2255 mempool_destroy(super->s_block_pool); 2266 logfs_mempool_destroy(super->s_shadow_pool);
2256 if (super->s_shadow_pool)
2257 mempool_destroy(super->s_shadow_pool);
2258} 2267}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 801a3a141625..f77ce2b470ba 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -183,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
183 return 0; 183 return 0;
184} 184}
185 185
186static gc_level_t btree_block_level(struct logfs_block *block)
187{
188 return expand_level(block->ino, block->level);
189}
190
191static struct logfs_block_ops btree_block_ops = { 186static struct logfs_block_ops btree_block_ops = {
192 .write_block = btree_write_block, 187 .write_block = btree_write_block,
193 .block_level = btree_block_level,
194 .free_block = __free_block, 188 .free_block = __free_block,
195 .write_alias = btree_write_alias, 189 .write_alias = btree_write_alias,
196}; 190};
@@ -919,7 +913,7 @@ err:
919 for (i--; i >= 0; i--) 913 for (i--; i >= 0; i--)
920 free_area(super->s_area[i]); 914 free_area(super->s_area[i]);
921 free_area(super->s_journal_area); 915 free_area(super->s_journal_area);
922 mempool_destroy(super->s_alias_pool); 916 logfs_mempool_destroy(super->s_alias_pool);
923 return -ENOMEM; 917 return -ENOMEM;
924} 918}
925 919
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index b60bfac3263c..5866ee6e1327 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -12,6 +12,7 @@
12#include "logfs.h" 12#include "logfs.h"
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/blkdev.h>
15#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
16#include <linux/statfs.h> 17#include <linux/statfs.h>
17#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
@@ -137,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
137 sb->s_fs_info = super; 138 sb->s_fs_info = super;
138 sb->s_mtd = super->s_mtd; 139 sb->s_mtd = super->s_mtd;
139 sb->s_bdev = super->s_bdev; 140 sb->s_bdev = super->s_bdev;
141 if (sb->s_bdev)
142 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
143 if (sb->s_mtd)
144 sb->s_bdi = sb->s_mtd->backing_dev_info;
140 return 0; 145 return 0;
141} 146}
142 147
@@ -452,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
452 457
453 btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); 458 btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
454 btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); 459 btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
460 btree_init_mempool32(&super->s_shadow_tree.segment_map,
461 super->s_btree_pool);
455 462
456 ret = logfs_init_mapping(sb); 463 ret = logfs_init_mapping(sb);
457 if (ret) 464 if (ret)
@@ -516,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb)
516 if (super->s_erase_page) 523 if (super->s_erase_page)
517 __free_page(super->s_erase_page); 524 __free_page(super->s_erase_page);
518 super->s_devops->put_device(sb); 525 super->s_devops->put_device(sb);
519 mempool_destroy(super->s_btree_pool); 526 logfs_mempool_destroy(super->s_btree_pool);
520 mempool_destroy(super->s_alias_pool); 527 logfs_mempool_destroy(super->s_alias_pool);
521 kfree(super); 528 kfree(super);
522 log_super("LogFS: Finished unmounting\n"); 529 log_super("LogFS: Finished unmounting\n");
523} 530}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91a7e42..16df7277a92e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1641 if (nd->last.name[nd->last.len]) { 1641 if (nd->last.name[nd->last.len]) {
1642 if (open_flag & O_CREAT) 1642 if (open_flag & O_CREAT)
1643 goto exit; 1643 goto exit;
1644 nd->flags |= LOOKUP_DIRECTORY; 1644 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1645 } 1645 }
1646 1646
1647 /* just plain open? */ 1647 /* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
1830 } 1830 }
1831 if (open_flag & O_DIRECTORY) 1831 if (open_flag & O_DIRECTORY)
1832 nd.flags |= LOOKUP_DIRECTORY; 1832 nd.flags |= LOOKUP_DIRECTORY;
1833 if (!(open_flag & O_NOFOLLOW))
1834 nd.flags |= LOOKUP_FOLLOW;
1833 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 1835 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1834 while (unlikely(!filp)) { /* trailing symlink */ 1836 while (unlikely(!filp)) { /* trailing symlink */
1835 struct path holder; 1837 struct path holder;
@@ -1837,7 +1839,7 @@ reval:
1837 void *cookie; 1839 void *cookie;
1838 error = -ELOOP; 1840 error = -ELOOP;
1839 /* S_ISDIR part is a temporary automount kludge */ 1841 /* S_ISDIR part is a temporary automount kludge */
1840 if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) 1842 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
1841 goto exit_dput; 1843 goto exit_dput;
1842 if (count++ == 32) 1844 if (count++ == 32)
1843 goto exit_dput; 1845 goto exit_dput;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cf98da1be23e..fa3385154023 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
526 sb->s_blocksize_bits = 10; 526 sb->s_blocksize_bits = 10;
527 sb->s_magic = NCP_SUPER_MAGIC; 527 sb->s_magic = NCP_SUPER_MAGIC;
528 sb->s_op = &ncp_sops; 528 sb->s_op = &ncp_sops;
529 sb->s_bdi = &server->bdi;
529 530
530 server = NCP_SBP(sb); 531 server = NCP_SBP(sb);
531 memset(server, 0, sizeof(*server)); 532 memset(server, 0, sizeof(*server));
532 533
534 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
535 if (error)
536 goto out_bdi;
537
533 server->ncp_filp = ncp_filp; 538 server->ncp_filp = ncp_filp;
534 server->ncp_sock = sock; 539 server->ncp_sock = sock;
535 540
@@ -719,6 +724,8 @@ out_fput2:
719 if (server->info_filp) 724 if (server->info_filp)
720 fput(server->info_filp); 725 fput(server->info_filp);
721out_fput: 726out_fput:
727 bdi_destroy(&server->bdi);
728out_bdi:
722 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 729 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
723 * 730 *
724 * The previously used put_filp(ncp_filp); was bogous, since 731 * The previously used put_filp(ncp_filp); was bogous, since
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb)
756 kill_pid(server->m.wdog_pid, SIGTERM, 1); 763 kill_pid(server->m.wdog_pid, SIGTERM, 1);
757 put_pid(server->m.wdog_pid); 764 put_pid(server->m.wdog_pid);
758 765
766 bdi_destroy(&server->bdi);
759 kfree(server->priv.data); 767 kfree(server->priv.data);
760 kfree(server->auth.object_name); 768 kfree(server->auth.object_name);
761 vfree(server->rxbuf); 769 vfree(server->rxbuf);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a8766c4ef2e0..acc9c4943b84 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -966,6 +966,8 @@ out_error:
966static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) 966static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
967{ 967{
968 target->flags = source->flags; 968 target->flags = source->flags;
969 target->rsize = source->rsize;
970 target->wsize = source->wsize;
969 target->acregmin = source->acregmin; 971 target->acregmin = source->acregmin;
970 target->acregmax = source->acregmax; 972 target->acregmax = source->acregmax;
971 target->acdirmin = source->acdirmin; 973 target->acdirmin = source->acdirmin;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 15671245c6ee..ea61d26e7871 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
24 24
25static void nfs_do_free_delegation(struct nfs_delegation *delegation) 25static void nfs_do_free_delegation(struct nfs_delegation *delegation)
26{ 26{
27 if (delegation->cred)
28 put_rpccred(delegation->cred);
27 kfree(delegation); 29 kfree(delegation);
28} 30}
29 31
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
36 38
37static void nfs_free_delegation(struct nfs_delegation *delegation) 39static void nfs_free_delegation(struct nfs_delegation *delegation)
38{ 40{
39 struct rpc_cred *cred;
40
41 cred = rcu_dereference(delegation->cred);
42 rcu_assign_pointer(delegation->cred, NULL);
43 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 41 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
44 if (cred)
45 put_rpccred(cred);
46} 42}
47 43
48void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) 44void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -129,21 +125,35 @@ again:
129 */ 125 */
130void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 126void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
131{ 127{
132 struct nfs_delegation *delegation = NFS_I(inode)->delegation; 128 struct nfs_delegation *delegation;
133 struct rpc_cred *oldcred; 129 struct rpc_cred *oldcred = NULL;
134 130
135 if (delegation == NULL) 131 rcu_read_lock();
136 return; 132 delegation = rcu_dereference(NFS_I(inode)->delegation);
137 memcpy(delegation->stateid.data, res->delegation.data, 133 if (delegation != NULL) {
138 sizeof(delegation->stateid.data)); 134 spin_lock(&delegation->lock);
139 delegation->type = res->delegation_type; 135 if (delegation->inode != NULL) {
140 delegation->maxsize = res->maxsize; 136 memcpy(delegation->stateid.data, res->delegation.data,
141 oldcred = delegation->cred; 137 sizeof(delegation->stateid.data));
142 delegation->cred = get_rpccred(cred); 138 delegation->type = res->delegation_type;
143 clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); 139 delegation->maxsize = res->maxsize;
144 NFS_I(inode)->delegation_state = delegation->type; 140 oldcred = delegation->cred;
145 smp_wmb(); 141 delegation->cred = get_rpccred(cred);
146 put_rpccred(oldcred); 142 clear_bit(NFS_DELEGATION_NEED_RECLAIM,
143 &delegation->flags);
144 NFS_I(inode)->delegation_state = delegation->type;
145 spin_unlock(&delegation->lock);
146 put_rpccred(oldcred);
147 rcu_read_unlock();
148 } else {
149 /* We appear to have raced with a delegation return. */
150 spin_unlock(&delegation->lock);
151 rcu_read_unlock();
152 nfs_inode_set_delegation(inode, cred, res);
153 }
154 } else {
155 rcu_read_unlock();
156 }
147} 157}
148 158
149static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 159static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
166 return inode; 176 return inode;
167} 177}
168 178
169static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) 179static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
180 const nfs4_stateid *stateid,
181 struct nfs_client *clp)
170{ 182{
171 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); 183 struct nfs_delegation *delegation =
184 rcu_dereference_protected(nfsi->delegation,
185 lockdep_is_held(&clp->cl_lock));
172 186
173 if (delegation == NULL) 187 if (delegation == NULL)
174 goto nomatch; 188 goto nomatch;
@@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
195{ 209{
196 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 210 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
197 struct nfs_inode *nfsi = NFS_I(inode); 211 struct nfs_inode *nfsi = NFS_I(inode);
198 struct nfs_delegation *delegation; 212 struct nfs_delegation *delegation, *old_delegation;
199 struct nfs_delegation *freeme = NULL; 213 struct nfs_delegation *freeme = NULL;
200 int status = 0; 214 int status = 0;
201 215
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
213 spin_lock_init(&delegation->lock); 227 spin_lock_init(&delegation->lock);
214 228
215 spin_lock(&clp->cl_lock); 229 spin_lock(&clp->cl_lock);
216 if (rcu_dereference(nfsi->delegation) != NULL) { 230 old_delegation = rcu_dereference_protected(nfsi->delegation,
217 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, 231 lockdep_is_held(&clp->cl_lock));
218 sizeof(delegation->stateid)) == 0 && 232 if (old_delegation != NULL) {
219 delegation->type == nfsi->delegation->type) { 233 if (memcmp(&delegation->stateid, &old_delegation->stateid,
234 sizeof(old_delegation->stateid)) == 0 &&
235 delegation->type == old_delegation->type) {
220 goto out; 236 goto out;
221 } 237 }
222 /* 238 /*
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
226 dfprintk(FILE, "%s: server %s handed out " 242 dfprintk(FILE, "%s: server %s handed out "
227 "a duplicate delegation!\n", 243 "a duplicate delegation!\n",
228 __func__, clp->cl_hostname); 244 __func__, clp->cl_hostname);
229 if (delegation->type <= nfsi->delegation->type) { 245 if (delegation->type <= old_delegation->type) {
230 freeme = delegation; 246 freeme = delegation;
231 delegation = NULL; 247 delegation = NULL;
232 goto out; 248 goto out;
233 } 249 }
234 freeme = nfs_detach_delegation_locked(nfsi, NULL); 250 freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
235 } 251 }
236 list_add_rcu(&delegation->super_list, &clp->cl_delegations); 252 list_add_rcu(&delegation->super_list, &clp->cl_delegations);
237 nfsi->delegation_state = delegation->type; 253 nfsi->delegation_state = delegation->type;
@@ -301,7 +317,7 @@ restart:
301 if (inode == NULL) 317 if (inode == NULL)
302 continue; 318 continue;
303 spin_lock(&clp->cl_lock); 319 spin_lock(&clp->cl_lock);
304 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 320 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
305 spin_unlock(&clp->cl_lock); 321 spin_unlock(&clp->cl_lock);
306 rcu_read_unlock(); 322 rcu_read_unlock();
307 if (delegation != NULL) { 323 if (delegation != NULL) {
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
330 struct nfs_inode *nfsi = NFS_I(inode); 346 struct nfs_inode *nfsi = NFS_I(inode);
331 struct nfs_delegation *delegation; 347 struct nfs_delegation *delegation;
332 348
333 if (rcu_dereference(nfsi->delegation) != NULL) { 349 if (rcu_access_pointer(nfsi->delegation) != NULL) {
334 spin_lock(&clp->cl_lock); 350 spin_lock(&clp->cl_lock);
335 delegation = nfs_detach_delegation_locked(nfsi, NULL); 351 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
336 spin_unlock(&clp->cl_lock); 352 spin_unlock(&clp->cl_lock);
337 if (delegation != NULL) 353 if (delegation != NULL)
338 nfs_do_return_delegation(inode, delegation, 0); 354 nfs_do_return_delegation(inode, delegation, 0);
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
346 struct nfs_delegation *delegation; 362 struct nfs_delegation *delegation;
347 int err = 0; 363 int err = 0;
348 364
349 if (rcu_dereference(nfsi->delegation) != NULL) { 365 if (rcu_access_pointer(nfsi->delegation) != NULL) {
350 spin_lock(&clp->cl_lock); 366 spin_lock(&clp->cl_lock);
351 delegation = nfs_detach_delegation_locked(nfsi, NULL); 367 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
352 spin_unlock(&clp->cl_lock); 368 spin_unlock(&clp->cl_lock);
353 if (delegation != NULL) { 369 if (delegation != NULL) {
354 nfs_msync_inode(inode); 370 nfs_msync_inode(inode);
@@ -526,7 +542,7 @@ restart:
526 if (inode == NULL) 542 if (inode == NULL)
527 continue; 543 continue;
528 spin_lock(&clp->cl_lock); 544 spin_lock(&clp->cl_lock);
529 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 545 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
530 spin_unlock(&clp->cl_lock); 546 spin_unlock(&clp->cl_lock);
531 rcu_read_unlock(); 547 rcu_read_unlock();
532 if (delegation != NULL) 548 if (delegation != NULL)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index be46f26c9a56..a7bb5c694aa3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -837,6 +837,8 @@ out_zap_parent:
837 /* If we have submounts, don't unhash ! */ 837 /* If we have submounts, don't unhash ! */
838 if (have_submounts(dentry)) 838 if (have_submounts(dentry))
839 goto out_valid; 839 goto out_valid;
840 if (dentry->d_flags & DCACHE_DISCONNECTED)
841 goto out_valid;
840 shrink_dcache_parent(dentry); 842 shrink_dcache_parent(dentry);
841 } 843 }
842 d_drop(dentry); 844 d_drop(dentry);
@@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1050 struct inode *dir; 1052 struct inode *dir;
1051 int openflags, ret = 0; 1053 int openflags, ret = 0;
1052 1054
1053 if (!is_atomic_open(nd)) 1055 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1054 goto no_open; 1056 goto no_open;
1055 parent = dget_parent(dentry); 1057 parent = dget_parent(dentry);
1056 dir = parent->d_inode; 1058 dir = parent->d_inode;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 638067007c65..071fcedd517c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5218,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5218 msg.rpc_resp = &calldata->res; 5218 msg.rpc_resp = &calldata->res;
5219 task_setup_data.callback_data = calldata; 5219 task_setup_data.callback_data = calldata;
5220 task = rpc_run_task(&task_setup_data); 5220 task = rpc_run_task(&task_setup_data);
5221 if (IS_ERR(task)) 5221 if (IS_ERR(task)) {
5222 status = PTR_ERR(task); 5222 status = PTR_ERR(task);
5223 goto out;
5224 }
5223 rpc_put_task(task); 5225 rpc_put_task(task);
5226 return 0;
5224out: 5227out:
5225 dprintk("<-- %s status=%d\n", __func__, status); 5228 dprintk("<-- %s status=%d\n", __func__, status);
5226 return status; 5229 return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e01637240eeb..b4148fc00f9f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2187 if (data->version == 4) { 2187 if (data->version == 4) {
2188 error = nfs4_try_mount(flags, dev_name, data, mnt); 2188 error = nfs4_try_mount(flags, dev_name, data, mnt);
2189 kfree(data->client_address); 2189 kfree(data->client_address);
2190 kfree(data->nfs_server.export_path);
2190 goto out; 2191 goto out;
2191 } 2192 }
2192#endif /* CONFIG_NFS_V4 */ 2193#endif /* CONFIG_NFS_V4 */
@@ -2657,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2657 devname = nfs_path(path->mnt->mnt_devname, 2658 devname = nfs_path(path->mnt->mnt_devname,
2658 path->mnt->mnt_root, path->dentry, 2659 path->mnt->mnt_root, path->dentry,
2659 page, PAGE_SIZE); 2660 page, PAGE_SIZE);
2660 if (devname == NULL) 2661 if (IS_ERR(devname))
2661 goto out_freepage; 2662 goto out_freepage;
2662 tmp = kstrdup(devname, GFP_KERNEL); 2663 tmp = kstrdup(devname, GFP_KERNEL);
2663 if (tmp == NULL) 2664 if (tmp == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de38d63aa920..3aea3ca98ab7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1201 1201
1202 1202
1203#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1203#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1204static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1205{
1206 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1207 return 1;
1208 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
1209 NFS_INO_COMMIT, nfs_wait_bit_killable,
1210 TASK_KILLABLE))
1211 return 1;
1212 return 0;
1213}
1214
1215static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1216{
1217 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1218 smp_mb__after_clear_bit();
1219 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1220}
1221
1222
1204static void nfs_commitdata_release(void *data) 1223static void nfs_commitdata_release(void *data)
1205{ 1224{
1206 struct nfs_write_data *wdata = data; 1225 struct nfs_write_data *wdata = data;
@@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1262 task = rpc_run_task(&task_setup_data); 1281 task = rpc_run_task(&task_setup_data);
1263 if (IS_ERR(task)) 1282 if (IS_ERR(task))
1264 return PTR_ERR(task); 1283 return PTR_ERR(task);
1265 if (how & FLUSH_SYNC)
1266 rpc_wait_for_completion_task(task);
1267 rpc_put_task(task); 1284 rpc_put_task(task);
1268 return 0; 1285 return 0;
1269} 1286}
@@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1294 BDI_RECLAIMABLE); 1311 BDI_RECLAIMABLE);
1295 nfs_clear_page_tag_locked(req); 1312 nfs_clear_page_tag_locked(req);
1296 } 1313 }
1314 nfs_commit_clear_lock(NFS_I(inode));
1297 return -ENOMEM; 1315 return -ENOMEM;
1298} 1316}
1299 1317
@@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
1349 next: 1367 next:
1350 nfs_clear_page_tag_locked(req); 1368 nfs_clear_page_tag_locked(req);
1351 } 1369 }
1370 nfs_commit_clear_lock(NFS_I(data->inode));
1352 nfs_commitdata_release(calldata); 1371 nfs_commitdata_release(calldata);
1353} 1372}
1354 1373
@@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
1363static int nfs_commit_inode(struct inode *inode, int how) 1382static int nfs_commit_inode(struct inode *inode, int how)
1364{ 1383{
1365 LIST_HEAD(head); 1384 LIST_HEAD(head);
1366 int res; 1385 int may_wait = how & FLUSH_SYNC;
1386 int res = 0;
1367 1387
1388 if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
1389 goto out;
1368 spin_lock(&inode->i_lock); 1390 spin_lock(&inode->i_lock);
1369 res = nfs_scan_commit(inode, &head, 0, 0); 1391 res = nfs_scan_commit(inode, &head, 0, 0);
1370 spin_unlock(&inode->i_lock); 1392 spin_unlock(&inode->i_lock);
@@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
1372 int error = nfs_commit_list(inode, &head, how); 1394 int error = nfs_commit_list(inode, &head, how);
1373 if (error < 0) 1395 if (error < 0)
1374 return error; 1396 return error;
1375 } 1397 if (may_wait)
1398 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1399 nfs_wait_bit_killable,
1400 TASK_KILLABLE);
1401 } else
1402 nfs_commit_clear_lock(NFS_I(inode));
1403out:
1376 return res; 1404 return res;
1377} 1405}
1378 1406
@@ -1444,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1444 1472
1445 BUG_ON(!PageLocked(page)); 1473 BUG_ON(!PageLocked(page));
1446 for (;;) { 1474 for (;;) {
1475 wait_on_page_writeback(page);
1447 req = nfs_page_find_request(page); 1476 req = nfs_page_find_request(page);
1448 if (req == NULL) 1477 if (req == NULL)
1449 break; 1478 break;
@@ -1478,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page)
1478 .range_start = range_start, 1507 .range_start = range_start,
1479 .range_end = range_end, 1508 .range_end = range_end,
1480 }; 1509 };
1481 struct nfs_page *req;
1482 int need_commit;
1483 int ret; 1510 int ret;
1484 1511
1485 while(PagePrivate(page)) { 1512 while(PagePrivate(page)) {
1513 wait_on_page_writeback(page);
1486 if (clear_page_dirty_for_io(page)) { 1514 if (clear_page_dirty_for_io(page)) {
1487 ret = nfs_writepage_locked(page, &wbc); 1515 ret = nfs_writepage_locked(page, &wbc);
1488 if (ret < 0) 1516 if (ret < 0)
1489 goto out_error; 1517 goto out_error;
1490 } 1518 }
1491 req = nfs_find_and_lock_request(page); 1519 ret = sync_inode(inode, &wbc);
1492 if (!req) 1520 if (ret < 0)
1493 break;
1494 if (IS_ERR(req)) {
1495 ret = PTR_ERR(req);
1496 goto out_error; 1521 goto out_error;
1497 }
1498 need_commit = test_bit(PG_CLEAN, &req->wb_flags);
1499 nfs_clear_page_tag_locked(req);
1500 if (need_commit) {
1501 ret = nfs_commit_inode(inode, FLUSH_SYNC);
1502 if (ret < 0)
1503 goto out_error;
1504 }
1505 } 1522 }
1506 return 0; 1523 return 0;
1507out_error: 1524out_error:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e1703175ee28..34ccf815ea8a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
161 argp->p = page_address(argp->pagelist[0]); 161 argp->p = page_address(argp->pagelist[0]);
162 argp->pagelist++; 162 argp->pagelist++;
163 if (argp->pagelen < PAGE_SIZE) { 163 if (argp->pagelen < PAGE_SIZE) {
164 argp->end = p + (argp->pagelen>>2); 164 argp->end = argp->p + (argp->pagelen>>2);
165 argp->pagelen = 0; 165 argp->pagelen = 0;
166 } else { 166 } else {
167 argp->end = p + (PAGE_SIZE>>2); 167 argp->end = argp->p + (PAGE_SIZE>>2);
168 argp->pagelen -= PAGE_SIZE; 168 argp->pagelen -= PAGE_SIZE;
169 } 169 }
170 memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); 170 memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
@@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1426 argp->p = page_address(argp->pagelist[0]); 1426 argp->p = page_address(argp->pagelist[0]);
1427 argp->pagelist++; 1427 argp->pagelist++;
1428 if (argp->pagelen < PAGE_SIZE) { 1428 if (argp->pagelen < PAGE_SIZE) {
1429 argp->end = p + (argp->pagelen>>2); 1429 argp->end = argp->p + (argp->pagelen>>2);
1430 argp->pagelen = 0; 1430 argp->pagelen = 0;
1431 } else { 1431 } else {
1432 argp->end = p + (PAGE_SIZE>>2); 1432 argp->end = argp->p + (PAGE_SIZE>>2);
1433 argp->pagelen -= PAGE_SIZE; 1433 argp->pagelen -= PAGE_SIZE;
1434 } 1434 }
1435 } 1435 }
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e7655a..48145f505a6a 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
749 sb->s_export_op = &nilfs_export_ops; 749 sb->s_export_op = &nilfs_export_ops;
750 sb->s_root = NULL; 750 sb->s_root = NULL;
751 sb->s_time_gran = 1; 751 sb->s_time_gran = 1;
752 sb->s_bdi = nilfs->ns_bdi;
752 753
753 err = load_nilfs(nilfs, sbi); 754 err = load_nilfs(nilfs, sbi);
754 if (err) 755 if (err)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbffe729..b3a159b21cfd 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
15 15
16config INOTIFY_USER 16config INOTIFY_USER
17 bool "Inotify support for userspace" 17 bool "Inotify support for userspace"
18 select ANON_INODES
18 select FSNOTIFY 19 select FSNOTIFY
19 default y 20 default y
20 ---help--- 21 ---help---
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index ecebb2276790..f9d5d3ffc75a 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
406 struct buffer_head *bh) 406 struct buffer_head *bh)
407{ 407{
408 int ret = 0; 408 int ret = 0;
409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
409 410
410 mlog_entry_void(); 411 mlog_entry_void();
411 412
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
425 426
426 get_bh(bh); /* for end_buffer_write_sync() */ 427 get_bh(bh); /* for end_buffer_write_sync() */
427 bh->b_end_io = end_buffer_write_sync; 428 bh->b_end_io = end_buffer_write_sync;
429 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
428 submit_bh(WRITE, bh); 430 submit_bh(WRITE, bh);
429 431
430 wait_on_buffer(bh); 432 wait_on_buffer(bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index a795eb91f4ea..12d5eb78a11a 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
184 BUG_ON(!lksb); 184 BUG_ON(!lksb);
185 185
186 /* only updates if this node masters the lockres */ 186 /* only updates if this node masters the lockres */
187 spin_lock(&res->spinlock);
187 if (res->owner == dlm->node_num) { 188 if (res->owner == dlm->node_num) {
188
189 spin_lock(&res->spinlock);
190 /* check the lksb flags for the direction */ 189 /* check the lksb flags for the direction */
191 if (lksb->flags & DLM_LKSB_GET_LVB) { 190 if (lksb->flags & DLM_LKSB_GET_LVB) {
192 mlog(0, "getting lvb from lockres for %s node\n", 191 mlog(0, "getting lvb from lockres for %s node\n",
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
201 * here. In the future we might want to clear it at the time 200 * here. In the future we might want to clear it at the time
202 * the put is actually done. 201 * the put is actually done.
203 */ 202 */
204 spin_unlock(&res->spinlock);
205 } 203 }
204 spin_unlock(&res->spinlock);
206 205
207 /* reset any lvb flags on the lksb */ 206 /* reset any lvb flags on the lksb */
208 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); 207 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de157a08c..b83d6107a1f5 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
112 * O_RDONLY -> PRMODE level 112 * O_RDONLY -> PRMODE level
113 * O_WRONLY -> EXMODE level 113 * O_WRONLY -> EXMODE level
114 * 114 *
115 * O_NONBLOCK -> LKM_NOQUEUE 115 * O_NONBLOCK -> NOQUEUE
116 */ 116 */
117static int dlmfs_decode_open_flags(int open_flags, 117static int dlmfs_decode_open_flags(int open_flags,
118 int *level, 118 int *level,
119 int *flags) 119 int *flags)
120{ 120{
121 if (open_flags & (O_WRONLY|O_RDWR)) 121 if (open_flags & (O_WRONLY|O_RDWR))
122 *level = LKM_EXMODE; 122 *level = DLM_LOCK_EX;
123 else 123 else
124 *level = LKM_PRMODE; 124 *level = DLM_LOCK_PR;
125 125
126 *flags = 0; 126 *flags = 0;
127 if (open_flags & O_NONBLOCK) 127 if (open_flags & O_NONBLOCK)
128 *flags |= LKM_NOQUEUE; 128 *flags |= DLM_LKF_NOQUEUE;
129 129
130 return 0; 130 return 0;
131} 131}
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
166 * to be able userspace to be able to distinguish a 166 * to be able userspace to be able to distinguish a
167 * valid lock request from one that simply couldn't be 167 * valid lock request from one that simply couldn't be
168 * granted. */ 168 * granted. */
169 if (flags & LKM_NOQUEUE && status == -EAGAIN) 169 if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
170 status = -ETXTBSY; 170 status = -ETXTBSY;
171 kfree(fp); 171 kfree(fp);
172 goto bail; 172 goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
193 status = 0; 193 status = 0;
194 if (fp) { 194 if (fp) {
195 level = fp->fp_lock_level; 195 level = fp->fp_lock_level;
196 if (level != LKM_IVMODE) 196 if (level != DLM_LOCK_IV)
197 user_dlm_cluster_unlock(&ip->ip_lockres, level); 197 user_dlm_cluster_unlock(&ip->ip_lockres, level);
198 198
199 kfree(fp); 199 kfree(fp);
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
262 if ((count + *ppos) > i_size_read(inode)) 262 if ((count + *ppos) > i_size_read(inode))
263 readlen = i_size_read(inode) - *ppos; 263 readlen = i_size_read(inode) - *ppos;
264 else 264 else
265 readlen = count - *ppos; 265 readlen = count;
266 266
267 lvb_buf = kmalloc(readlen, GFP_NOFS); 267 lvb_buf = kmalloc(readlen, GFP_NOFS);
268 if (!lvb_buf) 268 if (!lvb_buf)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc8341e..a5fbd9cea968 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -684,6 +684,7 @@ restarted_transaction:
684 if (why == RESTART_META) { 684 if (why == RESTART_META) {
685 mlog(0, "restarting function.\n"); 685 mlog(0, "restarting function.\n");
686 restart_func = 1; 686 restart_func = 1;
687 status = 0;
687 } else { 688 } else {
688 BUG_ON(why != RESTART_TRANS); 689 BUG_ON(why != RESTART_TRANS);
689 690
@@ -1981,18 +1982,18 @@ relock:
1981 /* communicate with ocfs2_dio_end_io */ 1982 /* communicate with ocfs2_dio_end_io */
1982 ocfs2_iocb_set_rw_locked(iocb, rw_level); 1983 ocfs2_iocb_set_rw_locked(iocb, rw_level);
1983 1984
1984 if (direct_io) { 1985 ret = generic_segment_checks(iov, &nr_segs, &ocount,
1985 ret = generic_segment_checks(iov, &nr_segs, &ocount, 1986 VERIFY_READ);
1986 VERIFY_READ); 1987 if (ret)
1987 if (ret) 1988 goto out_dio;
1988 goto out_dio;
1989 1989
1990 count = ocount; 1990 count = ocount;
1991 ret = generic_write_checks(file, ppos, &count, 1991 ret = generic_write_checks(file, ppos, &count,
1992 S_ISBLK(inode->i_mode)); 1992 S_ISBLK(inode->i_mode));
1993 if (ret) 1993 if (ret)
1994 goto out_dio; 1994 goto out_dio;
1995 1995
1996 if (direct_io) {
1996 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 1997 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
1997 ppos, count, ocount); 1998 ppos, count, ocount);
1998 if (written < 0) { 1999 if (written < 0) {
@@ -2007,7 +2008,10 @@ relock:
2007 goto out_dio; 2008 goto out_dio;
2008 } 2009 }
2009 } else { 2010 } else {
2010 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); 2011 current->backing_dev_info = file->f_mapping->backing_dev_info;
2012 written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
2013 ppos, count, 0);
2014 current->backing_dev_info = NULL;
2011 } 2015 }
2012 2016
2013out_dio: 2017out_dio:
@@ -2021,9 +2025,9 @@ out_dio:
2021 if (ret < 0) 2025 if (ret < 0)
2022 written = ret; 2026 written = ret;
2023 2027
2024 if (!ret && (old_size != i_size_read(inode) || 2028 if (!ret && ((old_size != i_size_read(inode)) ||
2025 old_clusters != OCFS2_I(inode)->ip_clusters || 2029 (old_clusters != OCFS2_I(inode)->ip_clusters) ||
2026 has_refcount)) { 2030 has_refcount)) {
2027 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2031 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2028 if (ret < 0) 2032 if (ret < 0)
2029 written = ret; 2033 written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb68b6d..af189887201c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -558,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
558 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 558 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
559 if (IS_ERR(handle)) { 559 if (IS_ERR(handle)) {
560 status = PTR_ERR(handle); 560 status = PTR_ERR(handle);
561 handle = NULL;
561 mlog_errno(status); 562 mlog_errno(status);
562 goto out; 563 goto out;
563 } 564 }
@@ -639,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode,
639 goto bail_unlock; 640 goto bail_unlock;
640 } 641 }
641 642
642 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, 643 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
643 orphan_dir_bh); 644 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
644 if (status < 0) { 645 orphan_dir_bh);
645 mlog_errno(status); 646 if (status < 0) {
646 goto bail_commit; 647 mlog_errno(status);
648 goto bail_commit;
649 }
647 } 650 }
648 651
649 /* set the inodes dtime */ 652 /* set the inodes dtime */
@@ -722,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
722static int ocfs2_wipe_inode(struct inode *inode, 725static int ocfs2_wipe_inode(struct inode *inode,
723 struct buffer_head *di_bh) 726 struct buffer_head *di_bh)
724{ 727{
725 int status, orphaned_slot; 728 int status, orphaned_slot = -1;
726 struct inode *orphan_dir_inode = NULL; 729 struct inode *orphan_dir_inode = NULL;
727 struct buffer_head *orphan_dir_bh = NULL; 730 struct buffer_head *orphan_dir_bh = NULL;
728 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 731 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
729 struct ocfs2_dinode *di; 732 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
730 733
731 di = (struct ocfs2_dinode *) di_bh->b_data; 734 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
732 orphaned_slot = le16_to_cpu(di->i_orphaned_slot); 735 orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
733 736
734 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); 737 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
735 if (status) 738 if (status)
736 return status; 739 return status;
737 740
738 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 741 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
739 ORPHAN_DIR_SYSTEM_INODE, 742 ORPHAN_DIR_SYSTEM_INODE,
740 orphaned_slot); 743 orphaned_slot);
741 if (!orphan_dir_inode) { 744 if (!orphan_dir_inode) {
742 status = -EEXIST; 745 status = -EEXIST;
743 mlog_errno(status); 746 mlog_errno(status);
744 goto bail; 747 goto bail;
745 } 748 }
746 749
747 /* Lock the orphan dir. The lock will be held for the entire 750 /* Lock the orphan dir. The lock will be held for the entire
748 * delete_inode operation. We do this now to avoid races with 751 * delete_inode operation. We do this now to avoid races with
749 * recovery completion on other nodes. */ 752 * recovery completion on other nodes. */
750 mutex_lock(&orphan_dir_inode->i_mutex); 753 mutex_lock(&orphan_dir_inode->i_mutex);
751 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); 754 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
752 if (status < 0) { 755 if (status < 0) {
753 mutex_unlock(&orphan_dir_inode->i_mutex); 756 mutex_unlock(&orphan_dir_inode->i_mutex);
754 757
755 mlog_errno(status); 758 mlog_errno(status);
756 goto bail; 759 goto bail;
760 }
757 } 761 }
758 762
759 /* we do this while holding the orphan dir lock because we 763 /* we do this while holding the orphan dir lock because we
@@ -794,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
794 mlog_errno(status); 798 mlog_errno(status);
795 799
796bail_unlock_dir: 800bail_unlock_dir:
801 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
802 return status;
803
797 ocfs2_inode_unlock(orphan_dir_inode, 1); 804 ocfs2_inode_unlock(orphan_dir_inode, 1);
798 mutex_unlock(&orphan_dir_inode->i_mutex); 805 mutex_unlock(&orphan_dir_inode->i_mutex);
799 brelse(orphan_dir_bh); 806 brelse(orphan_dir_bh);
@@ -889,7 +896,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
889 896
890 /* Do some basic inode verification... */ 897 /* Do some basic inode verification... */
891 di = (struct ocfs2_dinode *) di_bh->b_data; 898 di = (struct ocfs2_dinode *) di_bh->b_data;
892 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { 899 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
900 !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
893 /* 901 /*
894 * Inodes in the orphan dir must have ORPHANED_FL. The only 902 * Inodes in the orphan dir must have ORPHANED_FL. The only
895 * inodes that come back out of the orphan dir are reflink 903 * inodes that come back out of the orphan dir are reflink
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07b293c..0b28e1921a39 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -100,6 +100,8 @@ struct ocfs2_inode_info
100#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 100#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
101/* Does someone have the file open O_DIRECT */ 101/* Does someone have the file open O_DIRECT */
102#define OCFS2_INODE_OPEN_DIRECT 0x00000040 102#define OCFS2_INODE_OPEN_DIRECT 0x00000040
103/* Tell the inode wipe code it's not in orphan dir */
104#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080
103 105
104static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) 106static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
105{ 107{
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50ae4097..4cbb18f26c5f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir,
408 } 408 }
409 } 409 }
410 410
411 status = ocfs2_add_entry(handle, dentry, inode, 411 /*
412 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 412 * Do this before adding the entry to the directory. We add
413 &lookup); 413 * also set d_op after success so that ->d_iput() will cleanup
414 if (status < 0) { 414 * the dentry lock even if ocfs2_add_entry() fails below.
415 */
416 status = ocfs2_dentry_attach_lock(dentry, inode,
417 OCFS2_I(dir)->ip_blkno);
418 if (status) {
415 mlog_errno(status); 419 mlog_errno(status);
416 goto leave; 420 goto leave;
417 } 421 }
422 dentry->d_op = &ocfs2_dentry_ops;
418 423
419 status = ocfs2_dentry_attach_lock(dentry, inode, 424 status = ocfs2_add_entry(handle, dentry, inode,
420 OCFS2_I(dir)->ip_blkno); 425 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
421 if (status) { 426 &lookup);
427 if (status < 0) {
422 mlog_errno(status); 428 mlog_errno(status);
423 goto leave; 429 goto leave;
424 } 430 }
425 431
426 insert_inode_hash(inode); 432 insert_inode_hash(inode);
427 dentry->d_op = &ocfs2_dentry_ops;
428 d_instantiate(dentry, inode); 433 d_instantiate(dentry, inode);
429 status = 0; 434 status = 0;
430leave: 435leave:
@@ -445,11 +450,6 @@ leave:
445 450
446 ocfs2_free_dir_lookup_result(&lookup); 451 ocfs2_free_dir_lookup_result(&lookup);
447 452
448 if ((status < 0) && inode) {
449 clear_nlink(inode);
450 iput(inode);
451 }
452
453 if (inode_ac) 453 if (inode_ac)
454 ocfs2_free_alloc_context(inode_ac); 454 ocfs2_free_alloc_context(inode_ac);
455 455
@@ -459,6 +459,17 @@ leave:
459 if (meta_ac) 459 if (meta_ac)
460 ocfs2_free_alloc_context(meta_ac); 460 ocfs2_free_alloc_context(meta_ac);
461 461
462 /*
463 * We should call iput after the i_mutex of the bitmap been
464 * unlocked in ocfs2_free_alloc_context, or the
465 * ocfs2_delete_inode will mutex_lock again.
466 */
467 if ((status < 0) && inode) {
468 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
469 clear_nlink(inode);
470 iput(inode);
471 }
472
462 mlog_exit(status); 473 mlog_exit(status);
463 474
464 return status; 475 return status;
@@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir,
1771 } 1782 }
1772 } 1783 }
1773 1784
1774 status = ocfs2_add_entry(handle, dentry, inode, 1785 /*
1775 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1786 * Do this before adding the entry to the directory. We add
1776 &lookup); 1787 * also set d_op after success so that ->d_iput() will cleanup
1777 if (status < 0) { 1788 * the dentry lock even if ocfs2_add_entry() fails below.
1789 */
1790 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1791 if (status) {
1778 mlog_errno(status); 1792 mlog_errno(status);
1779 goto bail; 1793 goto bail;
1780 } 1794 }
1795 dentry->d_op = &ocfs2_dentry_ops;
1781 1796
1782 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); 1797 status = ocfs2_add_entry(handle, dentry, inode,
1783 if (status) { 1798 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1799 &lookup);
1800 if (status < 0) {
1784 mlog_errno(status); 1801 mlog_errno(status);
1785 goto bail; 1802 goto bail;
1786 } 1803 }
1787 1804
1788 insert_inode_hash(inode); 1805 insert_inode_hash(inode);
1789 dentry->d_op = &ocfs2_dentry_ops;
1790 d_instantiate(dentry, inode); 1806 d_instantiate(dentry, inode);
1791bail: 1807bail:
1792 if (status < 0 && did_quota) 1808 if (status < 0 && did_quota)
@@ -1811,6 +1827,7 @@ bail:
1811 if (xattr_ac) 1827 if (xattr_ac)
1812 ocfs2_free_alloc_context(xattr_ac); 1828 ocfs2_free_alloc_context(xattr_ac);
1813 if ((status < 0) && inode) { 1829 if ((status < 0) && inode) {
1830 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
1814 clear_nlink(inode); 1831 clear_nlink(inode);
1815 iput(inode); 1832 iput(inode);
1816 } 1833 }
@@ -1976,6 +1993,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1976 } 1993 }
1977 1994
1978 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); 1995 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
1996 OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
1979 1997
1980 /* Record which orphan dir our inode now resides 1998 /* Record which orphan dir our inode now resides
1981 * in. delete_inode will use this to determine which orphan 1999 * in. delete_inode will use this to determine which orphan
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bd96f6c7877e..5cbcd0f008fc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
4083 di->i_attr = s_di->i_attr; 4083 di->i_attr = s_di->i_attr;
4084 4084
4085 if (preserve) { 4085 if (preserve) {
4086 t_inode->i_uid = s_inode->i_uid;
4087 t_inode->i_gid = s_inode->i_gid;
4088 t_inode->i_mode = s_inode->i_mode;
4086 di->i_uid = s_di->i_uid; 4089 di->i_uid = s_di->i_uid;
4087 di->i_gid = s_di->i_gid; 4090 di->i_gid = s_di->i_gid;
4088 di->i_mode = s_di->i_mode; 4091 di->i_mode = s_di->i_mode;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e5..885ab5513ac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
81#include <linux/pid_namespace.h> 81#include <linux/pid_namespace.h>
82#include <linux/ptrace.h> 82#include <linux/ptrace.h>
83#include <linux/tracehook.h> 83#include <linux/tracehook.h>
84#include <linux/swapops.h>
85 84
86#include <asm/pgtable.h> 85#include <asm/pgtable.h>
87#include <asm/processor.h> 86#include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
495 rsslim, 494 rsslim,
496 mm ? mm->start_code : 0, 495 mm ? mm->start_code : 0,
497 mm ? mm->end_code : 0, 496 mm ? mm->end_code : 0,
498 (permitted && mm) ? task->stack_start : 0, 497 (permitted && mm) ? mm->start_stack : 0,
499 esp, 498 esp,
500 eip, 499 eip,
501 /* The signal information here is obsolete. 500 /* The signal information here is obsolete.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7621db800a74..8418fcc0a6ab 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2909,7 +2909,7 @@ out_no_task:
2909 */ 2909 */
2910static const struct pid_entry tid_base_stuff[] = { 2910static const struct pid_entry tid_base_stuff[] = {
2911 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2911 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2912 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), 2912 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2913 REG("environ", S_IRUSR, proc_environ_operations), 2913 REG("environ", S_IRUSR, proc_environ_operations),
2914 INF("auxv", S_IRUSR, proc_pid_auxv), 2914 INF("auxv", S_IRUSR, proc_pid_auxv),
2915 ONE("status", S_IRUGO, proc_pid_status), 2915 ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd5..47f5b145f56e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
247 } else if (vma->vm_start <= mm->start_stack && 247 } else if (vma->vm_start <= mm->start_stack &&
248 vma->vm_end >= mm->start_stack) { 248 vma->vm_end >= mm->start_stack) {
249 name = "[stack]"; 249 name = "[stack]";
250 } else {
251 unsigned long stack_start;
252 struct proc_maps_private *pmp;
253
254 pmp = m->private;
255 stack_start = pmp->task->stack_start;
256
257 if (vma->vm_start <= stack_start &&
258 vma->vm_end >= stack_start) {
259 pad_len_spaces(m, len);
260 seq_printf(m,
261 "[threadstack:%08lx]",
262#ifdef CONFIG_STACK_GROWSUP
263 vma->vm_end - stack_start
264#else
265 stack_start - vma->vm_start
266#endif
267 );
268 }
269 } 250 }
270 } else { 251 } else {
271 name = "[vdso]"; 252 name = "[vdso]";
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index f8a6075abf50..07930449a958 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir,
46 struct reiserfs_de_head *deh) 46 struct reiserfs_de_head *deh)
47{ 47{
48 struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; 48 struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
49 if (reiserfs_expose_privroot(dir->d_sb))
50 return 0;
51 return (dir == dir->d_parent && privroot->d_inode && 49 return (dir == dir->d_parent && privroot->d_inode &&
52 deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); 50 deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
53} 51}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4f9586bb7631..e7cc00e636dc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
554 if (!err && new_size < i_size_read(dentry->d_inode)) { 554 if (!err && new_size < i_size_read(dentry->d_inode)) {
555 struct iattr newattrs = { 555 struct iattr newattrs = {
556 .ia_ctime = current_fs_time(inode->i_sb), 556 .ia_ctime = current_fs_time(inode->i_sb),
557 .ia_size = buffer_size, 557 .ia_size = new_size,
558 .ia_valid = ATTR_SIZE | ATTR_CTIME, 558 .ia_valid = ATTR_SIZE | ATTR_CTIME,
559 }; 559 };
560 560
@@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask)
973 return generic_permission(inode, mask, NULL); 973 return generic_permission(inode, mask, NULL);
974} 974}
975 975
976/* This will catch lookups from the fs root to .reiserfs_priv */ 976static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
977static int
978xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
979{ 977{
980 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; 978 return -EPERM;
981 if (container_of(q1, struct dentry, d_name) == priv_root)
982 return -ENOENT;
983 if (q1->len == name->len &&
984 !memcmp(q1->name, name->name, name->len))
985 return 0;
986 return 1;
987} 979}
988 980
989static const struct dentry_operations xattr_lookup_poison_ops = { 981static const struct dentry_operations xattr_lookup_poison_ops = {
990 .d_compare = xattr_lookup_poison, 982 .d_revalidate = xattr_hide_revalidate,
991}; 983};
992 984
993int reiserfs_lookup_privroot(struct super_block *s) 985int reiserfs_lookup_privroot(struct super_block *s)
@@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
1001 strlen(PRIVROOT_NAME)); 993 strlen(PRIVROOT_NAME));
1002 if (!IS_ERR(dentry)) { 994 if (!IS_ERR(dentry)) {
1003 REISERFS_SB(s)->priv_root = dentry; 995 REISERFS_SB(s)->priv_root = dentry;
1004 if (!reiserfs_expose_privroot(s)) 996 dentry->d_op = &xattr_lookup_poison_ops;
1005 s->s_root->d_op = &xattr_lookup_poison_ops;
1006 if (dentry->d_inode) 997 if (dentry->d_inode)
1007 dentry->d_inode->i_flags |= S_PRIVATE; 998 dentry->d_inode->i_flags |= S_PRIVATE;
1008 } else 999 } else
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1c4c8f089970..dfa1d67f8fca 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb)
479 if (server->conn_pid) 479 if (server->conn_pid)
480 kill_pid(server->conn_pid, SIGTERM, 1); 480 kill_pid(server->conn_pid, SIGTERM, 1);
481 481
482 bdi_destroy(&server->bdi);
482 kfree(server->ops); 483 kfree(server->ops);
483 smb_unload_nls(server); 484 smb_unload_nls(server);
484 sb->s_fs_info = NULL; 485 sb->s_fs_info = NULL;
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
525 if (!server) 526 if (!server)
526 goto out_no_server; 527 goto out_no_server;
527 sb->s_fs_info = server; 528 sb->s_fs_info = server;
529
530 if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
531 goto out_bdi;
532
533 sb->s_bdi = &server->bdi;
528 534
529 server->super_block = sb; 535 server->super_block = sb;
530 server->mnt = NULL; 536 server->mnt = NULL;
@@ -624,6 +630,8 @@ out_no_smbiod:
624out_bad_option: 630out_bad_option:
625 kfree(mem); 631 kfree(mem);
626out_no_mem: 632out_no_mem:
633 bdi_destroy(&server->bdi);
634out_bdi:
627 if (!server->mnt) 635 if (!server->mnt)
628 printk(KERN_ERR "smb_fill_super: allocation failure\n"); 636 printk(KERN_ERR "smb_fill_super: allocation failure\n");
629 sb->s_fs_info = NULL; 637 sb->s_fs_info = NULL;
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 1cb0d81b164b..653c030eb840 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
87 u64 cur_index = index >> msblk->devblksize_log2; 87 u64 cur_index = index >> msblk->devblksize_log2;
88 int bytes, compressed, b = 0, k = 0, page = 0, avail; 88 int bytes, compressed, b = 0, k = 0, page = 0, avail;
89 89
90 90 bh = kcalloc(((srclength + msblk->devblksize - 1)
91 bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1, 91 >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
92 sizeof(*bh), GFP_KERNEL);
93 if (bh == NULL) 92 if (bh == NULL)
94 return -ENOMEM; 93 return -ENOMEM;
95 94
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3550aec2f655..48b6f4a385a6 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -275,7 +275,8 @@ allocate_root:
275 275
276 err = squashfs_read_inode(root, root_inode); 276 err = squashfs_read_inode(root, root_inode);
277 if (err) { 277 if (err) {
278 iget_failed(root); 278 make_bad_inode(root);
279 iput(root);
279 goto failed_mount; 280 goto failed_mount;
280 } 281 }
281 insert_inode_hash(root); 282 insert_inode_hash(root);
@@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb)
353 kfree(sbi->id_table); 354 kfree(sbi->id_table);
354 kfree(sbi->fragment_index); 355 kfree(sbi->fragment_index);
355 kfree(sbi->meta_index); 356 kfree(sbi->meta_index);
357 kfree(sbi->inode_lookup_table);
356 kfree(sb->s_fs_info); 358 kfree(sb->s_fs_info);
357 sb->s_fs_info = NULL; 359 sb->s_fs_info = NULL;
358 } 360 }
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 15a03d0fb9f3..7a603874e483 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
128 goto release_mutex; 128 goto release_mutex;
129 } 129 }
130 130
131 length = stream->total_out;
131 mutex_unlock(&msblk->read_data_mutex); 132 mutex_unlock(&msblk->read_data_mutex);
132 return stream->total_out; 133 return length;
133 134
134release_mutex: 135release_mutex:
135 mutex_unlock(&msblk->read_data_mutex); 136 mutex_unlock(&msblk->read_data_mutex);
diff --git a/fs/super.c b/fs/super.c
index f35ac6022109..1527e6a0ee35 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
37#include <linux/kobject.h> 37#include <linux/kobject.h>
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/file.h> 39#include <linux/file.h>
40#include <linux/backing-dev.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include "internal.h" 42#include "internal.h"
42 43
@@ -693,6 +694,7 @@ int set_anon_super(struct super_block *s, void *data)
693 return -EMFILE; 694 return -EMFILE;
694 } 695 }
695 s->s_dev = MKDEV(0, dev & MINORMASK); 696 s->s_dev = MKDEV(0, dev & MINORMASK);
697 s->s_bdi = &noop_backing_dev_info;
696 return 0; 698 return 0;
697} 699}
698 700
@@ -954,10 +956,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
954 if (error < 0) 956 if (error < 0)
955 goto out_free_secdata; 957 goto out_free_secdata;
956 BUG_ON(!mnt->mnt_sb); 958 BUG_ON(!mnt->mnt_sb);
959 WARN_ON(!mnt->mnt_sb->s_bdi);
957 960
958 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 961 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
959 if (error) 962 if (error)
960 goto out_sb; 963 goto out_sb;
961 964
962 /* 965 /*
963 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE 966 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
diff --git a/fs/sync.c b/fs/sync.c
index fc5c3d75cf3c..92b228176f7c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,6 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/quotaops.h> 15#include <linux/quotaops.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
17#include <linux/backing-dev.h>
17#include "internal.h" 18#include "internal.h"
18 19
19#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ 20#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
32 * This should be safe, as we require bdi backing to actually 33 * This should be safe, as we require bdi backing to actually
33 * write out data in the first place 34 * write out data in the first place
34 */ 35 */
35 if (!sb->s_bdi) 36 if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
36 return 0; 37 return 0;
37 38
38 if (sb->s_qcop && sb->s_qcop->quota_sync) 39 if (sb->s_qcop && sb->s_qcop->quota_sync)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b487ced..29f1edca76de 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(
1209 1209
1210 xfs_unmountfs(mp); 1210 xfs_unmountfs(mp);
1211 xfs_freesb(mp); 1211 xfs_freesb(mp);
1212 xfs_inode_shrinker_unregister(mp);
1212 xfs_icsb_destroy_counters(mp); 1213 xfs_icsb_destroy_counters(mp);
1213 xfs_close_devices(mp); 1214 xfs_close_devices(mp);
1214 xfs_dmops_put(mp); 1215 xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
1622 if (error) 1623 if (error)
1623 goto fail_vnrele; 1624 goto fail_vnrele;
1624 1625
1626 xfs_inode_shrinker_register(mp);
1627
1625 kfree(mtpt); 1628 kfree(mtpt);
1626 return 0; 1629 return 0;
1627 1630
@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
1867 goto out_cleanup_procfs; 1870 goto out_cleanup_procfs;
1868 1871
1869 vfs_initquota(); 1872 vfs_initquota();
1873 xfs_inode_shrinker_init();
1870 1874
1871 error = register_filesystem(&xfs_fs_type); 1875 error = register_filesystem(&xfs_fs_type);
1872 if (error) 1876 if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
1894{ 1898{
1895 vfs_exitquota(); 1899 vfs_exitquota();
1896 unregister_filesystem(&xfs_fs_type); 1900 unregister_filesystem(&xfs_fs_type);
1901 xfs_inode_shrinker_destroy();
1897 xfs_sysctl_unregister(); 1902 xfs_sysctl_unregister();
1898 xfs_cleanup_procfs(); 1903 xfs_cleanup_procfs();
1899 xfs_buf_terminate(); 1904 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fd9698215759..a427c638d909 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
95 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
96 int flags, 96 int flags,
97 int tag, 97 int tag,
98 int exclusive) 98 int exclusive,
99 int *nr_to_scan)
99{ 100{
100 uint32_t first_index; 101 uint32_t first_index;
101 int last_error = 0; 102 int last_error = 0;
@@ -134,7 +135,7 @@ restart:
134 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
135 break; 136 break;
136 137
137 } while (1); 138 } while ((*nr_to_scan)--);
138 139
139 if (skipped) { 140 if (skipped) {
140 delay(1); 141 delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
150 struct xfs_perag *pag, int flags), 151 struct xfs_perag *pag, int flags),
151 int flags, 152 int flags,
152 int tag, 153 int tag,
153 int exclusive) 154 int exclusive,
155 int *nr_to_scan)
154{ 156{
155 int error = 0; 157 int error = 0;
156 int last_error = 0; 158 int last_error = 0;
157 xfs_agnumber_t ag; 159 xfs_agnumber_t ag;
160 int nr;
158 161
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
159 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
160 struct xfs_perag *pag; 164 struct xfs_perag *pag;
161 165
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
165 continue; 169 continue;
166 } 170 }
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
168 exclusive); 172 exclusive, &nr);
169 xfs_perag_put(pag); 173 xfs_perag_put(pag);
170 if (error) { 174 if (error) {
171 last_error = error; 175 last_error = error;
172 if (error == EFSCORRUPTED) 176 if (error == EFSCORRUPTED)
173 break; 177 break;
174 } 178 }
179 if (nr <= 0)
180 break;
175 } 181 }
182 if (nr_to_scan)
183 *nr_to_scan = nr;
176 return XFS_ERROR(last_error); 184 return XFS_ERROR(last_error);
177} 185}
178 186
@@ -291,7 +299,7 @@ xfs_sync_data(
291 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 299 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
292 300
293 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 301 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
294 XFS_ICI_NO_TAG, 0); 302 XFS_ICI_NO_TAG, 0, NULL);
295 if (error) 303 if (error)
296 return XFS_ERROR(error); 304 return XFS_ERROR(error);
297 305
@@ -310,7 +318,7 @@ xfs_sync_attr(
310 ASSERT((flags & ~SYNC_WAIT) == 0); 318 ASSERT((flags & ~SYNC_WAIT) == 0);
311 319
312 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 320 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
313 XFS_ICI_NO_TAG, 0); 321 XFS_ICI_NO_TAG, 0, NULL);
314} 322}
315 323
316STATIC int 324STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
673 radix_tree_tag_set(&pag->pag_ici_root, 681 radix_tree_tag_set(&pag->pag_ici_root,
674 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 682 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
675 XFS_ICI_RECLAIM_TAG); 683 XFS_ICI_RECLAIM_TAG);
684 pag->pag_ici_reclaimable++;
676} 685}
677 686
678/* 687/*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
705{ 714{
706 radix_tree_tag_clear(&pag->pag_ici_root, 715 radix_tree_tag_clear(&pag->pag_ici_root,
707 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 716 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
717 pag->pag_ici_reclaimable--;
708} 718}
709 719
710/* 720/*
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
854 int mode) 864 int mode)
855{ 865{
856 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, 866 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
857 XFS_ICI_RECLAIM_TAG, 1); 867 XFS_ICI_RECLAIM_TAG, 1, NULL);
868}
869
870/*
871 * Shrinker infrastructure.
872 *
873 * This is all far more complex than it needs to be. It adds a global list of
874 * mounts because the shrinkers can only call a global context. We need to make
875 * the shrinkers pass a context to avoid the need for global state.
876 */
877static LIST_HEAD(xfs_mount_list);
878static struct rw_semaphore xfs_mount_list_lock;
879
880static int
881xfs_reclaim_inode_shrink(
882 int nr_to_scan,
883 gfp_t gfp_mask)
884{
885 struct xfs_mount *mp;
886 struct xfs_perag *pag;
887 xfs_agnumber_t ag;
888 int reclaimable = 0;
889
890 if (nr_to_scan) {
891 if (!(gfp_mask & __GFP_FS))
892 return -1;
893
894 down_read(&xfs_mount_list_lock);
895 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
896 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
897 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
898 if (nr_to_scan <= 0)
899 break;
900 }
901 up_read(&xfs_mount_list_lock);
902 }
903
904 down_read(&xfs_mount_list_lock);
905 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
906 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
907
908 pag = xfs_perag_get(mp, ag);
909 if (!pag->pag_ici_init) {
910 xfs_perag_put(pag);
911 continue;
912 }
913 reclaimable += pag->pag_ici_reclaimable;
914 xfs_perag_put(pag);
915 }
916 }
917 up_read(&xfs_mount_list_lock);
918 return reclaimable;
919}
920
921static struct shrinker xfs_inode_shrinker = {
922 .shrink = xfs_reclaim_inode_shrink,
923 .seeks = DEFAULT_SEEKS,
924};
925
926void __init
927xfs_inode_shrinker_init(void)
928{
929 init_rwsem(&xfs_mount_list_lock);
930 register_shrinker(&xfs_inode_shrinker);
931}
932
933void
934xfs_inode_shrinker_destroy(void)
935{
936 ASSERT(list_empty(&xfs_mount_list));
937 unregister_shrinker(&xfs_inode_shrinker);
938}
939
940void
941xfs_inode_shrinker_register(
942 struct xfs_mount *mp)
943{
944 down_write(&xfs_mount_list_lock);
945 list_add_tail(&mp->m_mplist, &xfs_mount_list);
946 up_write(&xfs_mount_list_lock);
947}
948
949void
950xfs_inode_shrinker_unregister(
951 struct xfs_mount *mp)
952{
953 down_write(&xfs_mount_list_lock);
954 list_del(&mp->m_mplist);
955 up_write(&xfs_mount_list_lock);
858} 956}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c346cabb..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
54int xfs_inode_ag_iterator(struct xfs_mount *mp, 54int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
57 62
58#endif 63#endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d492db..50bee07d6b0e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 891 uint flags)
892{ 892{
893 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
895 XFS_ICI_NO_TAG, 0, NULL);
895} 896}
896 897
897/*------------------------------------------------------------------------*/ 898/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1ff88ea..abb8222b88c9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
223 int pag_ici_init; /* incore inode cache initialised */ 223 int pag_ici_init; /* incore inode cache initialised */
224 rwlock_t pag_ici_lock; /* incore inode lock */ 224 rwlock_t pag_ici_lock; /* incore inode lock */
225 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 225 struct radix_tree_root pag_ici_root; /* incore inode cache root */
226 int pag_ici_reclaimable; /* reclaimable inodes */
226#endif 227#endif
227 int pagb_count; /* pagb slots in use */ 228 int pagb_count; /* pagb slots in use */
228 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ 229 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index cd27c9d6c71f..5bba29a07812 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -177,16 +177,26 @@ xfs_swap_extents_check_format(
177 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) 177 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
178 return EINVAL; 178 return EINVAL;
179 179
180 /* Check root block of temp in btree form to max in target */ 180 /*
181 * If we are in a btree format, check that the temp root block will fit
182 * in the target and that it has enough extents to be in btree format
183 * in the target.
184 *
185 * Note that we have to be careful to allow btree->extent conversions
186 * (a common defrag case) which will occur when the temp inode is in
187 * extent format...
188 */
181 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && 189 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(ip) && 190 ((XFS_IFORK_BOFF(ip) &&
183 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) 191 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
192 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
184 return EINVAL; 193 return EINVAL;
185 194
186 /* Check root block of target in btree form to max in temp */ 195 /* Reciprocal target->temp btree format checks */
187 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && 196 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
188 XFS_IFORK_BOFF(tip) && 197 ((XFS_IFORK_BOFF(tip) &&
189 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) 198 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
199 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
190 return EINVAL; 200 return EINVAL;
191 201
192 return 0; 202 return 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7b983e..9ff48a16a7ee 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */
262} xfs_mount_t; 263} xfs_mount_t;
263 264
264/* 265/*